From e3b4cc02aa3d18a6f436093774356ceaffba6a46 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 07:26:01 -0400
Subject: [PATCH 01/26] Done GSA-282: Unindexed traversals crash if a read goes
 off the end of a contig

-- Already fixed in the codebase.  Added unindexed bam and integration tests to ensure this is fine going forward.
---
 .../walkers/PileupWalkerIntegrationTest.java     | 16 +++++++++++++++-
 1 file changed, 15 insertions(+), 1 deletion(-)
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java
index 667b325ed..e16ef3125 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PileupWalkerIntegrationTest.java
@@ -16,13 +16,27 @@ public class PileupWalkerIntegrationTest extends WalkerTest {
         executeTest("Testing the standard (no-indel) pileup on three merged FHS pools with 27 deletions in 969 bases", spec);
     }
 
+
+
+    private final static String SingleReadAligningOffChromosome1MD5 = "4a45fe1f85aaa8c4158782f2b6dee2bd";
     @Test
     public void testSingleReadAligningOffChromosome1() {
         String gatk_args = "-T Pileup "
                 + " -I " + privateTestDir + "readOffb37contig1.bam"
                 + " -R " + b37KGReference
                 + " -o %s";
-        WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList("4a45fe1f85aaa8c4158782f2b6dee2bd"));
+        WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
         executeTest("Testing single read spanning off chromosome 1", spec);
     }
+
+    @Test
+    public void testSingleReadAligningOffChromosome1NoIndex() {
+        String gatk_args = "-T Pileup "
+                + " -I " + privateTestDir + "readOffb37contig1.noIndex.bam"
+                + " -R " + b37KGReference
+                + " -U ALLOW_UNINDEXED_BAM"
+                + " -o %s";
+        WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 1, Arrays.asList(SingleReadAligningOffChromosome1MD5));
+        executeTest("Testing single read spanning off chromosome 1 unindexed", spec);
+    }
 }

From 397a5551ef73e87971ba255c68a9f82b73d21490 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 31 Aug 2012 19:54:33 -0400
Subject: [PATCH 02/26] More memory for gatkdocs and extracthelp targets

---
 build.xml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/build.xml b/build.xml
index f681ddafa..0d1deba29 100644
--- a/build.xml
+++ b/build.xml
@@ -577,6 +577,7 @@
                  docletpathref="doclet.classpath"
                  classpathref="external.dependencies"
                  classpath="${java.classes}"
+		 maxmemory="2g"
                  additionalparam="-build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -out ${basedir}/${resource.path} -quiet">
             <sourcefiles>
                 <union>
@@ -780,6 +781,7 @@
                  docletpathref="doclet.classpath"
                  classpathref="external.dependencies"
                  classpath="${java.classes}"
+		 maxmemory="2g"
                  additionalparam="${gatkdocs.include.hidden.arg} -private -build-timestamp &quot;${build.timestamp}&quot; -absolute-version ${build.version} -quiet"> <!-- -test to only do DocumentationTest walker -->
             <sourcefiles>
                 <fileset refid="java.source.files"/>

From 6055101df8965a3391a19fe686edb8ba85f10487 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 31 Aug 2012 20:10:26 -0400
Subject: [PATCH 03/26] NanoScheduler no longer groups inputs, each map() call
 is interlaced now

-- Maximizes the efficiency of the threads
-- Simplifies interface (yea!)
-- Reduces number of combinatorial tests that need to be performed
---
 .../gatk/traversals/TraverseReadsNano.java    |  4 +-
 .../utils/nanoScheduler/NanoScheduler.java    | 76 ++++++-------------
 .../nanoScheduler/NanoSchedulerUnitTest.java  | 41 +++++-----
 3 files changed, 43 insertions(+), 78 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
index b397cb8c0..dbddeb092 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
@@ -55,13 +55,11 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
     /** our log, which we want to capture anything from this class */
     protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
     private static final boolean DEBUG = false;
-    private static final int MIN_GROUP_SIZE = 100;
     final NanoScheduler<MapData, M, T> nanoScheduler;
 
     public TraverseReadsNano(int nThreads) {
         final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
-        final int mapGroupSize = (int)Math.max(Math.ceil(bufferSize / 50.0 + 1), MIN_GROUP_SIZE);
-        nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, mapGroupSize, nThreads);
+        nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, nThreads);
     }
 
     @Override
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 668c82524..5c6aa6a35 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -3,7 +3,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.Iterator;
@@ -47,7 +46,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
 
     final int bufferSize;
-    final int mapGroupSize;
     final int nThreads;
     final ExecutorService executor;
     boolean shutdown = false;
@@ -57,29 +55,15 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * Create a new nanoschedule with the desire characteristics requested by the argument
      *
      * @param bufferSize the number of input elements to read in each scheduling cycle.
-     * @param mapGroupSize How many inputs should be grouped together per map?  If -1 we make a reasonable guess
      * @param nThreads the number of threads to use to get work done, in addition to the thread calling execute
      */
     public NanoScheduler(final int bufferSize,
-                         final int mapGroupSize,
                          final int nThreads) {
         if ( bufferSize < 1 ) throw new IllegalArgumentException("bufferSize must be >= 1, got " + bufferSize);
         if ( nThreads < 1 ) throw new IllegalArgumentException("nThreads must be >= 1, got " + nThreads);
 
-        if ( mapGroupSize > bufferSize ) throw new IllegalArgumentException("mapGroupSize " + mapGroupSize + " must be <= bufferSize " + bufferSize);
-        if ( mapGroupSize == 0 || mapGroupSize < -1 ) throw new IllegalArgumentException("mapGroupSize cannot be <= 0" + mapGroupSize);
-
         this.bufferSize = bufferSize;
         this.nThreads = nThreads;
-
-        if ( mapGroupSize == -1 ) {
-            this.mapGroupSize = (int)Math.ceil(this.bufferSize / (10.0*this.nThreads));
-            logger.info(String.format("Dynamically setting grouping size to %d based on buffer size %d and n threads %d",
-                    this.mapGroupSize, this.bufferSize, this.nThreads));
-        } else {
-            this.mapGroupSize = mapGroupSize;
-        }
-
         this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
     }
 
@@ -101,15 +85,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         return bufferSize;
     }
 
-    /**
-     * The grouping size used by this NanoScheduler
-     * @return
-     */
-    @Ensures("result > 0")
-    public int getMapGroupSize() {
-        return mapGroupSize;
-    }
-
     /**
      * Tells this nanoScheduler to shutdown immediately, releasing all its resources.
      *
@@ -214,10 +189,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                 final List<InputType> inputs = readInputs(inputReader);
 
                 // send jobs for map
-                final Queue<Future<List<MapType>>> mapQueue = submitMapJobs(map, executor, inputs);
+                final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
 
                 // send off the reduce job, and block until we get at least one reduce result
-                sum = reduceParallel(reduce, mapQueue, sum);
+                sum = reduceSerial(reduce, mapQueue, sum);
             } catch (InterruptedException ex) {
                 throw new ReviewedStingException("got execution exception", ex);
             } catch (ExecutionException ex) {
@@ -229,16 +204,16 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     }
 
     @Requires({"reduce != null", "! mapQueue.isEmpty()"})
-    private ReduceType reduceParallel(final ReduceFunction<MapType, ReduceType> reduce,
-                                      final Queue<Future<List<MapType>>> mapQueue,
-                                      final ReduceType initSum)
+    private ReduceType reduceSerial(final ReduceFunction<MapType, ReduceType> reduce,
+                                    final Queue<Future<MapType>> mapQueue,
+                                    final ReduceType initSum)
             throws InterruptedException, ExecutionException {
         ReduceType sum = initSum;
 
         // while mapQueue has something in it to reduce
-        for ( final Future<List<MapType>> future : mapQueue ) {
-            for ( final MapType value : future.get() ) // block until we get the values for this task
-                sum = reduce.apply(value, sum);
+        for ( final Future<MapType> future : mapQueue ) {
+            final MapType value = future.get(); // block until we get the values for this task
+            sum = reduce.apply(value, sum);
         }
 
         return sum;
@@ -247,7 +222,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     /**
      * Read up to inputBufferSize elements from inputReader
      *
-     * @return a queue of inputs read in, containing one or more values of InputType read in
+     * @return a queue of input read in, containing one or more values of InputType read in
      */
     @Requires("inputReader.hasNext()")
     @Ensures("!result.isEmpty()")
@@ -263,14 +238,14 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     }
 
     @Requires({"map != null", "! inputs.isEmpty()"})
-    private Queue<Future<List<MapType>>> submitMapJobs(final MapFunction<InputType, MapType> map,
-                                                       final ExecutorService executor,
-                                                       final List<InputType> inputs) {
-        final Queue<Future<List<MapType>>> mapQueue = new LinkedList<Future<List<MapType>>>();
+    private Queue<Future<MapType>> submitMapJobs(final MapFunction<InputType, MapType> map,
+                                                 final ExecutorService executor,
+                                                 final List<InputType> inputs) {
+        final Queue<Future<MapType>> mapQueue = new LinkedList<Future<MapType>>();
 
-        for ( final List<InputType> subinputs : Utils.groupList(inputs, getMapGroupSize()) ) {
-            final CallableMap doMap = new CallableMap(map, subinputs);
-            final Future<List<MapType>> future = executor.submit(doMap);
+        for ( final InputType input : inputs ) {
+            final CallableMap doMap = new CallableMap(map, input);
+            final Future<MapType> future = executor.submit(doMap);
             mapQueue.add(future);
         }
 
@@ -280,23 +255,18 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     /**
      * A simple callable version of the map function for use with the executor pool
      */
-    private class CallableMap implements Callable<List<MapType>> {
-        final List<InputType> inputs;
+    private class CallableMap implements Callable<MapType> {
+        final InputType input;
         final MapFunction<InputType, MapType> map;
 
-        @Requires({"map != null", "inputs.size() <= getMapGroupSize()"})
-        private CallableMap(final MapFunction<InputType, MapType> map, final List<InputType> inputs) {
-            this.inputs = inputs;
+        @Requires({"map != null"})
+        private CallableMap(final MapFunction<InputType, MapType> map, final InputType inputs) {
+            this.input = inputs;
             this.map = map;
         }
 
-        @Ensures("result.size() == inputs.size()")
-        @Override public List<MapType> call() throws Exception {
-            final List<MapType> outputs = new LinkedList<MapType>();
-            for ( final InputType input : inputs )
-                outputs.add(map.apply(input));
-            debugPrint("    Processed %d elements with map", outputs.size());
-            return outputs;
+        @Override public MapType call() throws Exception {
+            return map.apply(input);
         }
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index 89506dcb1..1dcc243f2 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -5,7 +5,10 @@ import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
 
 /**
  * UnitTests for the NanoScheduler
@@ -39,18 +42,17 @@ public class NanoSchedulerUnitTest extends BaseTest {
     }
 
     private static class NanoSchedulerBasicTest extends TestDataProvider {
-        final int bufferSize, mapGroupSize, nThreads, start, end, expectedResult;
+        final int bufferSize, nThreads, start, end, expectedResult;
 
-        public NanoSchedulerBasicTest(final int bufferSize, final int mapGroupSize, final int nThreads, final int start, final int end) {
+        public NanoSchedulerBasicTest(final int bufferSize, final int nThreads, final int start, final int end) {
             super(NanoSchedulerBasicTest.class);
             this.bufferSize = bufferSize;
-            this.mapGroupSize = mapGroupSize;
             this.nThreads = nThreads;
             this.start = start;
             this.end = end;
             this.expectedResult = sum2x(start, end);
-            setName(String.format("%s nt=%d buf=%d mapGroupSize=%d start=%d end=%d sum=%d",
-                    getClass().getSimpleName(), nThreads, bufferSize, mapGroupSize, start, end, expectedResult));
+            setName(String.format("%s nt=%d buf=%d start=%d end=%d sum=%d",
+                    getClass().getSimpleName(), nThreads, bufferSize, start, end, expectedResult));
         }
 
         public Iterator<Integer> makeReader() {
@@ -69,14 +71,10 @@ public class NanoSchedulerUnitTest extends BaseTest {
     @DataProvider(name = "NanoSchedulerBasicTest")
     public Object[][] createNanoSchedulerBasicTest() {
         for ( final int bufferSize : Arrays.asList(1, 10, 1000, 1000000) ) {
-            for ( final int mapGroupSize : Arrays.asList(-1, 1, 10, 100, 1000) ) {
-                if ( mapGroupSize <= bufferSize ) {
-                    for ( final int nt : Arrays.asList(1, 2, 4) ) {
-                        for ( final int start : Arrays.asList(0) ) {
-                            for ( final int end : Arrays.asList(1, 2, 11, 10000, 100000) ) {
-                                exampleTest = new NanoSchedulerBasicTest(bufferSize, mapGroupSize, nt, start, end);
-                            }
-                        }
+            for ( final int nt : Arrays.asList(1, 2, 4) ) {
+                for ( final int start : Arrays.asList(0) ) {
+                    for ( final int end : Arrays.asList(1, 2, 11, 10000, 100000) ) {
+                        exampleTest = new NanoSchedulerBasicTest(bufferSize, nt, start, end);
                     }
                 }
             }
@@ -101,10 +99,9 @@ public class NanoSchedulerUnitTest extends BaseTest {
 
     private void testNanoScheduler(final NanoSchedulerBasicTest test) throws InterruptedException {
         final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
-                new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.mapGroupSize, test.nThreads);
+                new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
 
         Assert.assertEquals(nanoScheduler.getBufferSize(), test.bufferSize, "bufferSize argument");
-        Assert.assertTrue(nanoScheduler.getMapGroupSize() >= test.mapGroupSize, "mapGroupSize argument");
         Assert.assertEquals(nanoScheduler.getnThreads(), test.nThreads, "nThreads argument");
 
         final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
@@ -115,11 +112,11 @@ public class NanoSchedulerUnitTest extends BaseTest {
 
     @Test(enabled = true, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
     public void testNanoSchedulerInLoop(final NanoSchedulerBasicTest test) throws InterruptedException {
-        if ( test.bufferSize > 1 && (test.mapGroupSize > 1 || test.mapGroupSize == -1)) {
+        if ( test.bufferSize > 1) {
             logger.warn("Running " + test);
 
             final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
-                    new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.mapGroupSize, test.nThreads);
+                    new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
 
             // test reusing the scheduler
             for ( int i = 0; i < 10; i++ ) {
@@ -134,7 +131,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
 
     @Test(timeOut = NANO_SCHEDULE_MAX_RUNTIME)
     public void testShutdown() throws InterruptedException {
-        final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 1, 2);
+        final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
         Assert.assertFalse(nanoScheduler.isShutdown(), "scheduler should be alive");
         nanoScheduler.shutdown();
         Assert.assertTrue(nanoScheduler.isShutdown(), "scheduler should be dead");
@@ -142,15 +139,15 @@ public class NanoSchedulerUnitTest extends BaseTest {
 
     @Test(expectedExceptions = IllegalStateException.class, timeOut = NANO_SCHEDULE_MAX_RUNTIME)
     public void testShutdownExecuteFailure() throws InterruptedException {
-        final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 1, 2);
+        final NanoScheduler<Integer, Integer, Integer> nanoScheduler = new NanoScheduler<Integer, Integer, Integer>(1, 2);
         nanoScheduler.shutdown();
         nanoScheduler.execute(exampleTest.makeReader(), exampleTest.makeMap(), exampleTest.initReduce(), exampleTest.makeReduce());
     }
 
     public static void main(String [ ] args) {
-        final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, 100, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
+        final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
         final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
-                new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.mapGroupSize, test.nThreads);
+                new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
 
         final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
         System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);

From e01258b2615609e925f2deb3dd886bae6b08402a Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sat, 1 Sep 2012 11:51:31 -0400
Subject: [PATCH 04/26] NanoScheduler now supports printProgress.  Bugfixes to
 printProgress

-- TraverseReadsNano prints progress at the end of each traversal unit
-- Fix bugs in TraversalEngine printProgress
    -- Synchronize the method so we don't get multiple logged outputs when two or more HMSs call printProgress before initialization at the start!
    -- Fix the logic for mustPrint, which actually had the logic of mustNotPrint.  Now we see the done log line that was always supposed to be there
    -- Fix output formatting, as the done() line was incorrectly shifting over the % complete by 1 char as 100.0% didn't fit in %4.1f
-- Add clearer doc on -PF argument so that people know that the performance log can be generated to standard out if one wants
---
 .../arguments/GATKArgumentCollection.java     | 10 +++++-
 .../gatk/traversals/TraversalEngine.java      | 33 +++++++++++++++----
 .../gatk/traversals/TraverseReadsNano.java    | 18 +++++++---
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 72cb5e02f..6be66b204 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -41,7 +41,9 @@ import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 
 import java.io.File;
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
 
 /**
  * @author aaron
@@ -197,6 +199,12 @@ public class GATKArgumentCollection {
     // performance log arguments
     //
     // --------------------------------------------------------------------------------------------------------------
+
+    /**
+     * The file name for the GATK performance log output, or null if you don't want to generate the
+     * detailed performance logging table.  This table is suitable for importing into R or any
+     * other analysis software that can read tsv files
+     */
     @Argument(fullName = "performanceLog", shortName="PF", doc="If provided, a GATK runtime performance log will be written to this file", required = false)
     public File performanceLog = null;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
index abc71e549..198f9342e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@@ -189,12 +189,26 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     /**
      * Forward request to printProgress
      *
+     * Assumes that one cycle has been completed
+     *
      * @param shard the given shard currently being processed.
      * @param loc  the location
      */
     public void printProgress(Shard shard, GenomeLoc loc) {
         // A bypass is inserted here for unit testing.
-        printProgress(loc,shard.getReadMetrics(),false);
+        printProgress(loc,shard.getReadMetrics(),false, 1);
+    }
+
+    /**
+     * Forward request to printProgress
+     *
+     * @param shard the given shard currently being processed.
+     * @param loc  the location
+     * @param nElapsedCycles the number of cycles (turns of map) that have occurred since the last call
+     */
+    public void printProgress(Shard shard, GenomeLoc loc, int nElapsedCycles) {
+        // A bypass is inserted here for unit testing.
+        printProgress(loc,shard.getReadMetrics(),false, nElapsedCycles);
     }
 
     /**
@@ -205,12 +219,16 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * @param metrics   Data processed since the last cumulative
      * @param mustPrint If true, will print out info, regardless of nRecords or time interval
      */
-    private void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint) {
-        if ( mustPrint || printProgressCheckCounter++ % PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES != 0 )
+    private synchronized void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint, int nElapsedCycles) {
+        final int previousPrintCycle = printProgressCheckCounter / PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES;
+        final int newPrintCycle = (printProgressCheckCounter+nElapsedCycles) / PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES;
+
+        printProgressCheckCounter += nElapsedCycles; // keep track of our number of cycles through printProgress
+        if ( newPrintCycle == previousPrintCycle && ! mustPrint )
             // don't do any work more often than PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES
             return;
 
-        if(!progressMeterInitialized && mustPrint == false ) {
+        if( ! progressMeterInitialized ) {
             logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]");
             logger.info(String.format("%15s processed.%s  runtime per.1M.%s completed total.runtime remaining",
                     "Location", getTraversalType(), getTraversalType()));
@@ -250,8 +268,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
                 else
                     PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
 
-                logger.info(String.format("%15s        %5.2e %s     %s     %4.1f%%      %s  %s",
-                        loc == null ? "done with mapped reads" : loc, nRecords*1.0, elapsed, unitRate,
+                final String posName = loc == null ? (mustPrint ? "done" : "unmapped reads") : Integer.toString(loc.getStart());
+                logger.info(String.format("%15s        %5.2e %s     %s    %5.1f%%      %s  %s",
+                        posName, nRecords*1.0, elapsed, unitRate,
                         100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
 
             }
@@ -309,7 +328,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * Called after a traversal to print out information about the traversal process
      */
     public void printOnTraversalDone() {
-        printProgress(null, null, true);
+        printProgress(null, null, true, 1);
 
         final double elapsed = timer == null ? 0 : timer.getElapsedTime();
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
index dbddeb092..2ada8bbfa 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
@@ -34,6 +34,7 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadView;
 import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
 import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
 import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
@@ -87,9 +88,15 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
         final TraverseReadsMap myMap = new TraverseReadsMap(walker);
         final TraverseReadsReduce myReduce = new TraverseReadsReduce(walker);
 
-        T result = nanoScheduler.execute(aggregateMapData(dataProvider).iterator(), myMap, sum, myReduce);
-        // TODO -- how do we print progress?
-        //printProgress(dataProvider.getShard(), ???);
+        final List<MapData> aggregatedInputs = aggregateMapData(dataProvider);
+        final T result = nanoScheduler.execute(aggregatedInputs.iterator(), myMap, sum, myReduce);
+
+        final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
+        final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
+        printProgress(dataProvider.getShard(), locus, aggregatedInputs.size());
+
+        // TODO -- how can I get done value?
+        // done = walker.isDone();
 
         return result;
     }
@@ -165,8 +172,9 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
                     return walker.map(data.refContext, data.read, data.tracker);
                 }
             }
-
-            return null; // TODO -- what should we return in the case where the walker is done or the read is filtered?
+            // TODO -- how can we cleanly support done and filtered.  Need to return
+            // TODO -- a MapResult object that says the status
+            return null;
         }
     }
 }

From 7087b22ea397c96a78a9dbc2bc98558d80343cea Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sat, 1 Sep 2012 12:28:54 -0400
Subject: [PATCH 05/26] No debugging output (even conditional) for
 ReadTransformers in PrintReads

---
 .../src/org/broadinstitute/sting/gatk/walkers/PrintReads.java    | 1 -
 1 file changed, 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
index a5d4b45b6..4118617fc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
@@ -228,7 +228,6 @@ public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> impleme
         GATKSAMRecord workingRead = readIn;
 
         for ( final ReadTransformer transformer : readTransformers ) {
-            if ( logger.isDebugEnabled() ) logger.debug("Applying transformer " + transformer + " to read " + readIn.getReadName());
             workingRead = transformer.apply(workingRead);
         }
 

From 800a27c3a701bef87bd8210b0dddf080c1555068 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sat, 1 Sep 2012 12:29:59 -0400
Subject: [PATCH 06/26] NanoScheduler tracks time within input, map, and reduce

-- Helpful for understanding where the time goes to each bit of the code.
-- Controlled by a local static boolean, to avoid the potential overhead in general
---
 .../utils/nanoScheduler/NanoScheduler.java    | 40 ++++++++++++++++---
 1 file changed, 35 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 5c6aa6a35..39b541944 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -3,6 +3,8 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.AutoFormattingTime;
+import org.broadinstitute.sting.utils.SimpleTimer;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.Iterator;
@@ -44,6 +46,7 @@ import java.util.concurrent.*;
 public class NanoScheduler<InputType, MapType, ReduceType> {
     private final static Logger logger = Logger.getLogger(NanoScheduler.class);
     private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
+    private final static boolean TIME_CALLS = true;
 
     final int bufferSize;
     final int nThreads;
@@ -51,6 +54,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     boolean shutdown = false;
     boolean debug = false;
 
+    final SimpleTimer inputTimer = new SimpleTimer();
+    final SimpleTimer mapTimer = new SimpleTimer();
+    final SimpleTimer reduceTimer = new SimpleTimer();
+
     /**
      * Create a new nanoschedule with the desire characteristics requested by the argument
      *
@@ -97,6 +104,19 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                 throw new IllegalStateException("Remaining tasks found in the executor, unexpected behavior!");
         }
         shutdown = true;
+
+        if (TIME_CALLS) {
+            printTimerInfo("Input  time", inputTimer);
+            printTimerInfo("Map    time", mapTimer);
+            printTimerInfo("Reduce time", reduceTimer);
+        }
+    }
+
+    private void printTimerInfo(final String label, final SimpleTimer timer) {
+        final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime() + reduceTimer.getElapsedTime();
+        final double myTimeInSec = timer.getElapsedTime();
+        final double myTimePercent = myTimeInSec / total * 100;
+        logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
     }
 
     /**
@@ -134,10 +154,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * It is safe to call this function repeatedly on a single nanoScheduler, at least until the
      * shutdown method is called.
      *
-     * @param inputReader
-     * @param map
-     * @param reduce
-     * @return
+     * @param inputReader an iterator providing us with the input data to nanoSchedule map/reduce over
+     * @param map the map function from input type -> map type, will be applied in parallel to each input
+     * @param reduce the reduce function from map type + reduce type -> reduce type to be applied in order to map results
+     * @return the last reduce value
      */
     public ReduceType execute(final Iterator<InputType> inputReader,
                               final MapFunction<InputType, MapType> map,
@@ -213,7 +233,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         // while mapQueue has something in it to reduce
         for ( final Future<MapType> future : mapQueue ) {
             final MapType value = future.get(); // block until we get the values for this task
+
+            if ( TIME_CALLS) reduceTimer.restart();
             sum = reduce.apply(value, sum);
+            if ( TIME_CALLS) reduceTimer.stop();
         }
 
         return sum;
@@ -229,11 +252,15 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     private List<InputType> readInputs(final Iterator<InputType> inputReader) {
         int n = 0;
         final List<InputType> inputs = new LinkedList<InputType>();
+
+        if ( TIME_CALLS) inputTimer.restart();
         while ( inputReader.hasNext() && n < getBufferSize() ) {
             final InputType input = inputReader.next();
             inputs.add(input);
             n++;
         }
+        if ( TIME_CALLS) inputTimer.stop();
+
         return inputs;
     }
 
@@ -266,7 +293,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         }
 
         @Override public MapType call() throws Exception {
-            return map.apply(input);
+            if ( TIME_CALLS) mapTimer.restart();
+            final MapType result = map.apply(input);
+            if ( TIME_CALLS) mapTimer.stop();
+            return result;
         }
     }
 }

From 59109d5eeb8798bb2c6eabf4b987837fc693b951 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sun, 2 Sep 2012 10:54:18 -0400
Subject: [PATCH 07/26] NanoScheduler tracks time outside of its execute call

---
 .../utils/nanoScheduler/NanoScheduler.java    | 25 ++++++++++++++-----
 1 file changed, 19 insertions(+), 6 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 39b541944..a6be6ad6d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -54,6 +54,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     boolean shutdown = false;
     boolean debug = false;
 
+    final SimpleTimer outsideSchedulerTimer = new SimpleTimer();
     final SimpleTimer inputTimer = new SimpleTimer();
     final SimpleTimer mapTimer = new SimpleTimer();
     final SimpleTimer reduceTimer = new SimpleTimer();
@@ -72,6 +73,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         this.bufferSize = bufferSize;
         this.nThreads = nThreads;
         this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
+
+        // start timing the time spent outside of the nanoScheduler
+        outsideSchedulerTimer.start();
     }
 
     /**
@@ -98,6 +102,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * After this call, execute cannot be invoked without throwing an error
      */
     public void shutdown() {
+        outsideSchedulerTimer.stop();
+
         if ( executor != null ) {
             final List<Runnable> remaining = executor.shutdownNow();
             if ( ! remaining.isEmpty() )
@@ -106,14 +112,16 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         shutdown = true;
 
         if (TIME_CALLS) {
-            printTimerInfo("Input  time", inputTimer);
-            printTimerInfo("Map    time", mapTimer);
-            printTimerInfo("Reduce time", reduceTimer);
+            printTimerInfo("Input   time", inputTimer);
+            printTimerInfo("Map     time", mapTimer);
+            printTimerInfo("Reduce  time", reduceTimer);
+            printTimerInfo("Outside time", outsideSchedulerTimer);
         }
     }
 
     private void printTimerInfo(final String label, final SimpleTimer timer) {
-        final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime() + reduceTimer.getElapsedTime();
+        final double total = inputTimer.getElapsedTime() + mapTimer.getElapsedTime()
+                + reduceTimer.getElapsedTime() + outsideSchedulerTimer.getElapsedTime();
         final double myTimeInSec = timer.getElapsedTime();
         final double myTimePercent = myTimeInSec / total * 100;
         logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
@@ -168,11 +176,16 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
         if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
 
+        outsideSchedulerTimer.stop();
+        ReduceType result;
         if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
-            return executeSingleThreaded(inputReader, map, initialValue, reduce);
+            result = executeSingleThreaded(inputReader, map, initialValue, reduce);
         } else {
-            return executeMultiThreaded(inputReader, map, initialValue, reduce);
+            result = executeMultiThreaded(inputReader, map, initialValue, reduce);
         }
+
+        outsideSchedulerTimer.restart();
+        return result;
     }
 
     /**

From 6a5a70cdf1a80751d1fe54594c0d0d2ee6a3fa87 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sun, 2 Sep 2012 11:37:30 -0400
Subject: [PATCH 08/26] Done GSA-539: SimpleTimer should use System.nanoTime
 for nanoSecond resolution

---
 .../sting/utils/SimpleTimer.java              | 89 ++++++++++++++-----
 .../sting/utils/SimpleTimerUnitTest.java      | 63 ++++++++++++-
 2 files changed, 128 insertions(+), 24 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java
index 15d34a348..b3a9986c5 100644
--- a/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/SimpleTimer.java
@@ -1,18 +1,42 @@
 package org.broadinstitute.sting.utils;
 
 
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+
+import java.util.concurrent.TimeUnit;
+
 /**
- * A useful simple system for timing code.  This code is not thread safe!
+ * A useful simple system for timing code with nano second resolution
+ *
+ * Note that this code is not thread-safe.  If you have a single timer
+ * being started and stopped by multiple threads you will need to protect the
+ * calls to avoid meaningless results of having multiple starts and stops
+ * called sequentially.
  *
  * User: depristo
  * Date: Dec 10, 2010
  * Time: 9:07:44 AM
  */
 public class SimpleTimer {
-    final private String name;
-    private long elapsed = 0l;
-    private long startTime = 0l;
-    boolean running = false;
+    protected static final double NANO_TO_SECOND_DOUBLE = 1.0 / TimeUnit.SECONDS.toNanos(1);
+    private final String name;
+
+    /**
+     * The elapsedTimeNano time in nanoSeconds of this timer.  The elapsedTimeNano time is the
+     * sum of times between starts/restrats and stops.
+     */
+    private long elapsedTimeNano = 0l;
+
+    /**
+     * The start time of the last start/restart in nanoSeconds
+     */
+    private long startTimeNano = 0l;
+
+    /**
+     * Is this timer currently running (i.e., the last call was start/restart)
+     */
+    private boolean running = false;
 
     /**
      * Creates an anonymous simple timer
@@ -25,7 +49,8 @@ public class SimpleTimer {
      * Creates a simple timer named name
      * @param name of the timer, must not be null
      */
-    public SimpleTimer(String name) {
+    public SimpleTimer(final String name) {
+        if ( name == null ) throw new IllegalArgumentException("SimpleTimer name cannot be null");
         this.name = name;
     }
 
@@ -37,27 +62,27 @@ public class SimpleTimer {
     }
 
     /**
-     * Starts the timer running, and sets the elapsed time to 0.  This is equivalent to
+     * Starts the timer running, and sets the elapsedTimeNano time to 0.  This is equivalent to
      * resetting the time to have no history at all.
      *
      * @return this object, for programming convenience
      */
+    @Ensures("elapsedTimeNano == 0l")
     public synchronized SimpleTimer start() {
-        elapsed = 0l;
-        restart();
-        return this;
+        elapsedTimeNano = 0l;
+        return restart();
     }
 
     /**
-     * Starts the timer running, without reseting the elapsed time.  This function may be
+     * Starts the timer running, without resetting the elapsedTimeNano time.  This function may be
      * called without first calling start().  The only difference between start and restart
-     * is that start resets the elapsed time, while restart does not.
+     * is that start resets the elapsedTimeNano time, while restart does not.
      *
      * @return this object, for programming convenience
      */
     public synchronized SimpleTimer restart() {
         running = true;
-        startTime = currentTime();
+        startTimeNano = currentTimeNano();
         return this;
     }
 
@@ -71,29 +96,53 @@ public class SimpleTimer {
     /**
      * @return A convenience function to obtain the current time in milliseconds from this timer
      */
-    public synchronized long currentTime() {
+    public long currentTime() {
         return System.currentTimeMillis();
     }
 
     /**
-     * Stops the timer.  Increases the elapsed time by difference between start and now.  The
-     * timer must be running in order to call stop
+     * @return A convenience function to obtain the current time in nanoSeconds from this timer
+     */
+    public long currentTimeNano() {
+        return System.nanoTime();
+    }
+
+    /**
+     * Stops the timer.  Increases the elapsedTimeNano time by difference between start and now.
+     *
+     * It's ok to call stop on a timer that's not running.  It has no effect on the timer.
      *
      * @return this object, for programming convenience
      */
+    @Requires("startTimeNano != 0l")
     public synchronized SimpleTimer stop() {
-        running = false;
-        elapsed += currentTime() - startTime;
+        if ( running ) {
+            running = false;
+            elapsedTimeNano += currentTimeNano() - startTimeNano;
+        }
         return this;
     }
 
     /**
-     * Returns the total elapsed time of all start/stops of this timer.  If the timer is currently
+     * Returns the total elapsedTimeNano time of all start/stops of this timer.  If the timer is currently
      * running, includes the difference from currentTime() and the start as well
      *
      * @return this time, in seconds
      */
     public synchronized double getElapsedTime() {
-        return (running ? (currentTime() - startTime + elapsed) : elapsed) / 1000.0;
+        return nanoToSecondsAsDouble(getElapsedTimeNano());
+    }
+
+    protected static double nanoToSecondsAsDouble(final long nano) {
+        return nano * NANO_TO_SECOND_DOUBLE;
+    }
+
+    /**
+     * @see #getElapsedTime() but returns the result in nanoseconds
+     *
+     * @return the elapsed time in nanoseconds
+     */
+    public synchronized long getElapsedTimeNano() {
+        return running ? (currentTimeNano() - startTimeNano + elapsedTimeNano) : elapsedTimeNano;
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
index 7a2696b7b..7285c00ac 100755
--- a/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/SimpleTimerUnitTest.java
@@ -1,12 +1,12 @@
 package org.broadinstitute.sting.utils;
 
 import org.broadinstitute.sting.BaseTest;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
-import java.io.File;
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.TimeUnit;
 
 public class SimpleTimerUnitTest extends BaseTest {
     private final static String NAME = "unit.test.timer";
@@ -17,33 +17,88 @@ public class SimpleTimerUnitTest extends BaseTest {
         Assert.assertEquals(t.getName(), NAME, "Name is not the provided one");
         Assert.assertFalse(t.isRunning(), "Initial state of the timer is running");
         Assert.assertEquals(t.getElapsedTime(), 0.0, "New timer elapsed time should be 0");
+        Assert.assertEquals(t.getElapsedTimeNano(), 0l, "New timer elapsed time nano should be 0");
 
         t.start();
         Assert.assertTrue(t.isRunning(), "Started timer isn't running");
         Assert.assertTrue(t.getElapsedTime() >= 0.0, "Elapsed time should be >= 0");
+        Assert.assertTrue(t.getElapsedTimeNano() >= 0.0, "Elapsed time nano should be >= 0");
+        long n1 = t.getElapsedTimeNano();
         double t1 = t.getElapsedTime();
         idleLoop(); // idle loop to wait a tiny bit of time
+        long n2 = t.getElapsedTimeNano();
         double t2 = t.getElapsedTime();
         Assert.assertTrue(t2 >= t1, "T2 >= T1 for a running time");
+        Assert.assertTrue(n2 >= n1, "T2 >= T1 nano for a running time");
 
         t.stop();
         Assert.assertFalse(t.isRunning(), "Stopped timer still running");
+        long n3 = t.getElapsedTimeNano();
         double t3 = t.getElapsedTime();
         idleLoop(); // idle loop to wait a tiny bit of time
         double t4 = t.getElapsedTime();
+        long n4 = t.getElapsedTimeNano();
         Assert.assertTrue(t4 == t3, "Elapsed times for two calls of stop timer not the same");
+        Assert.assertTrue(n4 == n3, "Elapsed times for two calls of stop timer not the same");
 
         t.restart();
         idleLoop(); // idle loop to wait a tiny bit of time
         double t5 = t.getElapsedTime();
+        long n5 = t.getElapsedTimeNano();
         Assert.assertTrue(t.isRunning(), "Restarted timer should be running");
         idleLoop(); // idle loop to wait a tiny bit of time
         double t6 = t.getElapsedTime();
+        long n6 = t.getElapsedTimeNano();
         Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
         Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
+        Assert.assertTrue(n5 >= n4, "Restarted timer elapsed time nano should be after elapsed time preceding the restart");
+        Assert.assertTrue(n6 >= n5, "Second elapsed time nano not after the first in restarted timer");
+
+        final List<Double> secondTimes = Arrays.asList(t1, t2, t3, t4, t5, t6);
+        final List<Long> nanoTimes     = Arrays.asList(n1, n2, n3, n4, n5, n6);
+        for ( int i = 0; i < nanoTimes.size(); i++ )
+            Assert.assertEquals(
+                    SimpleTimer.nanoToSecondsAsDouble(nanoTimes.get(i)),
+                    secondTimes.get(i), 1e-1, "Nanosecond and second timer disagree");
     }
 
-    private final static void idleLoop() {
+    @Test
+    public void testNanoResolution() {
+        SimpleTimer t = new SimpleTimer(NAME);
+
+        // test the nanosecond resolution
+        long n7 = t.currentTimeNano();
+        int sum = 0;
+        for ( int i = 0; i < 100; i++) sum += i;
+        long n8 = t.currentTimeNano();
+        final long delta = n8 - n7;
+        final long oneMilliInNano = TimeUnit.MILLISECONDS.toNanos(1);
+        logger.warn("nanoTime before nano operation " + n7);
+        logger.warn("nanoTime after nano operation of summing 100 ints " + n8 + ", sum = " + sum + " time delta " + delta + " vs. 1 millsecond in nano " + oneMilliInNano);
+        Assert.assertTrue(n8 > n7, "SimpleTimer doesn't appear to have nanoSecond resolution: n8 " + n8 + " <= n7 " + n7);
+        Assert.assertTrue(delta < oneMilliInNano,
+                "SimpleTimer doesn't appear to have nanoSecond resolution: time delta is " + delta + " vs 1 millisecond in nano " + oneMilliInNano);
+    }
+
+    @Test
+    public void testMeaningfulTimes() {
+        SimpleTimer t = new SimpleTimer(NAME);
+
+        t.start();
+        for ( int i = 0; i < 100; i++ ) ;
+        long nano = t.getElapsedTimeNano();
+        double secs = t.getElapsedTime();
+
+        Assert.assertTrue(secs > 0, "Seconds timer doesn't appear to count properly: elapsed time is " + secs);
+        Assert.assertTrue(secs < 0.01, "Fast operation said to take longer than 10 milliseconds: elapsed time in seconds " + secs);
+
+        Assert.assertTrue(nano > 0, "Nanosecond timer doesn't appear to count properly: elapsed time is " + nano);
+        final long maxTimeInMicro = 100;
+        final long maxTimeInNano = TimeUnit.MICROSECONDS.toNanos(100);
+        Assert.assertTrue(nano < maxTimeInNano, "Fast operation said to take longer than " + maxTimeInMicro + " microseconds: elapsed time in nano " + nano + " micro " + TimeUnit.NANOSECONDS.toMicros(nano));
+    }
+
+    private static void idleLoop() {
         for ( int i = 0; i < 100000; i++ ) ; // idle loop to wait a tiny bit of time
     }
 }
\ No newline at end of file

From 1a8f5fc374994b06f16d2a6cc987a2720d42b144 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sun, 2 Sep 2012 11:53:59 -0400
Subject: [PATCH 09/26] Trivial cleanup of NanoScheduler

---
 .../sting/utils/nanoScheduler/NanoScheduler.java          | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index a6be6ad6d..1ef4d3950 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -54,10 +54,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     boolean shutdown = false;
     boolean debug = false;
 
-    final SimpleTimer outsideSchedulerTimer = new SimpleTimer();
-    final SimpleTimer inputTimer = new SimpleTimer();
-    final SimpleTimer mapTimer = new SimpleTimer();
-    final SimpleTimer reduceTimer = new SimpleTimer();
+    final SimpleTimer outsideSchedulerTimer = new SimpleTimer("outside");
+    final SimpleTimer inputTimer = new SimpleTimer("input");
+    final SimpleTimer mapTimer = new SimpleTimer("map");
+    final SimpleTimer reduceTimer = new SimpleTimer("reduce");
 
     /**
      * Create a new nanoschedule with the desire characteristics requested by the argument

From 9823102c0cceea72beb0db689631a1ebeade9978 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sun, 2 Sep 2012 12:16:56 -0400
Subject: [PATCH 10/26] TraverseReadsNano supports walker.filter and
 walker.done

-- Instead of returning directly the result of map(), returns a MapResult object with the value and a reduceMe flag.
-- Reduce function respects the reduceMe flag
-- Code cleanup and more documentation
---
 .../gatk/traversals/TraverseReadsNano.java    | 120 +++++++++++++-----
 1 file changed, 86 insertions(+), 34 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
index 2ada8bbfa..4bb700c37 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
@@ -40,27 +40,28 @@ import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
 import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
-import java.util.ArrayList;
+import java.util.LinkedList;
 import java.util.List;
 
 /**
- * @author aaron
+ * A nano-scheduling version of TraverseReads.
+ *
+ * Implements the traversal of a walker that accepts individual reads, the reference, and
+ * RODs per map call.  Directly supports shared memory parallelism via NanoScheduler
+ *
+ * @author depristo
  * @version 1.0
- * @date Apr 24, 2009
- * <p/>
- * Class TraverseReads
- * <p/>
- * This class handles traversing by reads in the new shardable style
+ * @date 9/2/2012
  */
 public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,ReadShardDataProvider> {
     /** our log, which we want to capture anything from this class */
     protected static final Logger logger = Logger.getLogger(TraverseReadsNano.class);
     private static final boolean DEBUG = false;
-    final NanoScheduler<MapData, M, T> nanoScheduler;
+    final NanoScheduler<MapData, MapResult, T> nanoScheduler;
 
     public TraverseReadsNano(int nThreads) {
         final int bufferSize = ReadShard.getReadBufferSize() + 1; // actually has 1 more than max
-        nanoScheduler = new NanoScheduler<MapData, M, T>(bufferSize, nThreads);
+        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(bufferSize, nThreads);
     }
 
     @Override
@@ -95,18 +96,23 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
         final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
         printProgress(dataProvider.getShard(), locus, aggregatedInputs.size());
 
-        // TODO -- how can I get done value?
-        // done = walker.isDone();
-
         return result;
     }
 
+    /**
+     * Aggregate all of the inputs for all map calls into MapData, to be provided
+     * to NanoScheduler for Map/Reduce
+     *
+     * @param dataProvider the source of our data
+     * @return a linked list of MapData objects holding the read, ref, and ROD info for every map/reduce
+     *          should execute
+     */
     private List<MapData> aggregateMapData(final ReadShardDataProvider dataProvider) {
         final ReadView reads = new ReadView(dataProvider);
         final ReadReferenceView reference = new ReadReferenceView(dataProvider);
         final ReadBasedReferenceOrderedView rodView = new ReadBasedReferenceOrderedView(dataProvider);
 
-        final List<MapData> mapData = new ArrayList<MapData>();  // TODO -- need size of reads
+        final List<MapData> mapData = new LinkedList<MapData>();
         for ( final SAMRecord read : reads ) {
             final ReferenceContext refContext = ! read.getReadUnmappedFlag()
                     ? reference.getReferenceContext(read)
@@ -132,19 +138,9 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
         super.printOnTraversalDone();
     }
 
-    private class TraverseReadsReduce implements ReduceFunction<M, T> {
-        final ReadWalker<M,T> walker;
-
-        private TraverseReadsReduce(ReadWalker<M, T> walker) {
-            this.walker = walker;
-        }
-
-        @Override
-        public T apply(M one, T sum) {
-            return walker.reduce(one, sum);
-        }
-    }
-
+    /**
+     * The input data needed for each map call.  The read, the reference, and the RODs
+     */
     private class MapData {
         final GATKSAMRecord read;
         final ReferenceContext refContext;
@@ -157,7 +153,43 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
         }
     }
 
-    private class TraverseReadsMap implements MapFunction<MapData, M> {
+    /**
+     * Contains the results of a map call, indicating whether the call was good, filtered, or done
+     */
+    private class MapResult {
+        final M value;
+        final boolean reduceMe;
+
+        /**
+         * Create a MapResult with value that should be reduced
+         *
+         * @param value the value to reduce
+         */
+        private MapResult(final M value) {
+            this.value = value;
+            this.reduceMe = true;
+        }
+
+        /**
+         * Create a MapResult that shouldn't be reduced
+         */
+        private MapResult() {
+            this.value = null;
+            this.reduceMe = false;
+        }
+    }
+
+    /**
+     * A static object that tells reduce that the result of map should be skipped (filtered or done)
+     */
+    private final MapResult SKIP_REDUCE = new MapResult();
+
+    /**
+     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Applies walker.map to MapData, returning a MapResult object containing the result
+     */
+    private class TraverseReadsMap implements MapFunction<MapData, MapResult> {
         final ReadWalker<M,T> walker;
 
         private TraverseReadsMap(ReadWalker<M, T> walker) {
@@ -165,16 +197,36 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
         }
 
         @Override
-        public M apply(final MapData data) {
+        public MapResult apply(final MapData data) {
             if ( ! walker.isDone() ) {
                 final boolean keepMeP = walker.filter(data.refContext, data.read);
-                if (keepMeP) {
-                    return walker.map(data.refContext, data.read, data.tracker);
-                }
+                if (keepMeP)
+                    return new MapResult(walker.map(data.refContext, data.read, data.tracker));
             }
-            // TODO -- how can we cleanly support done and filtered.  Need to return
-            // TODO -- a MapResult object that says the status
-            return null;
+
+            return SKIP_REDUCE;
+        }
+    }
+
+    /**
+     * ReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
+     */
+    private class TraverseReadsReduce implements ReduceFunction<MapResult, T> {
+        final ReadWalker<M,T> walker;
+
+        private TraverseReadsReduce(ReadWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public T apply(MapResult one, T sum) {
+            if ( one.reduceMe )
+                // only run reduce on values that aren't DONE or FAILED
+                return walker.reduce(one.value, sum);
+            else
+                return sum;
         }
     }
 }

From d7105223fe7d8bb4848dbc2cfe7ccfbb9709b4b6 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 13:44:39 -0400
Subject: [PATCH 11/26] More debugging output for NanoScheduler when debugging
 is enabled

---
 .../sting/utils/nanoScheduler/NanoScheduler.java               | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 1ef4d3950..f0e77354f 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -220,12 +220,14 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
             try {
                 // read in our input values
                 final List<InputType> inputs = readInputs(inputReader);
+                debugPrint("Enqueuing " + inputs.size() + " elements to map");
 
                 // send jobs for map
                 final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
 
                 // send off the reduce job, and block until we get at least one reduce result
                 sum = reduceSerial(reduce, mapQueue, sum);
+                debugPrint("  Done with cycle of map/reduce");
             } catch (InterruptedException ex) {
                 throw new ReviewedStingException("got execution exception", ex);
             } catch (ExecutionException ex) {
@@ -307,6 +309,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
         @Override public MapType call() throws Exception {
             if ( TIME_CALLS) mapTimer.restart();
+            if ( debug ) debugPrint("\t\tmap " + input);
             final MapType result = map.apply(input);
             if ( TIME_CALLS) mapTimer.stop();
             return result;

From 757e6a016081205e3c78c71ed184c982d63910f6 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 13:45:57 -0400
Subject: [PATCH 12/26] Making Pileup thread-safe

-- Old version relied on out printstream magically sorting output, new version puts the print in reduce
---
 .../sting/gatk/walkers/Pileup.java            | 33 +++++++++++--------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
index 52c6e1560..607c83966 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
@@ -52,7 +52,7 @@ import java.util.List;
  * samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducible<Integer> {
+public class Pileup extends LocusWalker<String, Integer> implements TreeReducible<Integer> {
 
     private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
 
@@ -70,27 +70,32 @@ public class Pileup extends LocusWalker<Integer, Integer> implements TreeReducib
     @Input(fullName="metadata",shortName="metadata",doc="Add these ROD bindings to the output Pileup", required=false)
     public List<RodBinding<Feature>> rods = Collections.emptyList();
 
-    public void initialize() {
-    }
-
-    public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-
-        String rods = getReferenceOrderedData( tracker );
+    @Override
+    public String map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        final String rods = getReferenceOrderedData( tracker );
 
         ReadBackedPileup basePileup = context.getBasePileup();
-        out.printf("%s %s", basePileup.getPileupString((char)ref.getBase()), rods);
-        if ( SHOW_VERBOSE )
-            out.printf(" %s", createVerboseOutput(basePileup));
-        out.println();
 
-        return 1;
+        final StringBuilder s = new StringBuilder();
+        s.append(String.format("%s %s", basePileup.getPileupString((char)ref.getBase()), rods));
+        if ( SHOW_VERBOSE )
+            s.append(" ").append(createVerboseOutput(basePileup));
+        s.append("\n");
+
+        return s.toString();
     }
 
     // Given result of map function
+    @Override
     public Integer reduceInit() { return 0; }
-    public Integer reduce(Integer value, Integer sum) {
-        return treeReduce(sum,value);
+
+    @Override
+    public Integer reduce(String value, Integer sum) {
+        out.print(value);
+        return sum + 1;
     }
+
+    @Override
     public Integer treeReduce(Integer lhs, Integer rhs) {
         return lhs + rhs;
     }

From d503ed97abd7a4990e3412aa8a934ff0761e847b Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 13:47:40 -0400
Subject: [PATCH 13/26] Mark I NanoScheduling TraverseLoci

-- Refactored TraverseLoci into old linear version and nano scheduling version
-- Temp. GATK argument to say how many nano threads to use
-- Can efficiently scale to 3 threads before blocking on input
---
 .../sting/gatk/ReadMetrics.java               |  10 +-
 .../arguments/GATKArgumentCollection.java     |   4 +
 .../sting/gatk/executive/MicroScheduler.java  |   3 +-
 ...raverseLoci.java => TraverseLociBase.java} |  70 +++---
 .../gatk/traversals/TraverseLociLinear.java   |  48 +++++
 .../gatk/traversals/TraverseLociNano.java     | 200 ++++++++++++++++++
 6 files changed, 293 insertions(+), 42 deletions(-)
 rename public/java/src/org/broadinstitute/sting/gatk/traversals/{TraverseLoci.java => TraverseLociBase.java} (57%)
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
 create mode 100755 public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java
index ceaa30f01..bfea0b1e1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/ReadMetrics.java
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk;
 import net.sf.picard.filter.SamRecordFilter;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
-import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 import java.util.TreeMap;
@@ -119,11 +118,18 @@ public class ReadMetrics implements Cloneable {
         return nRecords;
     }
 
+    /**
+     * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed.
+     */
+    public void incrementNumIterations(final long by) {
+        nRecords += by;
+    }
+
     /**
      * Increments the number of 'iterations' (one call of filter/map/reduce sequence) completed.
      */
     public void incrementNumIterations() {
-        nRecords++;
+        incrementNumIterations(1);
     }
 
     public long getNumReadsSeen() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 6be66b204..33400bd9e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -313,6 +313,10 @@ public class GATKArgumentCollection {
     @Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc="The total number of BAM file handles to keep open simultaneously", required=false)
     public Integer numberOfBAMFileHandles = null;
 
+    @Argument(fullName="nanoThreads", shortName = "nanoThreads", doc="NanoThreading", required = false)
+    @Hidden
+    public int nanoThreads = 1;
+
     @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
     public List<String> readGroupBlackList = null;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 417a0982f..073a46ee3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -146,7 +146,8 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
         if (walker instanceof ReadWalker) {
             traversalEngine = numThreads > 1 ? new TraverseReadsNano(numThreads) : new TraverseReads();
         } else if (walker instanceof LocusWalker) {
-            traversalEngine = new TraverseLoci();
+            // TODO -- refactor to use better interface
+            traversalEngine = engine.getArguments().nanoThreads > 1 ? new TraverseLociNano(engine.getArguments().nanoThreads) : new TraverseLociLinear();
         } else if (walker instanceof DuplicateWalker) {
             traversalEngine = new TraverseDuplicates();
         } else if (walker instanceof ReadPairWalker) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
similarity index 57%
rename from public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
rename to public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
index a5a6919a2..19d95381e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
@@ -3,9 +3,7 @@ package org.broadinstitute.sting.gatk.traversals;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.gatk.WalkerManager;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.datasources.providers.*;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.gatk.walkers.Walker;
@@ -15,28 +13,42 @@ import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
 /**
  * A simple solution to iterating over all reference positions over a series of genomic locations.
  */
-public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
+public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,LocusShardDataProvider> {
     /**
      * our log, which we want to capture anything from this class
      */
     protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
 
     @Override
-    protected String getTraversalType() {
+    protected final String getTraversalType() {
         return "sites";
     }
 
+    protected static class TraverseResults<T> {
+        final int numIterations;
+        final T reduceResult;
+
+        public TraverseResults(int numIterations, T reduceResult) {
+            this.numIterations = numIterations;
+            this.reduceResult = reduceResult;
+        }
+    }
+
+    protected abstract TraverseResults<T> traverse( final LocusWalker<M,T> walker,
+                                                    final LocusView locusView,
+                                                    final LocusReferenceView referenceView,
+                                                    final ReferenceOrderedView referenceOrderedDataView,
+                                                    final T sum);
+
     @Override
     public T traverse( LocusWalker<M,T> walker,
                        LocusShardDataProvider dataProvider,
                        T sum) {
-        logger.debug(String.format("TraverseLoci.traverse: Shard is %s", dataProvider));
+        logger.debug(String.format("TraverseLociBase.traverse: Shard is %s", dataProvider));
 
-        LocusView locusView = getLocusView( walker, dataProvider );
-        boolean done = false;
+        final LocusView locusView = getLocusView( walker, dataProvider );
 
         if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
-
             //ReferenceOrderedView referenceOrderedDataView = new ReferenceOrderedView( dataProvider );
             ReferenceOrderedView referenceOrderedDataView = null;
             if ( WalkerManager.getWalkerDataSource(walker) != DataSource.REFERENCE_ORDERED_DATA )
@@ -44,43 +56,23 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
             else
                 referenceOrderedDataView = (RodLocusView)locusView;
 
-            LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
+            final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
 
-            // We keep processing while the next reference location is within the interval
-            while( locusView.hasNext() && ! done ) {
-                AlignmentContext locus = locusView.next();
-                GenomeLoc location = locus.getLocation();
-
-                dataProvider.getShard().getReadMetrics().incrementNumIterations();
-
-                // create reference context. Note that if we have a pileup of "extended events", the context will
-                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
-                ReferenceContext refContext = referenceView.getReferenceContext(location);
-
-                // Iterate forward to get all reference ordered data covering this location
-                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
-
-                final boolean keepMeP = walker.filter(tracker, refContext, locus);
-                if (keepMeP) {
-                    M x = walker.map(tracker, refContext, locus);
-                    sum = walker.reduce(x, sum);
-                    done = walker.isDone();
-                }
-
-                printProgress(dataProvider.getShard(),locus.getLocation());
-            }
+            final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
+            sum = result.reduceResult;
+            dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
         }
 
         // We have a final map call to execute here to clean up the skipped based from the
         // last position in the ROD to that in the interval
         if ( WalkerManager.getWalkerDataSource(walker) == DataSource.REFERENCE_ORDERED_DATA && ! walker.isDone() ) {
             // only do this if the walker isn't done!
-            RodLocusView rodLocusView = (RodLocusView)locusView;
-            long nSkipped = rodLocusView.getLastSkippedBases();
+            final RodLocusView rodLocusView = (RodLocusView)locusView;
+            final long nSkipped = rodLocusView.getLastSkippedBases();
             if ( nSkipped > 0 ) {
-                GenomeLoc site = rodLocusView.getLocOneBeyondShard();
-                AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
-                M x = walker.map(null, null, ac);
+                final GenomeLoc site = rodLocusView.getLocOneBeyondShard();
+                final AlignmentContext ac = new AlignmentContext(site, new ReadBackedPileupImpl(site), nSkipped);
+                final M x = walker.map(null, null, ac);
                 sum = walker.reduce(x, sum);
             }
         }
@@ -90,14 +82,14 @@ public class TraverseLoci<M,T> extends TraversalEngine<M,T,LocusWalker<M,T>,Locu
 
     /**
      * Gets the best view of loci for this walker given the available data.  The view will function as a 'trigger track'
-     * of sorts, providing a consistent interface so that TraverseLoci doesn't need to be reimplemented for any new datatype
+     * of sorts, providing a consistent interface so that TraverseLociBase doesn't need to be reimplemented for any new datatype
      * that comes along.
      * @param walker walker to interrogate.
      * @param dataProvider Data which which to drive the locus view.
      * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
      */
     private LocusView getLocusView( Walker<M,T> walker, LocusShardDataProvider dataProvider ) {
-        DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
+        final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
         if( dataSource == DataSource.READS )
             return new CoveredLocusView(dataProvider);
         else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
new file mode 100755
index 000000000..1dec3b238
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
@@ -0,0 +1,48 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
+import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+/**
+ * A simple solution to iterating over all reference positions over a series of genomic locations.
+ */
+public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
+
+    @Override
+    protected TraverseResults<T> traverse(LocusWalker<M, T> walker, LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView, T sum) {
+        // We keep processing while the next reference location is within the interval
+        boolean done = false;
+        int numIterations = 0;
+
+        while( locusView.hasNext() && ! done ) {
+            numIterations++;
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();
+
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+            final boolean keepMeP = walker.filter(tracker, refContext, locus);
+            if (keepMeP) {
+                final M x = walker.map(tracker, refContext, locus);
+                sum = walker.reduce(x, sum);
+                done = walker.isDone();
+            }
+
+            // TODO -- refactor printProgress to separate updating read metrics from printing progress
+            //printProgress(dataProvider.getShard(),locus.getLocation());
+        }
+
+        return new TraverseResults<T>(numIterations, sum);
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
new file mode 100755
index 000000000..4e6eb1915
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
@@ -0,0 +1,200 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusReferenceView;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusView;
+import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
+import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
+
+import java.util.Iterator;
+
+/**
+ * A simple solution to iterating over all reference positions over a series of genomic locations.
+ */
+public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
+    /** our log, which we want to capture anything from this class */
+    private static final boolean DEBUG = false;
+    private static final int BUFFER_SIZE = 1000;
+
+    final NanoScheduler<MapData, MapResult, T> nanoScheduler;
+
+    public TraverseLociNano(int nThreads) {
+        nanoScheduler = new NanoScheduler<MapData, MapResult, T>(BUFFER_SIZE, nThreads);
+    }
+
+    @Override
+    protected TraverseResults<T> traverse(final LocusWalker<M, T> walker,
+                                          final LocusView locusView,
+                                          final LocusReferenceView referenceView,
+                                          final ReferenceOrderedView referenceOrderedDataView,
+                                          final T sum) {
+        nanoScheduler.setDebug(DEBUG);
+        final TraverseLociMap myMap = new TraverseLociMap(walker);
+        final TraverseLociReduce myReduce = new TraverseLociReduce(walker);
+
+        final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView);
+        final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce);
+
+        // todo -- how do I print progress?
+//        final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
+//        final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
+//        printProgress(dataProvider.getShard(), locus, aggregatedInputs.size());
+
+        return new TraverseResults<T>(inputIterator.numIterations, result);
+    }
+
+    /**
+     * Create iterator that provides inputs for all map calls into MapData, to be provided
+     * to NanoScheduler for Map/Reduce
+     */
+    private class MapDataIterator implements Iterator<MapData> {
+        final LocusView locusView;
+        final LocusReferenceView referenceView;
+        final ReferenceOrderedView referenceOrderedDataView;
+        int numIterations = 0;
+
+        private MapDataIterator(LocusView locusView, LocusReferenceView referenceView, ReferenceOrderedView referenceOrderedDataView) {
+            this.locusView = locusView;
+            this.referenceView = referenceView;
+            this.referenceOrderedDataView = referenceOrderedDataView;
+        }
+
+        @Override
+        public boolean hasNext() {
+            return locusView.hasNext();
+        }
+
+        @Override
+        public MapData next() {
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();
+
+            //logger.info("Pulling data from MapDataIterator at " + location);
+
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(location, refContext);
+
+            numIterations++;
+            return new MapData(locus, refContext,  tracker);
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("Cannot remove elements from MapDataIterator");
+        }
+    }
+
+    @Override
+    public void printOnTraversalDone() {
+        nanoScheduler.shutdown();
+        super.printOnTraversalDone();
+    }
+
+    /**
+     * The input data needed for each map call.  The read, the reference, and the RODs
+     */
+    private class MapData {
+        final AlignmentContext alignmentContext;
+        final ReferenceContext refContext;
+        final RefMetaDataTracker tracker;
+
+        private MapData(final AlignmentContext alignmentContext, ReferenceContext refContext, RefMetaDataTracker tracker) {
+            this.alignmentContext = alignmentContext;
+            this.refContext = refContext;
+            this.tracker = tracker;
+        }
+
+        @Override
+        public String toString() {
+            return "MapData " + alignmentContext.getLocation();
+        }
+    }
+
+    /**
+     * Contains the results of a map call, indicating whether the call was good, filtered, or done
+     */
+    private class MapResult {
+        final M value;
+        final boolean reduceMe;
+
+        /**
+         * Create a MapResult with value that should be reduced
+         *
+         * @param value the value to reduce
+         */
+        private MapResult(final M value) {
+            this.value = value;
+            this.reduceMe = true;
+        }
+
+        /**
+         * Create a MapResult that shouldn't be reduced
+         */
+        private MapResult() {
+            this.value = null;
+            this.reduceMe = false;
+        }
+    }
+
+    /**
+     * A static object that tells reduce that the result of map should be skipped (filtered or done)
+     */
+    private final MapResult SKIP_REDUCE = new MapResult();
+
+    /**
+     * MapFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Applies walker.map to MapData, returning a MapResult object containing the result
+     */
+    private class TraverseLociMap implements MapFunction<MapData, MapResult> {
+        final LocusWalker<M,T> walker;
+
+        private TraverseLociMap(LocusWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public MapResult apply(final MapData data) {
+            if ( ! walker.isDone() ) {
+                final boolean keepMeP = walker.filter(data.tracker, data.refContext, data.alignmentContext);
+                if (keepMeP) {
+                    final M x = walker.map(data.tracker, data.refContext, data.alignmentContext);
+                    return new MapResult(x);
+                }
+            }
+            return SKIP_REDUCE;
+        }
+    }
+
+    /**
+     * ReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     *
+     * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
+     */
+    private class TraverseLociReduce implements ReduceFunction<MapResult, T> {
+        final LocusWalker<M,T> walker;
+
+        private TraverseLociReduce(LocusWalker<M, T> walker) {
+            this.walker = walker;
+        }
+
+        @Override
+        public T apply(MapResult one, T sum) {
+            if ( one.reduceMe )
+                // only run reduce on values that aren't DONE or FAILED
+                return walker.reduce(one.value, sum);
+            else
+                return sum;
+        }
+    }
+}

From 8cdeb51b78696340d9303d44342095bb82a40671 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 14:50:06 -0400
Subject: [PATCH 14/26] Cleanup printProgress in TraversalEngine

-- Separate updating cumulative traversal metrics from printing progress.  There's now an updateCumulativeMetrics function and a printProgress() that only takes a current position
-- printProgress now soles relies on the time since the last progress to decide if it will print or not.  No longer uses the number of cycles, since this isn't reliable in the case of nano scheduling
-- GenomeAnalysisEngine now maintains a pointer to the master cumulative metrics.  getCumulativeMetrics never returns null, which was handled in some parts of the code but not others.
-- Update all of the traversals to use the new updateCumulativeMetrics, printProgress model
-- Added progress callback to nano scheduler.  Every bufferSize elements this callback is invoked, allowing us to smoothly update the progress meter in the NanoScheduler
-- Rename MapFunction to NanoSchedulerMap and the same for reduce.
---
 .../sting/gatk/GenomeAnalysisEngine.java      |   7 +-
 .../gatk/traversals/TraversalEngine.java      | 152 ++++++++----------
 .../traversals/TraverseActiveRegions.java     |   3 +-
 .../gatk/traversals/TraverseDuplicates.java   |   3 +-
 .../gatk/traversals/TraverseLociBase.java     |   1 +
 .../gatk/traversals/TraverseLociLinear.java   |   3 +-
 .../gatk/traversals/TraverseLociNano.java     |  25 +--
 .../gatk/traversals/TraverseReadPairs.java    |   3 +-
 .../sting/gatk/traversals/TraverseReads.java  |   7 +-
 .../gatk/traversals/TraverseReadsNano.java    |  14 +-
 .../utils/nanoScheduler/NanoScheduler.java    |  35 ++--
 ...ion.java => NanoSchedulerMapFunction.java} |   2 +-
 .../NanoSchedulerProgressFunction.java        |  12 ++
 ....java => NanoSchedulerReduceFunction.java} |   2 +-
 .../nanoScheduler/NanoSchedulerUnitTest.java  |   4 +-
 15 files changed, 153 insertions(+), 120 deletions(-)
 rename public/java/src/org/broadinstitute/sting/utils/nanoScheduler/{MapFunction.java => NanoSchedulerMapFunction.java} (84%)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerProgressFunction.java
 rename public/java/src/org/broadinstitute/sting/utils/nanoScheduler/{ReduceFunction.java => NanoSchedulerReduceFunction.java} (87%)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index b9b5e452d..1b4333ce2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -143,6 +143,8 @@ public class GenomeAnalysisEngine {
      */
     private ThreadAllocation threadAllocation;
 
+    private ReadMetrics cumulativeMetrics = null;
+
     /**
      * A currently hacky unique name for this GATK instance
      */
@@ -1035,7 +1037,10 @@ public class GenomeAnalysisEngine {
      *         owned by the caller; the caller can do with the object what they wish.
      */
     public ReadMetrics getCumulativeMetrics() {
-        return readsDataSource == null ? null : readsDataSource.getCumulativeReadMetrics();
+        // todo -- probably shouldn't be lazy
+        if ( cumulativeMetrics == null )
+            cumulativeMetrics = readsDataSource == null ? new ReadMetrics() : readsDataSource.getCumulativeReadMetrics();
+        return cumulativeMetrics;
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
index 198f9342e..4422d49ae 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@@ -44,24 +44,12 @@ import java.util.List;
 import java.util.Map;
 
 public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,ProviderType extends ShardDataProvider> {
+    /** our log, which we want to capture anything from this class */
+    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
+
     // Time in milliseconds since we initialized this engine
     private static final int HISTORY_WINDOW_SIZE = 50;
 
-    private static class ProcessingHistory {
-        double elapsedSeconds;
-        long unitsProcessed;
-        long bpProcessed;
-        GenomeLoc loc;
-
-        public ProcessingHistory(double elapsedSeconds, GenomeLoc loc, long unitsProcessed, long bpProcessed) {
-            this.elapsedSeconds = elapsedSeconds;
-            this.loc = loc;
-            this.unitsProcessed = unitsProcessed;
-            this.bpProcessed = bpProcessed;
-        }
-
-    }
-
     /** lock object to sure updates to history are consistent across threads */
     private static final Object lock = new Object();
     LinkedList<ProcessingHistory> history = new LinkedList<ProcessingHistory>();
@@ -70,13 +58,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     private SimpleTimer timer = null;
 
     // How long can we go without printing some progress info?
-    private static final int PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES = 1000;
-    private int printProgressCheckCounter = 0;
     private long lastProgressPrintTime = -1;                       // When was the last time we printed progress log?
-    private long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
-    private long PROGRESS_PRINT_FREQUENCY = 10 * 1000;             // in milliseconds
-    private final double TWO_HOURS_IN_SECONDS = 2.0 * 60.0 * 60.0;
-    private final double TWELVE_HOURS_IN_SECONDS = 12.0 * 60.0 * 60.0;
+
+    private final static long MIN_ELAPSED_TIME_BEFORE_FIRST_PROGRESS = 30 * 1000; // in milliseconds
+    private final static double TWO_HOURS_IN_SECONDS                 = 2.0 * 60.0 * 60.0;
+    private final static double TWELVE_HOURS_IN_SECONDS              = 12.0 * 60.0 * 60.0;
+    private long progressPrintFrequency                              = 10 * 1000; // in milliseconds
     private boolean progressMeterInitialized = false;
 
     // for performance log
@@ -85,15 +72,12 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     private File performanceLogFile;
     private PrintStream performanceLog = null;
     private long lastPerformanceLogPrintTime = -1;                   // When was the last time we printed to the performance log?
-    private final long PERFORMANCE_LOG_PRINT_FREQUENCY = PROGRESS_PRINT_FREQUENCY;  // in milliseconds
+    private final long PERFORMANCE_LOG_PRINT_FREQUENCY = progressPrintFrequency;  // in milliseconds
 
     /** Size, in bp, of the area we are processing.  Updated once in the system in initial for performance reasons */
     long targetSize = -1;
     GenomeLocSortedSet targetIntervals = null;
 
-    /** our log, which we want to capture anything from this class */
-    protected static final Logger logger = Logger.getLogger(TraversalEngine.class);
-
     protected GenomeAnalysisEngine engine;
 
     // ----------------------------------------------------------------------------------------------------
@@ -187,28 +171,34 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
     }
 
     /**
-     * Forward request to printProgress
+     * Update the cumulative traversal metrics according to the data in this shard
      *
-     * Assumes that one cycle has been completed
-     *
-     * @param shard the given shard currently being processed.
-     * @param loc  the location
+     * @param shard a non-null shard
      */
-    public void printProgress(Shard shard, GenomeLoc loc) {
-        // A bypass is inserted here for unit testing.
-        printProgress(loc,shard.getReadMetrics(),false, 1);
+    public void updateCumulativeMetrics(final Shard shard) {
+        updateCumulativeMetrics(shard.getReadMetrics());
+    }
+
+    /**
+     * Update the cumulative traversal metrics according to the data in this shard
+     *
+     * @param singleTraverseMetrics read metrics object containing the information about a single shard's worth
+     *                              of data processing
+     */
+    public void updateCumulativeMetrics(final ReadMetrics singleTraverseMetrics) {
+        engine.getCumulativeMetrics().incrementMetrics(singleTraverseMetrics);
     }
 
     /**
      * Forward request to printProgress
      *
-     * @param shard the given shard currently being processed.
+     * Assumes that one cycle has been completed
+     *
      * @param loc  the location
-     * @param nElapsedCycles the number of cycles (turns of map) that have occurred since the last call
      */
-    public void printProgress(Shard shard, GenomeLoc loc, int nElapsedCycles) {
+    public void printProgress(final GenomeLoc loc) {
         // A bypass is inserted here for unit testing.
-        printProgress(loc,shard.getReadMetrics(),false, nElapsedCycles);
+        printProgress(loc, false);
     }
 
     /**
@@ -216,18 +206,9 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * every M seconds, for N and M set in global variables.
      *
      * @param loc       Current location, can be null if you are at the end of the traversal
-     * @param metrics   Data processed since the last cumulative
      * @param mustPrint If true, will print out info, regardless of nRecords or time interval
      */
-    private synchronized void printProgress(GenomeLoc loc, ReadMetrics metrics, boolean mustPrint, int nElapsedCycles) {
-        final int previousPrintCycle = printProgressCheckCounter / PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES;
-        final int newPrintCycle = (printProgressCheckCounter+nElapsedCycles) / PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES;
-
-        printProgressCheckCounter += nElapsedCycles; // keep track of our number of cycles through printProgress
-        if ( newPrintCycle == previousPrintCycle && ! mustPrint )
-            // don't do any work more often than PRINT_PROGRESS_CHECK_FREQUENCY_IN_CYCLES
-            return;
-
+    private synchronized void printProgress(final GenomeLoc loc, boolean mustPrint) {
         if( ! progressMeterInitialized ) {
             logger.info("[INITIALIZATION COMPLETE; TRAVERSAL STARTING]");
             logger.info(String.format("%15s processed.%s  runtime per.1M.%s completed total.runtime remaining",
@@ -236,37 +217,30 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
         }
 
         final long curTime = timer.currentTime();
-        boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, PROGRESS_PRINT_FREQUENCY);
+        boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, progressPrintFrequency);
         boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
 
         if ( printProgress || printLog ) {
-            // getting and appending metrics data actually turns out to be quite a heavyweight
-            // operation.  Postpone it until after determining whether to print the log message.
-            ReadMetrics cumulativeMetrics = engine.getCumulativeMetrics() != null ? engine.getCumulativeMetrics() : new ReadMetrics();
-            if(metrics != null)
-                cumulativeMetrics.incrementMetrics(metrics);
-
-            final long nRecords = cumulativeMetrics.getNumIterations();
-
-            ProcessingHistory last = updateHistory(loc,cumulativeMetrics);
+            final ProcessingHistory last = updateHistory(loc, engine.getCumulativeMetrics());
 
             final AutoFormattingTime elapsed = new AutoFormattingTime(last.elapsedSeconds);
-            final AutoFormattingTime bpRate = new AutoFormattingTime(secondsPerMillionBP(last));
-            final AutoFormattingTime unitRate = new AutoFormattingTime(secondsPerMillionElements(last));
-            final double fractionGenomeTargetCompleted = calculateFractionGenomeTargetCompleted(last);
+            final AutoFormattingTime bpRate = new AutoFormattingTime(last.secondsPerMillionBP());
+            final AutoFormattingTime unitRate = new AutoFormattingTime(last.secondsPerMillionElements());
+            final double fractionGenomeTargetCompleted = last.calculateFractionGenomeTargetCompleted(targetSize);
             final AutoFormattingTime estTotalRuntime = new AutoFormattingTime(elapsed.getTimeInSeconds() / fractionGenomeTargetCompleted);
             final AutoFormattingTime timeToCompletion = new AutoFormattingTime(estTotalRuntime.getTimeInSeconds() - elapsed.getTimeInSeconds());
+            final long nRecords = engine.getCumulativeMetrics().getNumIterations();
 
             if ( printProgress ) {
                 lastProgressPrintTime = curTime;
 
                 // dynamically change the update rate so that short running jobs receive frequent updates while longer jobs receive fewer updates
                 if ( estTotalRuntime.getTimeInSeconds() > TWELVE_HOURS_IN_SECONDS )
-                    PROGRESS_PRINT_FREQUENCY = 60 * 1000; // in milliseconds
+                    progressPrintFrequency = 60 * 1000; // in milliseconds
                 else if ( estTotalRuntime.getTimeInSeconds() > TWO_HOURS_IN_SECONDS )
-                    PROGRESS_PRINT_FREQUENCY = 30 * 1000; // in milliseconds
+                    progressPrintFrequency = 30 * 1000; // in milliseconds
                 else
-                    PROGRESS_PRINT_FREQUENCY = 10 * 1000; // in milliseconds
+                    progressPrintFrequency = 10 * 1000; // in milliseconds
 
                 final String posName = loc == null ? (mustPrint ? "done" : "unmapped reads") : Integer.toString(loc.getStart());
                 logger.info(String.format("%15s        %5.2e %s     %s    %5.1f%%      %s  %s",
@@ -296,7 +270,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * @param metrics information about what's been processed already
      * @return
      */
-    private final ProcessingHistory updateHistory(GenomeLoc loc, ReadMetrics metrics) {
+    private ProcessingHistory updateHistory(GenomeLoc loc, ReadMetrics metrics) {
         synchronized (lock) {
             if ( history.size() > HISTORY_WINDOW_SIZE )
                 history.pop();
@@ -309,26 +283,11 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
         }
     }
 
-    /** How long in seconds to process 1M traversal units? */
-    private final double secondsPerMillionElements(ProcessingHistory last) {
-        return (last.elapsedSeconds * 1000000.0) / Math.max(last.unitsProcessed, 1);
-    }
-
-    /** How long in seconds to process 1M bp on the genome? */
-    private final double secondsPerMillionBP(ProcessingHistory last) {
-        return (last.elapsedSeconds * 1000000.0) / Math.max(last.bpProcessed, 1);
-    }
-
-    /** What fractoin of the target intervals have we covered? */
-    private final double calculateFractionGenomeTargetCompleted(ProcessingHistory last) {
-        return (1.0*last.bpProcessed) / targetSize;
-    }
-
     /**
      * Called after a traversal to print out information about the traversal process
      */
     public void printOnTraversalDone() {
-        printProgress(null, null, true, 1);
+        printProgress(null, true);
 
         final double elapsed = timer == null ? 0 : timer.getElapsedTime();
 
@@ -389,7 +348,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * @return Frequency, in seconds, of performance log writes.
      */
     public long getPerformanceProgressPrintFrequencySeconds() {
-        return PROGRESS_PRINT_FREQUENCY;
+        return progressPrintFrequency;
     }
 
     /**
@@ -397,6 +356,35 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
      * @param seconds number of seconds between messages indicating performance frequency.
      */
     public void setPerformanceProgressPrintFrequencySeconds(long seconds) {
-        PROGRESS_PRINT_FREQUENCY = seconds;
+        progressPrintFrequency = seconds;
+    }
+
+    private static class ProcessingHistory {
+        double elapsedSeconds;
+        long unitsProcessed;
+        long bpProcessed;
+        GenomeLoc loc;
+
+        public ProcessingHistory(double elapsedSeconds, GenomeLoc loc, long unitsProcessed, long bpProcessed) {
+            this.elapsedSeconds = elapsedSeconds;
+            this.loc = loc;
+            this.unitsProcessed = unitsProcessed;
+            this.bpProcessed = bpProcessed;
+        }
+
+        /** How long in seconds to process 1M traversal units? */
+        private double secondsPerMillionElements() {
+            return (elapsedSeconds * 1000000.0) / Math.max(unitsProcessed, 1);
+        }
+
+        /** How long in seconds to process 1M bp on the genome? */
+        private double secondsPerMillionBP() {
+            return (elapsedSeconds * 1000000.0) / Math.max(bpProcessed, 1);
+        }
+
+        /** What fractoin of the target intervals have we covered? */
+        private double calculateFractionGenomeTargetCompleted(final long targetSize) {
+            return (1.0*bpProcessed) / targetSize;
+        }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index ecaa15fe9..bbd9346b3 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -104,7 +104,8 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
 
                 prevLoc = location;
 
-                printProgress(dataProvider.getShard(), locus.getLocation());
+                updateCumulativeMetrics(dataProvider.getShard());
+                printProgress(locus.getLocation());
             }
 
             // Take the individual isActive calls and integrate them into contiguous active regions and
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
index 3f349d86d..2b45d894c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseDuplicates.java
@@ -196,7 +196,8 @@ public class TraverseDuplicates<M,T> extends TraversalEngine<M,T,DuplicateWalker
                 sum = walker.reduce(x, sum);
             }
 
-            printProgress(dataProvider.getShard(),site);
+            updateCumulativeMetrics(dataProvider.getShard());
+            printProgress(site);
             done = walker.isDone();
         }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
index 19d95381e..efa2eca02 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociBase.java
@@ -61,6 +61,7 @@ public abstract class TraverseLociBase<M,T> extends TraversalEngine<M,T,LocusWal
             final TraverseResults<T> result = traverse( walker, locusView, referenceView, referenceOrderedDataView, sum );
             sum = result.reduceResult;
             dataProvider.getShard().getReadMetrics().incrementNumIterations(result.numIterations);
+            updateCumulativeMetrics(dataProvider.getShard());
         }
 
         // We have a final map call to execute here to clean up the skipped based from the
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
index 1dec3b238..22381092f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociLinear.java
@@ -39,8 +39,7 @@ public class TraverseLociLinear<M,T> extends TraverseLociBase<M,T> {
                 done = walker.isDone();
             }
 
-            // TODO -- refactor printProgress to separate updating read metrics from printing progress
-            //printProgress(dataProvider.getShard(),locus.getLocation());
+            printProgress(locus.getLocation());
         }
 
         return new TraverseResults<T>(numIterations, sum);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
index 4e6eb1915..73b73c002 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseLociNano.java
@@ -8,9 +8,10 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReferenceOrderedView;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
 import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
 import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
-import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoSchedulerMapFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoSchedulerProgressFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoSchedulerReduceFunction;
 
 import java.util.Iterator;
 
@@ -26,6 +27,7 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
 
     public TraverseLociNano(int nThreads) {
         nanoScheduler = new NanoScheduler<MapData, MapResult, T>(BUFFER_SIZE, nThreads);
+        nanoScheduler.setProgressFunction(new TraverseLociProgress());
     }
 
     @Override
@@ -41,11 +43,6 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
         final MapDataIterator inputIterator = new MapDataIterator(locusView, referenceView, referenceOrderedDataView);
         final T result = nanoScheduler.execute(inputIterator, myMap, sum, myReduce);
 
-        // todo -- how do I print progress?
-//        final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
-//        final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
-//        printProgress(dataProvider.getShard(), locus, aggregatedInputs.size());
-
         return new TraverseResults<T>(inputIterator.numIterations, result);
     }
 
@@ -156,7 +153,7 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
      *
      * Applies walker.map to MapData, returning a MapResult object containing the result
      */
-    private class TraverseLociMap implements MapFunction<MapData, MapResult> {
+    private class TraverseLociMap implements NanoSchedulerMapFunction<MapData, MapResult> {
         final LocusWalker<M,T> walker;
 
         private TraverseLociMap(LocusWalker<M, T> walker) {
@@ -177,11 +174,11 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
     }
 
     /**
-     * ReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     * NanoSchedulerReduceFunction for TraverseReads meeting NanoScheduler interface requirements
      *
      * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
      */
-    private class TraverseLociReduce implements ReduceFunction<MapResult, T> {
+    private class TraverseLociReduce implements NanoSchedulerReduceFunction<MapResult, T> {
         final LocusWalker<M,T> walker;
 
         private TraverseLociReduce(LocusWalker<M, T> walker) {
@@ -197,4 +194,12 @@ public class TraverseLociNano<M,T> extends TraverseLociBase<M,T> {
                 return sum;
         }
     }
+
+    private class TraverseLociProgress implements NanoSchedulerProgressFunction<MapData> {
+        @Override
+        public void progress(MapData lastProcessedMap) {
+            if (lastProcessedMap.alignmentContext != null)
+                printProgress(lastProcessedMap.alignmentContext.getLocation());
+        }
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
index ebaac40af..9b076fce4 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadPairs.java
@@ -65,7 +65,8 @@ public class TraverseReadPairs<M,T> extends TraversalEngine<M,T, ReadPairWalker<
                 pairs.clear();
                 pairs.add(read);
 
-                printProgress(dataProvider.getShard(),null);
+                updateCumulativeMetrics(dataProvider.getShard());
+                printProgress(null);
             }
 
             done = walker.isDone();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
index 3b712c973..604d9bf4c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReads.java
@@ -99,8 +99,11 @@ public class TraverseReads<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,Read
                 sum = walker.reduce(x, sum);
             }
 
-            GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
-            printProgress(dataProvider.getShard(),locus);
+            final GenomeLoc locus = read.getReferenceIndex() == SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX ? null : engine.getGenomeLocParser().createGenomeLoc(read.getReferenceName(),read.getAlignmentStart());
+
+            updateCumulativeMetrics(dataProvider.getShard());
+            printProgress(locus);
+
             done = walker.isDone();
         }
         return sum;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
index 4bb700c37..5679747e1 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseReadsNano.java
@@ -35,9 +35,9 @@ import org.broadinstitute.sting.gatk.datasources.reads.ReadShard;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
 import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.nanoScheduler.MapFunction;
 import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
-import org.broadinstitute.sting.utils.nanoScheduler.ReduceFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoSchedulerMapFunction;
+import org.broadinstitute.sting.utils.nanoScheduler.NanoSchedulerReduceFunction;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.LinkedList;
@@ -94,7 +94,9 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
 
         final GATKSAMRecord lastRead = aggregatedInputs.get(aggregatedInputs.size() - 1).read;
         final GenomeLoc locus = engine.getGenomeLocParser().createGenomeLoc(lastRead);
-        printProgress(dataProvider.getShard(), locus, aggregatedInputs.size());
+
+        updateCumulativeMetrics(dataProvider.getShard());
+        printProgress(locus);
 
         return result;
     }
@@ -189,7 +191,7 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
      *
      * Applies walker.map to MapData, returning a MapResult object containing the result
      */
-    private class TraverseReadsMap implements MapFunction<MapData, MapResult> {
+    private class TraverseReadsMap implements NanoSchedulerMapFunction<MapData, MapResult> {
         final ReadWalker<M,T> walker;
 
         private TraverseReadsMap(ReadWalker<M, T> walker) {
@@ -209,11 +211,11 @@ public class TraverseReadsNano<M,T> extends TraversalEngine<M,T,ReadWalker<M,T>,
     }
 
     /**
-     * ReduceFunction for TraverseReads meeting NanoScheduler interface requirements
+     * NanoSchedulerReduceFunction for TraverseReads meeting NanoScheduler interface requirements
      *
      * Takes a MapResult object and applies the walkers reduce function to each map result, when applicable
      */
-    private class TraverseReadsReduce implements ReduceFunction<MapResult, T> {
+    private class TraverseReadsReduce implements NanoSchedulerReduceFunction<MapResult, T> {
         final ReadWalker<M,T> walker;
 
         private TraverseReadsReduce(ReadWalker<M, T> walker) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index f0e77354f..f0c2a6723 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -54,6 +54,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     boolean shutdown = false;
     boolean debug = false;
 
+    private NanoSchedulerProgressFunction<InputType> progressFunction = null;
+
     final SimpleTimer outsideSchedulerTimer = new SimpleTimer("outside");
     final SimpleTimer inputTimer = new SimpleTimer("input");
     final SimpleTimer mapTimer = new SimpleTimer("map");
@@ -148,6 +150,17 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         this.debug = debug;
     }
 
+    /**
+     * Set the progress callback function to progressFunction
+     *
+     * The progress callback is invoked after each buffer size elements have been processed by map/reduce
+     *
+     * @param progressFunction a progress function to call, or null if you don't want any progress callback
+     */
+    public void setProgressFunction(final NanoSchedulerProgressFunction<InputType> progressFunction) {
+        this.progressFunction = progressFunction;
+    }
+
     /**
      * Execute a map/reduce job with this nanoScheduler
      *
@@ -168,9 +181,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * @return the last reduce value
      */
     public ReduceType execute(final Iterator<InputType> inputReader,
-                              final MapFunction<InputType, MapType> map,
+                              final NanoSchedulerMapFunction<InputType, MapType> map,
                               final ReduceType initialValue,
-                              final ReduceFunction<MapType, ReduceType> reduce) {
+                              final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         if ( isShutdown() ) throw new IllegalStateException("execute called on already shutdown NanoScheduler");
         if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
         if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
@@ -193,9 +206,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * @return the reduce result of this map/reduce job
      */
     private ReduceType executeSingleThreaded(final Iterator<InputType> inputReader,
-                                             final MapFunction<InputType, MapType> map,
+                                             final NanoSchedulerMapFunction<InputType, MapType> map,
                                              final ReduceType initialValue,
-                                             final ReduceFunction<MapType, ReduceType> reduce) {
+                                             final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         ReduceType sum = initialValue;
         while ( inputReader.hasNext() ) {
             final InputType input = inputReader.next();
@@ -211,9 +224,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * @return the reduce result of this map/reduce job
      */
     private ReduceType executeMultiThreaded(final Iterator<InputType> inputReader,
-                                            final MapFunction<InputType, MapType> map,
+                                            final NanoSchedulerMapFunction<InputType, MapType> map,
                                             final ReduceType initialValue,
-                                            final ReduceFunction<MapType, ReduceType> reduce) {
+                                            final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         debugPrint("Executing nanoScheduler");
         ReduceType sum = initialValue;
         while ( inputReader.hasNext() ) {
@@ -228,6 +241,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                 // send off the reduce job, and block until we get at least one reduce result
                 sum = reduceSerial(reduce, mapQueue, sum);
                 debugPrint("  Done with cycle of map/reduce");
+
+                if ( progressFunction != null ) progressFunction.progress(inputs.get(inputs.size()-1));
             } catch (InterruptedException ex) {
                 throw new ReviewedStingException("got execution exception", ex);
             } catch (ExecutionException ex) {
@@ -239,7 +254,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     }
 
     @Requires({"reduce != null", "! mapQueue.isEmpty()"})
-    private ReduceType reduceSerial(final ReduceFunction<MapType, ReduceType> reduce,
+    private ReduceType reduceSerial(final NanoSchedulerReduceFunction<MapType, ReduceType> reduce,
                                     final Queue<Future<MapType>> mapQueue,
                                     final ReduceType initSum)
             throws InterruptedException, ExecutionException {
@@ -280,7 +295,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     }
 
     @Requires({"map != null", "! inputs.isEmpty()"})
-    private Queue<Future<MapType>> submitMapJobs(final MapFunction<InputType, MapType> map,
+    private Queue<Future<MapType>> submitMapJobs(final NanoSchedulerMapFunction<InputType, MapType> map,
                                                  final ExecutorService executor,
                                                  final List<InputType> inputs) {
         final Queue<Future<MapType>> mapQueue = new LinkedList<Future<MapType>>();
@@ -299,10 +314,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      */
     private class CallableMap implements Callable<MapType> {
         final InputType input;
-        final MapFunction<InputType, MapType> map;
+        final NanoSchedulerMapFunction<InputType, MapType> map;
 
         @Requires({"map != null"})
-        private CallableMap(final MapFunction<InputType, MapType> map, final InputType inputs) {
+        private CallableMap(final NanoSchedulerMapFunction<InputType, MapType> map, final InputType inputs) {
             this.input = inputs;
             this.map = map;
         }
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerMapFunction.java
similarity index 84%
rename from public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapFunction.java
rename to public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerMapFunction.java
index 440c263b7..ddf4421d2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/MapFunction.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerMapFunction.java
@@ -9,7 +9,7 @@ package org.broadinstitute.sting.utils.nanoScheduler;
  * Date: 8/24/12
  * Time: 9:49 AM
  */
-public interface MapFunction<InputType, ResultType> {
+public interface NanoSchedulerMapFunction<InputType, ResultType> {
     /**
      * Return function on input, returning a value of ResultType
      * @param input
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerProgressFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerProgressFunction.java
new file mode 100644
index 000000000..8631196a3
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerProgressFunction.java
@@ -0,0 +1,12 @@
+package org.broadinstitute.sting.utils.nanoScheduler;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: depristo
+ * Date: 9/4/12
+ * Time: 2:10 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public interface NanoSchedulerProgressFunction<InputType> {
+    public void progress(final InputType lastMapInput);
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReduceFunction.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerReduceFunction.java
similarity index 87%
rename from public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReduceFunction.java
rename to public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerReduceFunction.java
index 8f1b0eddd..7e58eeaf9 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/ReduceFunction.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerReduceFunction.java
@@ -7,7 +7,7 @@ package org.broadinstitute.sting.utils.nanoScheduler;
  * Date: 8/24/12
  * Time: 9:49 AM
  */
-public interface ReduceFunction<MapType, ReduceType> {
+public interface NanoSchedulerReduceFunction<MapType, ReduceType> {
     /**
      * Combine one with sum into a new ReduceType
      * @param one the result of a map call on an input element
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index 1dcc243f2..0ec3035e2 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -21,11 +21,11 @@ import java.util.List;
 public class NanoSchedulerUnitTest extends BaseTest {
     public static final int NANO_SCHEDULE_MAX_RUNTIME = 60000;
 
-    private static class Map2x implements MapFunction<Integer, Integer> {
+    private static class Map2x implements NanoSchedulerMapFunction<Integer, Integer> {
         @Override public Integer apply(Integer input) { return input * 2; }
     }
 
-    private static class ReduceSum implements ReduceFunction<Integer, Integer> {
+    private static class ReduceSum implements NanoSchedulerReduceFunction<Integer, Integer> {
         int prevOne = Integer.MIN_VALUE;
 
         @Override public Integer apply(Integer one, Integer sum) {

From 03dd470ec152c1bf7682ce3afde2141b151acf13 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 14:58:12 -0400
Subject: [PATCH 15/26] Test for progressFunction in NanoScheduler; bugfix for
 single threaded fast path

---
 .../utils/nanoScheduler/NanoScheduler.java    |  3 +++
 .../nanoScheduler/NanoSchedulerUnitTest.java  | 22 ++++++++++++++++++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index f0c2a6723..61d4fdd01 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -210,9 +210,12 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                                              final ReduceType initialValue,
                                              final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         ReduceType sum = initialValue;
+        int i = 0;
         while ( inputReader.hasNext() ) {
             final InputType input = inputReader.next();
             final MapType mapValue = map.apply(input);
+            if ( i++ % bufferSize == 0 && progressFunction != null )
+                progressFunction.progress(input);
             sum = reduce.apply(mapValue, sum);
         }
         return sum;
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index 0ec3035e2..3bd006ffe 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -34,6 +34,16 @@ public class NanoSchedulerUnitTest extends BaseTest {
         }
     }
 
+    private static class ProgressCallback implements NanoSchedulerProgressFunction<Integer> {
+        int callBacks = 0;
+
+        @Override
+        public void progress(Integer lastMapInput) {
+            callBacks++;
+        }
+    }
+
+
     private static int sum2x(final int start, final int end) {
         int sum = 0;
         for ( int i = start; i < end; i++ )
@@ -62,6 +72,11 @@ public class NanoSchedulerUnitTest extends BaseTest {
             return ints.iterator();
         }
 
+        public int nExpectedCallbacks() {
+            int nElements = Math.max(end - start, 0);
+            return nElements / bufferSize;
+        }
+
         public Map2x makeMap() { return new Map2x(); }
         public Integer initReduce() { return 0; }
         public ReduceSum makeReduce() { return new ReduceSum(); }
@@ -73,7 +88,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
         for ( final int bufferSize : Arrays.asList(1, 10, 1000, 1000000) ) {
             for ( final int nt : Arrays.asList(1, 2, 4) ) {
                 for ( final int start : Arrays.asList(0) ) {
-                    for ( final int end : Arrays.asList(1, 2, 11, 10000, 100000) ) {
+                    for ( final int end : Arrays.asList(0, 1, 2, 11, 10000, 100000) ) {
                         exampleTest = new NanoSchedulerBasicTest(bufferSize, nt, start, end);
                     }
                 }
@@ -101,12 +116,17 @@ public class NanoSchedulerUnitTest extends BaseTest {
         final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
                 new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
 
+        final ProgressCallback callback = new ProgressCallback();
+        nanoScheduler.setProgressFunction(callback);
+
         Assert.assertEquals(nanoScheduler.getBufferSize(), test.bufferSize, "bufferSize argument");
         Assert.assertEquals(nanoScheduler.getnThreads(), test.nThreads, "nThreads argument");
 
         final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
         Assert.assertNotNull(sum);
         Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
+
+        Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected");
         nanoScheduler.shutdown();
     }
 

From a997c99806b49c1ca0efdd4cc9c834df465e7b22 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 17:54:43 -0400
Subject: [PATCH 16/26] Initial NanoScheduler with input producer thread

---
 .../utils/nanoScheduler/NanoScheduler.java    | 109 ++++++++++++++----
 .../nanoScheduler/NanoSchedulerUnitTest.java  |   3 +-
 2 files changed, 86 insertions(+), 26 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 61d4fdd01..4f9fedce3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -5,6 +5,7 @@ import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.AutoFormattingTime;
 import org.broadinstitute.sting.utils.SimpleTimer;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.Iterator;
@@ -74,7 +75,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
         this.bufferSize = bufferSize;
         this.nThreads = nThreads;
-        this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads);
+        this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads-1);
 
         // start timing the time spent outside of the nanoScheduler
         outsideSchedulerTimer.start();
@@ -232,20 +233,31 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                                             final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         debugPrint("Executing nanoScheduler");
         ReduceType sum = initialValue;
-        while ( inputReader.hasNext() ) {
+        boolean done = false;
+
+        final BlockingQueue<InputDatum> inputQueue = new LinkedBlockingDeque<InputDatum>(bufferSize);
+        final InputProducer inputProducer = new InputProducer(inputReader, inputQueue);
+        inputProducer.start();
+
+        while ( ! done ) {
             try {
-                // read in our input values
-                final List<InputType> inputs = readInputs(inputReader);
-                debugPrint("Enqueuing " + inputs.size() + " elements to map");
+                final Pair<List<InputType>, Boolean> readResults = readInputs(inputQueue);
+                final List<InputType> inputs = readResults.getFirst();
+                done = readResults.getSecond();
 
-                // send jobs for map
-                final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
+                if ( ! inputs.isEmpty() ) {
+                    // send jobs for map
+                    final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
 
-                // send off the reduce job, and block until we get at least one reduce result
-                sum = reduceSerial(reduce, mapQueue, sum);
-                debugPrint("  Done with cycle of map/reduce");
+                    // send off the reduce job, and block until we get at least one reduce result
+                    sum = reduceSerial(reduce, mapQueue, sum);
+                    debugPrint("  Done with cycle of map/reduce");
 
-                if ( progressFunction != null ) progressFunction.progress(inputs.get(inputs.size()-1));
+                    if ( progressFunction != null ) progressFunction.progress(inputs.get(inputs.size()-1));
+                } else {
+                    // we must be done
+                    if ( ! done ) throw new IllegalStateException("Inputs empty but not done");
+                }
             } catch (InterruptedException ex) {
                 throw new ReviewedStingException("got execution exception", ex);
             } catch (ExecutionException ex) {
@@ -267,9 +279,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         for ( final Future<MapType> future : mapQueue ) {
             final MapType value = future.get(); // block until we get the values for this task
 
-            if ( TIME_CALLS) reduceTimer.restart();
+            if ( TIME_CALLS ) reduceTimer.restart();
             sum = reduce.apply(value, sum);
-            if ( TIME_CALLS) reduceTimer.stop();
+            if ( TIME_CALLS ) reduceTimer.stop();
         }
 
         return sum;
@@ -280,21 +292,68 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      *
      * @return a queue of input read in, containing one or more values of InputType read in
      */
-    @Requires("inputReader.hasNext()")
-    @Ensures("!result.isEmpty()")
-    private List<InputType> readInputs(final Iterator<InputType> inputReader) {
+    @Requires("inputReader != null")
+    @Ensures("result != null")
+    private Pair<List<InputType>, Boolean> readInputs(final BlockingQueue<InputDatum> inputReader) throws InterruptedException {
         int n = 0;
         final List<InputType> inputs = new LinkedList<InputType>();
+        boolean done = false;
 
-        if ( TIME_CALLS) inputTimer.restart();
-        while ( inputReader.hasNext() && n < getBufferSize() ) {
-            final InputType input = inputReader.next();
-            inputs.add(input);
-            n++;
+        while ( ! done && n < getBufferSize() ) {
+            final InputDatum input = inputReader.take();
+            done = input.isLast();
+            if ( ! done ) {
+                inputs.add(input.datum);
+                n++;
+            }
         }
-        if ( TIME_CALLS) inputTimer.stop();
 
-        return inputs;
+        return new Pair<List<InputType>, Boolean>(inputs, done);
+    }
+
+    private class InputProducer extends Thread {
+        final Iterator<InputType> inputReader;
+        final BlockingQueue<InputDatum> outputQueue;
+
+        public InputProducer(final Iterator<InputType> inputReader, final BlockingQueue<InputDatum> outputQueue) {
+            this.inputReader = inputReader;
+            this.outputQueue = outputQueue;
+        }
+
+        public void run() {
+            try {
+                while ( inputReader.hasNext() ) {
+                    if ( TIME_CALLS ) inputTimer.restart();
+                    final InputType input = inputReader.next();
+                    if ( TIME_CALLS ) inputTimer.stop();
+                    outputQueue.put(new InputDatum(input));
+                }
+
+                // add the EOF object so we know we are done
+                outputQueue.put(new InputDatum());
+            } catch (InterruptedException ex) {
+                throw new ReviewedStingException("got execution exception", ex);
+            }
+        }
+    }
+
+    private class InputDatum {
+        final boolean isLast;
+        final InputType datum;
+
+        private InputDatum(final InputType datum) {
+            isLast = false;
+            this.datum = datum;
+        }
+
+        private InputDatum() {
+            isLast = true;
+            this.datum = null;
+        }
+
+        public boolean isLast() {
+            return isLast;
+        }
     }
 
     @Requires({"map != null", "! inputs.isEmpty()"})
@@ -326,10 +385,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         }
 
         @Override public MapType call() throws Exception {
-            if ( TIME_CALLS) mapTimer.restart();
+            if ( TIME_CALLS ) mapTimer.restart();
             if ( debug ) debugPrint("\t\tmap " + input);
             final MapType result = map.apply(input);
-            if ( TIME_CALLS) mapTimer.stop();
+            if ( TIME_CALLS ) mapTimer.stop();
             return result;
         }
     }
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index 3bd006ffe..ddfc3cecd 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -126,7 +126,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
         Assert.assertNotNull(sum);
         Assert.assertEquals((int)sum, test.expectedResult, "NanoScheduler sum not the same as calculated directly");
 
-        Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected");
+        Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected.  Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
         nanoScheduler.shutdown();
     }
 
@@ -168,6 +168,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
         final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(1000, Integer.valueOf(args[0]), 0, Integer.valueOf(args[1]));
         final NanoScheduler<Integer, Integer, Integer> nanoScheduler =
                 new NanoScheduler<Integer, Integer, Integer>(test.bufferSize, test.nThreads);
+        nanoScheduler.setDebug(true);
 
         final Integer sum = nanoScheduler.execute(test.makeReader(), test.makeMap(), test.initReduce(), test.makeReduce());
         System.out.printf("Sum = %d, expected =%d%n", sum, test.expectedResult);

From c822b7c760245064741acf3c2221a299c89d21cb Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 18:02:40 -0400
Subject: [PATCH 17/26] Fix long-standing NPE in LMS due to inappropriate
 timing of initialization

---
 .../sting/gatk/executive/LinearMicroScheduler.java             | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
index 5bcb16c94..740bcb566 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@@ -60,11 +60,12 @@ public class LinearMicroScheduler extends MicroScheduler {
 
         boolean done = walker.isDone();
         int counter = 0;
+
+        traversalEngine.startTimersIfNecessary();
         for (Shard shard : shardStrategy ) {
             if ( done || shard == null ) // we ran out of shards that aren't owned
                 break;
 
-            traversalEngine.startTimersIfNecessary();
             if(shard.getShardType() == Shard.ShardType.LOCUS) {
                 WindowMaker windowMaker = new WindowMaker(shard, engine.getGenomeLocParser(),
                         getReadIterator(shard), shard.getGenomeLocs(), SampleUtils.getSAMFileSamples(engine));

From 71d9ebcb0d8266152a142b5f9207eec022a7716f Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 18:03:05 -0400
Subject: [PATCH 18/26] Fix bug (introduced by me) that didn't include contig
 in progress meter

---
 .../broadinstitute/sting/gatk/traversals/TraversalEngine.java   | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
index 4422d49ae..8c617e4dc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraversalEngine.java
@@ -242,7 +242,7 @@ public abstract class TraversalEngine<M,T,WalkerType extends Walker<M,T>,Provide
                 else
                     progressPrintFrequency = 10 * 1000; // in milliseconds
 
-                final String posName = loc == null ? (mustPrint ? "done" : "unmapped reads") : Integer.toString(loc.getStart());
+                final String posName = loc == null ? (mustPrint ? "done" : "unmapped reads") : String.format("%s:%d", loc.getContig(), loc.getStart());
                 logger.info(String.format("%15s        %5.2e %s     %s    %5.1f%%      %s  %s",
                         posName, nRecords*1.0, elapsed, unitRate,
                         100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));

From 1e55475adcce7a9ec5ab8322fe5ed46efe111d1c Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Sep 2012 18:07:08 -0400
Subject: [PATCH 19/26] NanoScheduler uses ExecutorService to run input reader
 thread

---
 .../utils/nanoScheduler/NanoScheduler.java    | 20 ++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 4f9fedce3..89e44ce93 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -51,7 +51,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
     final int bufferSize;
     final int nThreads;
-    final ExecutorService executor;
+    final ExecutorService inputExecutor;
+    final ExecutorService mapExecutor;
     boolean shutdown = false;
     boolean debug = false;
 
@@ -75,7 +76,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
         this.bufferSize = bufferSize;
         this.nThreads = nThreads;
-        this.executor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads-1);
+        this.mapExecutor = nThreads == 1 ? null : Executors.newFixedThreadPool(nThreads-1);
+        this.inputExecutor = Executors.newSingleThreadExecutor();
 
         // start timing the time spent outside of the nanoScheduler
         outsideSchedulerTimer.start();
@@ -107,10 +109,10 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     public void shutdown() {
         outsideSchedulerTimer.stop();
 
-        if ( executor != null ) {
-            final List<Runnable> remaining = executor.shutdownNow();
+        if ( mapExecutor != null ) {
+            final List<Runnable> remaining = mapExecutor.shutdownNow();
             if ( ! remaining.isEmpty() )
-                throw new IllegalStateException("Remaining tasks found in the executor, unexpected behavior!");
+                throw new IllegalStateException("Remaining tasks found in the mapExecutor, unexpected behavior!");
         }
         shutdown = true;
 
@@ -236,8 +238,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         boolean done = false;
 
         final BlockingQueue<InputDatum> inputQueue = new LinkedBlockingDeque<InputDatum>(bufferSize);
-        final InputProducer inputProducer = new InputProducer(inputReader, inputQueue);
-        inputProducer.start();
+
+        inputExecutor.submit(new InputProducer(inputReader, inputQueue));
 
         while ( ! done ) {
             try {
@@ -247,7 +249,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
                 if ( ! inputs.isEmpty() ) {
                     // send jobs for map
-                    final Queue<Future<MapType>> mapQueue = submitMapJobs(map, executor, inputs);
+                    final Queue<Future<MapType>> mapQueue = submitMapJobs(map, mapExecutor, inputs);
 
                     // send off the reduce job, and block until we get at least one reduce result
                     sum = reduceSerial(reduce, mapQueue, sum);
@@ -311,7 +313,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         return new Pair<List<InputType>, Boolean>(inputs, done);
     }
 
-    private class InputProducer extends Thread {
+    private class InputProducer implements Runnable {
         final Iterator<InputType> inputReader;
         final BlockingQueue<InputDatum> outputQueue;
 

From 9bf1d138d9ba921312b49b00d1627f4feff62c2d Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 15:41:52 -0400
Subject: [PATCH 20/26] New GATK argument interface for data and cpu threads

-- Closes GSA-515 Nanoscheduler GSA-542 Good interface to nanoScheduler
-- Old -nt means dataThreads
-- New -cnt (--num_cpu_threads_per_data_thread) gives you n cpu threads for each data thread in the system
-- Cleanup logic for handling data and cpu threading in HMS, LMS, and MS
-- GATKRunReport reports the total number of threads in use by the GATK, not just the nt value
-- Removed the io,cpu tags for nt.  Stupid system if you ask me.  Cleaned up the GenomeAnalysisEngine and ThreadAllocation handling to be totally straightforward now
---
 .../sting/gatk/GenomeAnalysisEngine.java      | 32 ++++----
 .../arguments/GATKArgumentCollection.java     | 44 ++++++-----
 .../executive/HierarchicalMicroScheduler.java | 17 ++--
 .../gatk/executive/LinearMicroScheduler.java  |  9 +--
 .../sting/gatk/executive/MicroScheduler.java  | 40 ++++++----
 .../io/stubs/VariantContextWriterStub.java    |  4 +-
 .../sting/gatk/phonehome/GATKRunReport.java   |  2 +-
 .../resourcemanagement/ThreadAllocation.java  | 78 +++++++++++--------
 8 files changed, 123 insertions(+), 103 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index 1b4333ce2..fa28b02cd 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -400,28 +400,22 @@ public class GenomeAnalysisEngine {
      * Parse out the thread allocation from the given command-line argument.
      */
     private void determineThreadAllocation() {
-        Tags tags = parsingEngine.getTags(argCollection.numberOfThreads);
+        if ( argCollection.numberOfDataThreads < 1 ) throw new UserException.BadArgumentValue("num_threads", "cannot be less than 1, but saw " + argCollection.numberOfDataThreads);
+        if ( argCollection.numberOfCPUThreadsPerDataThread < 1 ) throw new UserException.BadArgumentValue("num_cpu_threads", "cannot be less than 1, but saw " + argCollection.numberOfCPUThreadsPerDataThread);
+        if ( argCollection.numberOfIOThreads < 0 ) throw new UserException.BadArgumentValue("num_io_threads", "cannot be less than 0, but saw " + argCollection.numberOfIOThreads);
 
-        // TODO: Kill this complicated logic once Queue supports arbitrary tagged parameters.
-        Integer numCPUThreads = null;
-        if(tags.containsKey("cpu") && argCollection.numberOfCPUThreads != null)
-            throw new UserException("Number of CPU threads specified both directly on the command-line and as a tag to the nt argument.  Please specify only one or the other.");
-        else if(tags.containsKey("cpu"))
-            numCPUThreads = Integer.parseInt(tags.getValue("cpu"));
-        else if(argCollection.numberOfCPUThreads != null)
-            numCPUThreads = argCollection.numberOfCPUThreads;
-
-        Integer numIOThreads = null;
-        if(tags.containsKey("io") && argCollection.numberOfIOThreads != null)
-            throw new UserException("Number of IO threads specified both directly on the command-line and as a tag to the nt argument.  Please specify only one or the other.");
-        else if(tags.containsKey("io"))
-            numIOThreads = Integer.parseInt(tags.getValue("io"));
-        else if(argCollection.numberOfIOThreads != null)
-            numIOThreads = argCollection.numberOfIOThreads;
-
-        this.threadAllocation = new ThreadAllocation(argCollection.numberOfThreads, numCPUThreads, numIOThreads, ! argCollection.disableEfficiencyMonitor);
+        this.threadAllocation = new ThreadAllocation(argCollection.numberOfDataThreads,
+                argCollection.numberOfCPUThreadsPerDataThread,
+                argCollection.numberOfIOThreads,
+                ! argCollection.disableEfficiencyMonitor);
     }
 
+    public int getTotalNumberOfThreads() {
+        return this.threadAllocation == null ? 1 : threadAllocation.getTotalNumThreads();
+    }
+
+
+
     /**
      * Allow subclasses and others within this package direct access to the walker manager.
      * @return The walker manager used by this package.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index 33400bd9e..b9e44d87b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -287,9 +287,32 @@ public class GATKArgumentCollection {
     @Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime.  For expert users only who know what they are doing.  We do not support usage of this argument.", required = false)
     public ValidationExclusion.TYPE unsafe;
 
-    /** How many threads should be allocated to this analysis. */
-    @Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
-    public Integer numberOfThreads = 1;
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // Multi-threading arguments
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    /**
+     * How many data threads should be allocated to this analysis?  Data threads contains N cpu threads per
+     * data thread, and act as completely data parallel processing, increasing the memory usage of GATK
+     * by M data threads.  Data threads generally scale extremely effectively, up to 24 cores
+     */
+    @Argument(fullName = "num_threads", shortName = "nt", doc = "How many data threads should be allocated to running this analysis.", required = false)
+    public Integer numberOfDataThreads = 1;
+
+    /**
+     * How many CPU threads should be allocated per data thread?  Each CPU thread operates the map
+     * cycle independently, but may run into earlier scaling problems with IO than data threads.  Has
+     * the benefit of not requiring X times as much memory per thread as data threads do, but rather
+     * only a constant overhead.
+     */
+    @Argument(fullName="num_cpu_threads_per_data_thread", shortName = "cnt", doc="How many CPU threads should be allocated per data thread to running this analysis?", required = false)
+    public int numberOfCPUThreadsPerDataThread = 1;
+
+    @Argument(fullName="num_io_threads", shortName = "nit", doc="How many of the given threads should be allocated to IO", required = false)
+    @Hidden
+    public int numberOfIOThreads = 0;
 
     /**
      * By default the GATK monitors its own efficiency, but this can have a itsy-bitsy tiny
@@ -299,24 +322,9 @@ public class GATKArgumentCollection {
     @Argument(fullName = "disableThreadEfficiencyMonitor", shortName = "dtem", doc = "Disable GATK efficiency monitoring", required = false)
     public Boolean disableEfficiencyMonitor = false;
 
-    /**
-     * The following two arguments (num_cpu_threads, num_io_threads are TEMPORARY since Queue cannot currently support arbitrary tagged data types.
-     * TODO: Kill this when I can do a tagged integer in Queue.
-     */
-    @Argument(fullName="num_cpu_threads", shortName = "nct", doc="How many of the given threads should be allocated to the CPU", required = false)
-    @Hidden
-    public Integer numberOfCPUThreads = null;
-    @Argument(fullName="num_io_threads", shortName = "nit", doc="How many of the given threads should be allocated to IO", required = false)
-    @Hidden
-    public Integer numberOfIOThreads = null;
-
     @Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc="The total number of BAM file handles to keep open simultaneously", required=false)
     public Integer numberOfBAMFileHandles = null;
 
-    @Argument(fullName="nanoThreads", shortName = "nanoThreads", doc="NanoThreading", required = false)
-    @Hidden
-    public int nanoThreads = 1;
-
     @Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
     public List<String> readGroupBlackList = null;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
index 9198d210d..f1d2f7b5b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/HierarchicalMicroScheduler.java
@@ -8,6 +8,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
 import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
+import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -76,21 +77,21 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
     /**
      * Create a new hierarchical microscheduler to process the given reads and reference.
      *
-     * @param walker        the walker used to process the dataset.
-     * @param reads         Reads file(s) to process.
-     * @param reference     Reference for driving the traversal.
-     * @param nThreadsToUse maximum number of threads to use to do the work
+     * @param walker           the walker used to process the dataset.
+     * @param reads            Reads file(s) to process.
+     * @param reference        Reference for driving the traversal.
+     * @param threadAllocation How should we apply multi-threaded execution?
      */
     protected HierarchicalMicroScheduler(final GenomeAnalysisEngine engine,
                                          final Walker walker,
                                          final SAMDataSource reads,
                                          final IndexedFastaSequenceFile reference,
                                          final Collection<ReferenceOrderedDataSource> rods,
-                                         final int nThreadsToUse,
-                                         final boolean monitorThreadPerformance ) {
-        super(engine, walker, reads, reference, rods, nThreadsToUse);
+                                         final ThreadAllocation threadAllocation) {
+        super(engine, walker, reads, reference, rods, threadAllocation);
 
-        if ( monitorThreadPerformance ) {
+        final int nThreadsToUse = threadAllocation.getNumDataThreads();
+        if ( threadAllocation.monitorThreadEfficiency() ) {
             final EfficiencyMonitoringThreadFactory monitoringThreadFactory = new EfficiencyMonitoringThreadFactory(nThreadsToUse);
             setThreadEfficiencyMonitor(monitoringThreadFactory);
             this.threadPool = Executors.newFixedThreadPool(nThreadsToUse, monitoringThreadFactory);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
index 740bcb566..ceb4a6f9b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@@ -10,6 +10,7 @@ import org.broadinstitute.sting.gatk.datasources.reads.Shard;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
+import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
 import org.broadinstitute.sting.gatk.traversals.TraverseActiveRegions;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.SampleUtils;
@@ -39,13 +40,11 @@ public class LinearMicroScheduler extends MicroScheduler {
                                    final SAMDataSource reads,
                                    final IndexedFastaSequenceFile reference,
                                    final Collection<ReferenceOrderedDataSource> rods,
-                                   final int numThreads, // may be > 1 if are nanoScheduling
-                                   final boolean monitorThreadPerformance ) {
-        super(engine, walker, reads, reference, rods, numThreads);
+                                   final ThreadAllocation threadAllocation) {
+        super(engine, walker, reads, reference, rods, threadAllocation);
 
-        if ( monitorThreadPerformance )
+        if ( threadAllocation.monitorThreadEfficiency() )
             setThreadEfficiencyMonitor(new ThreadEfficiencyMonitor());
-
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 073a46ee3..bc0d5da96 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -100,27 +100,30 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
      * @return The best-fit microscheduler.
      */
     public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
-        if (threadAllocation.getNumCPUThreads() > 1) {
+        if ( threadAllocation.isRunningInParallelMode() )
+            logger.info(String.format("Running the GATK in parallel mode with %d CPU threads for each of %d data threads",
+                    threadAllocation.getNumCPUThreadsPerDataThread(), threadAllocation.getNumDataThreads()));
+
+        if ( threadAllocation.getNumDataThreads() > 1 ) {
             if (walker.isReduceByInterval())
                 throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval.  Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution.  Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
 
-            logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
-
-            if ( walker instanceof ReadWalker ) {
-                if ( ! (walker instanceof ThreadSafeMapReduce) ) badNT(engine, walker);
-                return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+            if ( ! (walker instanceof TreeReducible) ) {
+                throw badNT("nt", engine, walker);
             } else {
-                // TODO -- update test for when nano scheduling only is an option
-                if ( ! (walker instanceof TreeReducible) ) badNT(engine, walker);
-                return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+                return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
             }
         } else {
-            return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads(), threadAllocation.monitorThreadEfficiency());
+            if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof ThreadSafeMapReduce) )
+                throw badNT("cnt", engine, walker);
+            return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
         }
     }
 
-    private static void badNT(final GenomeAnalysisEngine engine, final Walker walker) {
-        throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution.  Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
+    private static UserException badNT(final String parallelArg, final GenomeAnalysisEngine engine, final Walker walker) {
+        throw new UserException.BadArgumentValue("nt",
+                String.format("The analysis %s currently does not support parallel execution with %s.  " +
+                        "Please run your analysis without the %s option.", engine.getWalkerName(walker.getClass()), parallelArg, parallelArg));
     }
 
     /**
@@ -130,24 +133,27 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
      * @param reads   The reads.
      * @param reference The reference.
      * @param rods    the rods to include in the traversal
-     * @param numThreads the number of threads we are using in the underlying traversal
+     * @param threadAllocation the allocation of threads to use in the underlying traversal
      */
     protected MicroScheduler(final GenomeAnalysisEngine engine,
                              final Walker walker,
                              final SAMDataSource reads,
                              final IndexedFastaSequenceFile reference,
                              final Collection<ReferenceOrderedDataSource> rods,
-                             final int numThreads) {
+                             final ThreadAllocation threadAllocation) {
         this.engine = engine;
         this.reads = reads;
         this.reference = reference;
         this.rods = rods;
 
         if (walker instanceof ReadWalker) {
-            traversalEngine = numThreads > 1 ? new TraverseReadsNano(numThreads) : new TraverseReads();
+            traversalEngine = threadAllocation.getNumCPUThreadsPerDataThread() > 1
+                    ? new TraverseReadsNano(threadAllocation.getNumCPUThreadsPerDataThread())
+                    : new TraverseReads();
         } else if (walker instanceof LocusWalker) {
-            // TODO -- refactor to use better interface
-            traversalEngine = engine.getArguments().nanoThreads > 1 ? new TraverseLociNano(engine.getArguments().nanoThreads) : new TraverseLociLinear();
+            traversalEngine = threadAllocation.getNumCPUThreadsPerDataThread() > 1
+                    ? new TraverseLociNano(threadAllocation.getNumCPUThreadsPerDataThread())
+                    : new TraverseLociLinear();
         } else if (walker instanceof DuplicateWalker) {
             traversalEngine = new TraverseDuplicates();
         } else if (walker instanceof ReadPairWalker) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
index 260a7efda..ee1dc63e6 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/io/stubs/VariantContextWriterStub.java
@@ -32,9 +32,9 @@ import org.broadinstitute.sting.utils.classloader.JVMUtils;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
 import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.writer.Options;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriterFactory;
 
 import java.io.File;
@@ -269,7 +269,7 @@ public class VariantContextWriterStub implements Stub<VariantContextWriter>, Var
      * @return
      */
     public boolean alsoWriteBCFForTest() {
-        return engine.getArguments().numberOfThreads == 1 && // only works single threaded
+        return engine.getArguments().numberOfDataThreads == 1 && // only works single threaded
                 ! isCompressed() && // for non-compressed outputs
                 getFile() != null && // that are going to disk
                 engine.getArguments().generateShadowBCF; // and we actually want to do it
diff --git a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
index 6f3f175a2..51fed470f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/phonehome/GATKRunReport.java
@@ -218,7 +218,7 @@ public class GATKRunReport {
         // if there was an exception, capture it
         this.mException = e == null ? null : new ExceptionToXML(e);
 
-        numThreads = engine.getArguments().numberOfThreads;
+        numThreads = engine.getTotalNumberOfThreads();
         percentTimeRunning = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.USER_CPU);
         percentTimeBlocking = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.BLOCKING);
         percentTimeWaiting = getThreadEfficiencyPercent(engine, ThreadEfficiencyMonitor.State.WAITING);
diff --git a/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java b/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
index caae55ac5..f958c9db8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
@@ -24,7 +24,7 @@
 
 package org.broadinstitute.sting.gatk.resourcemanagement;
 
-import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 /**
  * Models how threads are distributed between various components of the GATK.
@@ -33,7 +33,12 @@ public class ThreadAllocation {
     /**
      * The number of CPU threads to be used by the GATK.
      */
-    private final int numCPUThreads;
+    private final int numDataThreads;
+
+    /**
+     * The number of CPU threads per data thread for GATK processing
+     */
+    private final int numCPUThreadsPerDataThread;
 
     /**
      * Number of threads to devote exclusively to IO.  Default is 0.
@@ -45,8 +50,12 @@ public class ThreadAllocation {
      */
     private final boolean monitorEfficiency;
 
-    public int getNumCPUThreads() {
-        return numCPUThreads;
+    public int getNumDataThreads() {
+        return numDataThreads;
+    }
+
+    public int getNumCPUThreadsPerDataThread() {
+        return numCPUThreadsPerDataThread;
     }
 
     public int getNumIOThreads() {
@@ -57,47 +66,50 @@ public class ThreadAllocation {
         return monitorEfficiency;
     }
 
+    /**
+     * Are we running in parallel mode?
+     *
+     * @return true if any parallel processing is enabled
+     */
+    public boolean isRunningInParallelMode() {
+        return getTotalNumThreads() > 1;
+    }
+
+    /**
+     * What is the total number of threads in use by the GATK?
+     *
+     * @return the sum of all thread allocations in this object
+     */
+    public int getTotalNumThreads() {
+        return getNumDataThreads() + getNumCPUThreadsPerDataThread() + getNumIOThreads();
+    }
+
     /**
      * Construct the default thread allocation.
      */
     public ThreadAllocation() {
-        this(1, null, null, false);
+        this(1, 1, 0, false);
     }
 
     /**
      * Set up the thread allocation.  Default allocation is 1 CPU thread, 0 IO threads.
      * (0 IO threads means that no threads are devoted exclusively to IO; they're inline on the CPU thread).
-     * @param totalThreads Complete number of threads to allocate.
-     * @param numCPUThreads Total number of threads allocated to the traversal.
+     * @param numDataThreads Total number of threads allocated to the traversal.
+     * @param numCPUThreadsPerDataThread The number of CPU threads per data thread to allocate
      * @param numIOThreads Total number of threads allocated exclusively to IO.
+     * @param monitorEfficiency should we monitor threading efficiency in the GATK?
      */
-    public ThreadAllocation(final int totalThreads, final Integer numCPUThreads, final Integer numIOThreads, final boolean monitorEfficiency) {
-        // If no allocation information is present, allocate all threads to CPU
-        if(numCPUThreads == null && numIOThreads == null) {
-            this.numCPUThreads = totalThreads;
-            this.numIOThreads = 0;
-        }
-        // If only CPU threads are specified, allocate remainder to IO (minimum 0 dedicated IO threads).
-        else if(numIOThreads == null) {
-            if(numCPUThreads > totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of cpu threads (%d) is higher than the total threads",totalThreads,numCPUThreads));
-            this.numCPUThreads = numCPUThreads;
-            this.numIOThreads = totalThreads - numCPUThreads;
-        }
-        // If only IO threads are specified, allocate remainder to CPU (minimum 1 dedicated CPU thread).
-        else if(numCPUThreads == null) {
-            if(numIOThreads > totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of io threads (%d) is higher than the total threads",totalThreads,numIOThreads));
-            this.numCPUThreads = Math.max(1,totalThreads-numIOThreads);
-            this.numIOThreads = numIOThreads;
-        }
-        else {
-            if(numCPUThreads + numIOThreads != totalThreads)
-                throw new UserException(String.format("Invalid thread allocation.  User requested %d threads in total, but the count of cpu threads (%d) + the count of io threads (%d) does not match",totalThreads,numCPUThreads,numIOThreads));
-            this.numCPUThreads = numCPUThreads;
-            this.numIOThreads = numIOThreads;
-        }
+    public ThreadAllocation(final int numDataThreads,
+                            final int numCPUThreadsPerDataThread,
+                            final int numIOThreads,
+                            final boolean monitorEfficiency) {
+        if ( numDataThreads < 1 ) throw new ReviewedStingException("numDataThreads cannot be less than 1, but saw " + numDataThreads);
+        if ( numCPUThreadsPerDataThread < 1 ) throw new ReviewedStingException("numCPUThreadsPerDataThread cannot be less than 1, but saw " + numCPUThreadsPerDataThread);
+        if ( numIOThreads < 0 ) throw new ReviewedStingException("numIOThreads cannot be less than 0, but saw " + numIOThreads);
 
+        this.numDataThreads = numDataThreads;
+        this.numCPUThreadsPerDataThread = numCPUThreadsPerDataThread;
+        this.numIOThreads = numIOThreads;
         this.monitorEfficiency = monitorEfficiency;
     }
 }

From 225f3a0ebe380ce8283c4adc4e46fcfa91b2155b Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 16:35:00 -0400
Subject: [PATCH 21/26] Update integration test system to allow us to
 differentiate between testing data and cpu parallelism

---
 .../org/broadinstitute/sting/WalkerTest.java  | 37 +++++++++++++++----
 1 file changed, 30 insertions(+), 7 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java
index 7e38c00f3..660259ca8 100755
--- a/public/java/test/org/broadinstitute/sting/WalkerTest.java
+++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java
@@ -40,13 +40,13 @@ import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
-
-import java.io.*;
-
 import org.testng.Assert;
 import org.testng.annotations.AfterSuite;
 import org.testng.annotations.BeforeMethod;
 
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.PrintStream;
 import java.text.SimpleDateFormat;
 import java.util.*;
 
@@ -251,20 +251,43 @@ public class WalkerTest extends BaseTest {
         return false;
     }
 
-    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
-        return executeTest(name, spec, Arrays.asList(1, 4));
+    public enum ParallelTestType {
+        TREE_REDUCIBLE,
+        NANO_SCHEDULED,
+        BOTH
     }
 
-    protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> parallelThreads) {
+    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec, ParallelTestType testType) {
+        final List<Integer> ntThreads  = testType == ParallelTestType.TREE_REDUCIBLE || testType == ParallelTestType.BOTH ? Arrays.asList(1, 4) : Collections.<Integer>emptyList();
+        final List<Integer> cntThreads = testType == ParallelTestType.NANO_SCHEDULED || testType == ParallelTestType.BOTH ? Arrays.asList(1, 4) : Collections.<Integer>emptyList();
+
+        return executeTest(name, spec, ntThreads, cntThreads);
+    }
+
+    protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
+        return executeTestParallel(name, spec, ParallelTestType.BOTH);
+    }
+
+    protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> ntThreads, List<Integer> cpuThreads) {
         String originalArgs = spec.args;
         Pair<List<File>, List<String>> results = null;
 
-        for ( int nt : parallelThreads ) {
+        boolean ran1 = false;
+        for ( int nt : ntThreads ) {
             String extra = nt == 1 ? "" : (" -nt " + nt);
+            ran1 = ran1 || nt == 1;
             spec.args = originalArgs + extra;
             results = executeTest(name + "-nt-" + nt, spec);
         }
 
+        for ( int cnt : cpuThreads ) {
+            if ( cnt != 1 ) {
+                String extra = " -cnt " + cnt;
+                spec.args = originalArgs + extra;
+                results = executeTest(name + "-cnt-" + cnt, spec);
+            }
+        }
+
         return results;
     }
 

From dddf148a595af445d3a9e6ab66bf20a6d8dc3e93 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 16:35:32 -0400
Subject: [PATCH 22/26] Fixed bug in ThreadAllocation getTotalNumberOfThreads

-- It isnt data + cpu its data * cpu threads.
---
 .../sting/gatk/resourcemanagement/ThreadAllocation.java         | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java b/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
index f958c9db8..c86f06c25 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/resourcemanagement/ThreadAllocation.java
@@ -81,7 +81,7 @@ public class ThreadAllocation {
      * @return the sum of all thread allocations in this object
      */
     public int getTotalNumThreads() {
-        return getNumDataThreads() + getNumCPUThreadsPerDataThread() + getNumIOThreads();
+        return getNumDataThreads() * getNumCPUThreadsPerDataThread() + getNumIOThreads();
     }
 
     /**

From c5f1ceaa95d17b9aedd9b2e9a33d7d516fee95b8 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 16:38:21 -0400
Subject: [PATCH 23/26] All read and loci traversals go through NanoScheduler
 now

-- The NanoScheduler is doing a good job at tracking important information like time spent in map/reduce/input etc.
-- Can be disabled with static boolean in MicroScheduler if we have problems
-- See GSA-515 Nanoscheduler GSA-549 Retire TraverseReads and TraverseLoci after testing confirms nano scheduler version in single threaded version is fine
---
 .../sting/gatk/executive/MicroScheduler.java    |  8 +++++---
 .../utils/nanoScheduler/NanoScheduler.java      | 17 +++++++++++++++++
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index bc0d5da96..490f44470 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -59,6 +59,8 @@ import java.util.Collection;
 
 /** Shards and schedules data in manageable chunks. */
 public abstract class MicroScheduler implements MicroSchedulerMBean {
+    // TODO -- remove me and retire non nano scheduled versions of traversals
+    private final static boolean USE_NANOSCHEDULER_FOR_EVERYTHING = true;
     protected static final Logger logger = Logger.getLogger(MicroScheduler.class);
 
     /**
@@ -101,7 +103,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
      */
     public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
         if ( threadAllocation.isRunningInParallelMode() )
-            logger.info(String.format("Running the GATK in parallel mode with %d CPU threads for each of %d data threads",
+            logger.info(String.format("Running the GATK in parallel mode with %d CPU thread(s) for each of %d data thread(s)",
                     threadAllocation.getNumCPUThreadsPerDataThread(), threadAllocation.getNumDataThreads()));
 
         if ( threadAllocation.getNumDataThreads() > 1 ) {
@@ -147,11 +149,11 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
         this.rods = rods;
 
         if (walker instanceof ReadWalker) {
-            traversalEngine = threadAllocation.getNumCPUThreadsPerDataThread() > 1
+            traversalEngine = USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1
                     ? new TraverseReadsNano(threadAllocation.getNumCPUThreadsPerDataThread())
                     : new TraverseReads();
         } else if (walker instanceof LocusWalker) {
-            traversalEngine = threadAllocation.getNumCPUThreadsPerDataThread() > 1
+            traversalEngine = USE_NANOSCHEDULER_FOR_EVERYTHING || threadAllocation.getNumCPUThreadsPerDataThread() > 1
                     ? new TraverseLociNano(threadAllocation.getNumCPUThreadsPerDataThread())
                     : new TraverseLociLinear();
         } else if (walker instanceof DuplicateWalker) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 89e44ce93..ade6dcaf5 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -193,6 +193,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
 
         outsideSchedulerTimer.stop();
+
         ReduceType result;
         if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
             result = executeSingleThreaded(inputReader, map, initialValue, reduce);
@@ -214,13 +215,29 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                                              final NanoSchedulerReduceFunction<MapType, ReduceType> reduce) {
         ReduceType sum = initialValue;
         int i = 0;
+
+        // start timer to ensure that both hasNext and next are caught by the timer
+        if ( TIME_CALLS ) inputTimer.restart();
         while ( inputReader.hasNext() ) {
             final InputType input = inputReader.next();
+            if ( TIME_CALLS ) inputTimer.stop();
+
+            // map
+            if ( TIME_CALLS ) mapTimer.restart();
             final MapType mapValue = map.apply(input);
+            if ( TIME_CALLS ) mapTimer.stop();
+
             if ( i++ % bufferSize == 0 && progressFunction != null )
                 progressFunction.progress(input);
+
+            // reduce
+            if ( TIME_CALLS ) reduceTimer.restart();
             sum = reduce.apply(mapValue, sum);
+            if ( TIME_CALLS ) reduceTimer.stop();
+
+            if ( TIME_CALLS ) inputTimer.restart();
         }
+
         return sum;
     }
 

From e11915aa0aa901d44e73a7d44c2ba1b707e42d21 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 17:37:56 -0400
Subject: [PATCH 24/26] GSA-515 Nanoscheduler GSA-550 ThreadSafeMapReduce
 shouldn't be super interface of TreeReducible

---
 .../broadinstitute/sting/gatk/executive/MicroScheduler.java   | 2 +-
 .../src/org/broadinstitute/sting/gatk/walkers/FlagStat.java   | 2 +-
 .../{ThreadSafeMapReduce.java => NanoSchedulable.java}        | 2 +-
 .../src/org/broadinstitute/sting/gatk/walkers/Pileup.java     | 2 +-
 .../src/org/broadinstitute/sting/gatk/walkers/PrintReads.java | 2 +-
 .../org/broadinstitute/sting/gatk/walkers/TreeReducible.java  | 2 +-
 .../sting/gatk/walkers/bqsr/BaseRecalibrator.java             | 2 +-
 .../sting/gatk/walkers/genotyper/UnifiedGenotyper.java        | 2 +-
 .../org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java   | 3 ++-
 .../org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java   | 3 ++-
 .../org/broadinstitute/sting/gatk/walkers/qc/CountReads.java  | 4 ++--
 11 files changed, 14 insertions(+), 12 deletions(-)
 rename public/java/src/org/broadinstitute/sting/gatk/walkers/{ThreadSafeMapReduce.java => NanoSchedulable.java} (97%)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 490f44470..1da712e8a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -116,7 +116,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
                 return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
             }
         } else {
-            if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof ThreadSafeMapReduce) )
+            if ( threadAllocation.getNumCPUThreadsPerDataThread() > 1 && ! (walker instanceof NanoSchedulable) )
                 throw badNT("cnt", engine, walker);
             return new LinearMicroScheduler(engine, walker, reads, reference, rods, threadAllocation);
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java
index 14d14aca5..b4ef66aaf 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/FlagStat.java
@@ -45,7 +45,7 @@ import java.text.NumberFormat;
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
 @Requires({DataSource.READS})
-public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements ThreadSafeMapReduce {
+public class FlagStat extends ReadWalker<FlagStat.FlagStatus, FlagStat.FlagStatus> implements NanoSchedulable {
     @Output
     PrintStream out;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ThreadSafeMapReduce.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java
similarity index 97%
rename from public/java/src/org/broadinstitute/sting/gatk/walkers/ThreadSafeMapReduce.java
rename to public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java
index 1ce469f8c..731ce7e4e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ThreadSafeMapReduce.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/NanoSchedulable.java
@@ -27,5 +27,5 @@ package org.broadinstitute.sting.gatk.walkers;
  * declare that their map function is thread-safe and so multiple
  * map calls can be run in parallel in the same JVM instance.
  */
-public interface ThreadSafeMapReduce {
+public interface NanoSchedulable {
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
index 607c83966..a3efea9f1 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/Pileup.java
@@ -52,7 +52,7 @@ import java.util.List;
  * samtools pileup [-f in.ref.fasta] [-t in.ref_list] [-l in.site_list] [-iscg] [-T theta] [-N nHap] [-r pairDiffRate] <in.alignment>
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class Pileup extends LocusWalker<String, Integer> implements TreeReducible<Integer> {
+public class Pileup extends LocusWalker<String, Integer> implements TreeReducible<Integer>, NanoSchedulable {
 
     private static final String verboseDelimiter = "@"; // it's ugly to use "@" but it's literally the only usable character not allowed in read names
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
index 4118617fc..37176cbf9 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReads.java
@@ -93,7 +93,7 @@ import java.util.*;
 @ReadTransformersMode(ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
 @BAQMode(QualityMode = BAQ.QualityMode.ADD_TAG, ApplicationTime = ReadTransformer.ApplicationTime.HANDLED_IN_WALKER)
 @Requires({DataSource.READS, DataSource.REFERENCE})
-public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements ThreadSafeMapReduce {
+public class PrintReads extends ReadWalker<GATKSAMRecord, SAMFileWriter> implements NanoSchedulable {
 
     @Output(doc="Write output to this BAM filename instead of STDOUT", required = true)
     SAMFileWriter out;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java
index 8621c0e9d..c950e07e4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/TreeReducible.java
@@ -13,7 +13,7 @@ package org.broadinstitute.sting.gatk.walkers;
  * shards of the data can reduce with each other, and the composite result
  * can be reduced with other composite results.
  */
-public interface TreeReducible<ReduceType> extends ThreadSafeMapReduce {
+public interface TreeReducible<ReduceType> {
     /**
      * A composite, 'reduce of reduces' function.
      * @param lhs 'left-most' portion of data in the composite reduce.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
index 443b493be..43aa85a05 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/BaseRecalibrator.java
@@ -109,7 +109,7 @@ import java.util.ArrayList;
 @ReadFilters({MappingQualityZeroFilter.class, MappingQualityUnavailableFilter.class}) // only look at covered loci, not every loci of the reference file
 @Requires({DataSource.READS, DataSource.REFERENCE}) // filter out all reads with zero or unavailable mapping quality
 @PartitionBy(PartitionType.LOCUS) // this walker requires both -I input.bam and -R reference.fasta
-public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long> {
+public class BaseRecalibrator extends LocusWalker<Long, Long> implements TreeReducible<Long>, NanoSchedulable {
     @ArgumentCollection
     private final RecalibrationArgumentCollection RAC = new RecalibrationArgumentCollection(); // all the command line arguments for BQSR and it's covariates
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 93928a780..32ceff715 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -125,7 +125,7 @@ import java.util.*;
 // TODO -- When LocusIteratorByState gets cleaned up, we should enable multiple @By sources:
 // TODO -- @By( {DataSource.READS, DataSource.REFERENCE_ORDERED_DATA} )
 @Downsample(by=DownsampleType.BY_SAMPLE, toCoverage=250)
-public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatible {
+public class UnifiedGenotyper extends LocusWalker<List<VariantCallContext>, UnifiedGenotyper.UGStatistics> implements TreeReducible<UnifiedGenotyper.UGStatistics>, AnnotatorCompatible, NanoSchedulable {
 
     @ArgumentCollection
     private UnifiedArgumentCollection UAC = new UnifiedArgumentCollection();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java
index bd10eab87..cd295f26e 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountLoci.java
@@ -6,6 +6,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.LocusWalker;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 
@@ -40,7 +41,7 @@ import java.io.PrintStream;
  *
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class CountLoci extends LocusWalker<Integer, Long> implements TreeReducible<Long> {
+public class CountLoci extends LocusWalker<Integer, Long> implements TreeReducible<Long>, NanoSchedulable {
     @Output(doc="Write count to this file instead of STDOUT")
     PrintStream out;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java
index 9915d617e..ab37a2322 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountRODs.java
@@ -37,6 +37,7 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.refdata.utils.RODRecordList;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.RodWalker;
 import org.broadinstitute.sting.gatk.walkers.TreeReducible;
 import org.broadinstitute.sting.utils.GenomeLoc;
@@ -73,7 +74,7 @@ import java.util.*;
  *
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>> {
+public class CountRODs extends RodWalker<CountRODs.Datum, Pair<ExpandingArrayList<Long>, Long>> implements TreeReducible<Pair<ExpandingArrayList<Long>, Long>>, NanoSchedulable {
     @Output
     public PrintStream out;
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java
index 856ea77f5..301fa5b9b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/CountReads.java
@@ -4,9 +4,9 @@ import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
 import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
+import org.broadinstitute.sting.gatk.walkers.NanoSchedulable;
 import org.broadinstitute.sting.gatk.walkers.ReadWalker;
 import org.broadinstitute.sting.gatk.walkers.Requires;
-import org.broadinstitute.sting.gatk.walkers.ThreadSafeMapReduce;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
@@ -41,7 +41,7 @@ import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
  */
 @DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
 @Requires({DataSource.READS, DataSource.REFERENCE})
-public class CountReads extends ReadWalker<Integer, Integer> implements ThreadSafeMapReduce {
+public class CountReads extends ReadWalker<Integer, Integer> implements NanoSchedulable {
     public Integer map(ReferenceContext ref, GATKSAMRecord read, RefMetaDataTracker tracker) {
         return 1;
     }

From 574a8f710b5b193f7a2d4299b5f9222605aa6ff7 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 17:40:02 -0400
Subject: [PATCH 25/26] Add static boolean controlled output of individual map
 call timing to nanoSecond resolution

---
 .../sting/utils/nanoScheduler/NanoScheduler.java               | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index ade6dcaf5..24db0f7dc 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -47,6 +47,7 @@ import java.util.concurrent.*;
 public class NanoScheduler<InputType, MapType, ReduceType> {
     private final static Logger logger = Logger.getLogger(NanoScheduler.class);
     private final static boolean ALLOW_SINGLE_THREAD_FASTPATH = true;
+    private final static boolean LOG_MAP_TIMES = false;
     private final static boolean TIME_CALLS = true;
 
     final int bufferSize;
@@ -224,7 +225,9 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
             // map
             if ( TIME_CALLS ) mapTimer.restart();
+            final long preMapTime = LOG_MAP_TIMES ? 0 : mapTimer.currentTimeNano();
             final MapType mapValue = map.apply(input);
+            if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (mapTimer.currentTimeNano() - preMapTime));
             if ( TIME_CALLS ) mapTimer.stop();
 
             if ( i++ % bufferSize == 0 && progressFunction != null )

From 228bac75e48b390e886d9fdbf222978aaad1fc2b Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Sep 2012 20:57:49 -0400
Subject: [PATCH 26/26] By default do only NT tests in integration tests

---
 public/java/test/org/broadinstitute/sting/WalkerTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/WalkerTest.java b/public/java/test/org/broadinstitute/sting/WalkerTest.java
index 660259ca8..bcfd00aed 100755
--- a/public/java/test/org/broadinstitute/sting/WalkerTest.java
+++ b/public/java/test/org/broadinstitute/sting/WalkerTest.java
@@ -265,7 +265,7 @@ public class WalkerTest extends BaseTest {
     }
 
     protected Pair<List<File>, List<String>> executeTestParallel(final String name, WalkerTestSpec spec) {
-        return executeTestParallel(name, spec, ParallelTestType.BOTH);
+        return executeTestParallel(name, spec, ParallelTestType.TREE_REDUCIBLE);
     }
 
     protected Pair<List<File>, List<String>> executeTest(final String name, WalkerTestSpec spec, List<Integer> ntThreads, List<Integer> cpuThreads) {