From 32ee2c7dffde3210e2c3b183f5f2fefd3a49af23 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Mon, 15 Oct 2012 17:04:29 -0400
Subject: [PATCH 001/236] Refactored the compression interface per sample in
 ReduceReadsa

The CompressionStash is now responsible for keeping track of all intervals that must be kept uncompressed by all samples. In general this is a list generated by a tumor sample that will enforce all normal samples to abide.
  - Updated ReduceReads integration tests
  - Sliding Window is now using the CompressionStash (single sample).

DEV-104 #resolve #time 3m
---
 .../reducereads/CompressionStash.java         | 21 +++++++
 .../reducereads/MultiSampleCompressor.java    |  5 +-
 .../compression/reducereads/ReduceReads.java  |  4 +-
 .../reducereads/SingleSampleCompressor.java   |  7 ++-
 .../reducereads/SlidingWindow.java            | 55 ++++++++++---------
 .../ReduceReadsIntegrationTest.java           | 30 +++++-----
 .../reducereads/SimpleGenomeLoc.java          | 30 ++++++++++
 7 files changed, 107 insertions(+), 45 deletions(-)
 create mode 100644 protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
new file mode 100644
index 000000000..714a4df18
--- /dev/null
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
@@ -0,0 +1,21 @@
+package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
+
+import org.broadinstitute.sting.utils.GenomeLocComparator;
+
+import java.util.TreeSet;
+
+/**
+ * A stash of regions that must be kept uncompressed in all samples
+ *
+ * In general, these are regions that were kept uncompressed by a tumor sample and we want to force
+ * all other samples (normals and/or tumors) to also keep these regions uncompressed
+ *
+ * User: carneiro
+ * Date: 10/15/12
+ * Time: 4:08 PM
+ */
+public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
+    public CompressionStash() {
+        super(new GenomeLocComparator());
+    }
+}
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
index 7c9fc101b..2c3439010 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
@@ -55,11 +55,12 @@ public class MultiSampleCompressor implements Compressor {
                                  final int minBaseQual,
                                  final ReduceReads.DownsampleStrategy downsampleStrategy,
                                  final int nContigs,
-                                 final boolean allowPolyploidReduction) {
+                                 final boolean allowPolyploidReduction,
+                                 final CompressionStash compressionStash) {
         for ( String name : SampleUtils.getSAMFileSamples(header) ) {
             compressorsPerSample.put(name,
                     new SingleSampleCompressor(contextSize, downsampleCoverage,
-                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
+                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction, compressionStash));
         }
     }
 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 5810bc94f..b6761f4a6 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -222,6 +222,8 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
     HashMap<String, Long> readNameHash;                                     // This hash will keep the name of the original read the new compressed name (a number).
     Long nextReadNumber = 1L;                                               // The next number to use for the compressed read name.
 
+    CompressionStash compressionStash = new CompressionStash();
+
     SortedSet<GenomeLoc> intervalList;
     
     private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads";   // The name that will go in the @PG tag
@@ -328,7 +330,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
      */
     @Override
     public ReduceReadsStash reduceInit() {
-        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
+        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION, compressionStash));
     }
 
     /**
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
index 6a086c53b..82a433300 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
@@ -20,6 +20,7 @@ public class SingleSampleCompressor implements Compressor {
     final private ReduceReads.DownsampleStrategy downsampleStrategy;
     final private int nContigs;
     final private boolean allowPolyploidReduction;
+    final CompressionStash compressionStash;
 
     private SlidingWindow slidingWindow;
     private int slidingWindowCounter;
@@ -33,7 +34,8 @@ public class SingleSampleCompressor implements Compressor {
                                   final int minBaseQual,
                                   final ReduceReads.DownsampleStrategy downsampleStrategy,
                                   final int nContigs,
-                                  final boolean allowPolyploidReduction) {
+                                  final boolean allowPolyploidReduction,
+                                  final CompressionStash compressionStash) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
         this.minMappingQuality = minMappingQuality;
@@ -44,6 +46,7 @@ public class SingleSampleCompressor implements Compressor {
         this.downsampleStrategy = downsampleStrategy;
         this.nContigs = nContigs;
         this.allowPolyploidReduction = allowPolyploidReduction;
+        this.compressionStash = compressionStash;
     }
 
     /**
@@ -65,7 +68,7 @@ public class SingleSampleCompressor implements Compressor {
         }
 
         if ( slidingWindow == null) {                                                  // this is the first read
-            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
+            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction, compressionStash);
             slidingWindowCounter++;
         }
 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
index 6fdf85317..22c40c542 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@@ -6,7 +6,6 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileHeader;
 import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
-import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.recalibration.EventType;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
@@ -56,6 +55,7 @@ public class SlidingWindow {
     private final int nContigs;
 
     private boolean allowPolyploidReductionInGeneral;
+    private CompressionStash compressionStash;
 
     /**
      * The types of synthetic reads to use in the finalizeAndAdd method
@@ -87,7 +87,7 @@ public class SlidingWindow {
     }
 
 
-    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
+    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction, CompressionStash compressionStash) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
 
@@ -118,6 +118,7 @@ public class SlidingWindow {
         this.nContigs = nContigs;
 
         this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
+        this.compressionStash = compressionStash;
     }
 
     /**
@@ -145,7 +146,7 @@ public class SlidingWindow {
      * @param variantSite  boolean array with true marking variant regions
      * @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
      */
-    private Pair<Integer, Integer> getNextVariantRegion(int from, int to, boolean[] variantSite) {
+    private SimpleGenomeLoc getNextVariantRegion(int from, int to, boolean[] variantSite) {
         boolean foundStart = false;
         int variantRegionStartIndex = 0;
         for (int i=from; i<to; i++) {
@@ -154,10 +155,10 @@ public class SlidingWindow {
                 foundStart = true;
             }
             else if(!variantSite[i] && foundStart) {
-                return(new Pair<Integer, Integer>(variantRegionStartIndex, i-1));
+                return(new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, i-1, true));
             }
         }
-        return (foundStart) ? new Pair<Integer, Integer>(variantRegionStartIndex, -1) : null;
+        return (foundStart) ? new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, to-1, false) : null;
     }
 
     /**
@@ -168,23 +169,22 @@ public class SlidingWindow {
      * @param variantSite  boolean array with true marking variant regions
      * @return a list with start/stops of variant regions following getNextVariantRegion description
      */
-    private List<Pair<Integer, Integer>> getAllVariantRegions(int from, int to, boolean[] variantSite) {
-        List<Pair<Integer,Integer>> regions = new LinkedList<Pair<Integer, Integer>>();
+    private CompressionStash getVariantRegionsFromThisSample(int from, int to, boolean[] variantSite) {
+        CompressionStash regions = new CompressionStash();
         int index = from;
         while(index < to) {
-            Pair<Integer,Integer> result = getNextVariantRegion(index, to, variantSite);
+            SimpleGenomeLoc result = getNextVariantRegion(index, to, variantSite);
             if (result == null)
                 break;
 
             regions.add(result);
-            if (result.getSecond() < 0)
+            if (result.getStop() < 0)
                 break;
-            index = result.getSecond() + 1;
+            index = result.getStop() + 1;
         }
         return regions;
     }
 
-
     /**
      * Determines if the window can be slid given the new incoming read.
      *
@@ -203,7 +203,7 @@ public class SlidingWindow {
             boolean[] variantSite = markSites(getStartLocation(windowHeader) + readStartHeaderIndex);
             int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0);                                       // this is the limit of what we can close/send to consensus (non-inclusive)
 
-            List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, breakpoint, variantSite);
+            CompressionStash regions = getVariantRegionsFromThisSample(0, breakpoint, variantSite);
             finalizedReads = closeVariantRegions(regions, false);
 
             List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
@@ -567,26 +567,31 @@ public class SlidingWindow {
         result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
         result.addAll(finalizeAndAdd(ConsensusType.BOTH));
 
-        return result;                                                                                                  // finalized reads will be downsampled if necessary
+        return result;                                      // finalized reads will be downsampled if necessary
     }
 
 
-    private List<GATKSAMRecord> closeVariantRegions(List<Pair<Integer, Integer>> regions, boolean forceClose) {
+    private List<GATKSAMRecord> closeVariantRegions(CompressionStash regions, boolean forceClose) {
         List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
         if (!regions.isEmpty()) {
             int lastStop = -1;
-            for (Pair<Integer, Integer> region : regions) {
-                int start = region.getFirst();
-                int stop = region.getSecond();
-                if (stop < 0 && forceClose)
-                    stop = windowHeader.size() - 1;
-                if (stop >= 0) {
-                    allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
-                    lastStop = stop;
+            for (SimpleGenomeLoc region : regions) {
+                int start = region.getStart();
+                int stop = region.getStop();
+
+                if (!region.isFinished()) {
+                    if(forceClose)                          // region is unfinished but we're forcing the close of this window
+                        stop = windowHeader.size() - 1;
+                    else
+                        continue;                           // region is unfinished and we're not forcing the close of this window
                 }
+
+                allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
+                lastStop = stop;
             }
-            for (int i = 0; i < lastStop; i++)                                                                          // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
-                windowHeader.remove();                                                                                  // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
+
+            for (int i = 0; i < lastStop; i++)              // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
+                windowHeader.remove();                      // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
         }
         return allReads;
     }
@@ -626,7 +631,7 @@ public class SlidingWindow {
 
         if (!windowHeader.isEmpty()) {
             boolean[] variantSite = markSites(getStopLocation(windowHeader) + 1);
-            List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), variantSite);
+            CompressionStash regions = getVariantRegionsFromThisSample(0, windowHeader.size(), variantSite);
             finalizedReads = closeVariantRegions(regions, true);
 
             if (!windowHeader.isEmpty()) {
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index db8ea4eb8..89f251ed4 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -21,36 +21,36 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
         executeTest(testName, spec);
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testDefaultCompression() {
-        RRTest("testDefaultCompression ", L, "323dd4deabd7767efa0f2c6e7fa4189f");
+        RRTest("testDefaultCompression ", L, "1f95f3193bd9f120a73c34a0087abaf6");
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testMultipleIntervals() {
         String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
-        RRTest("testMultipleIntervals ", intervals, "c437fb160547ff271f8eba30e5f3ff76");
+        RRTest("testMultipleIntervals ", intervals, "79213d6ac68d56d4d72dcf511223e424");
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testHighCompression() {
-        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "3a607bc3ebaf84e9dc44e005c5f8a047");
+        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "dab2aa8e3655139974bbe12a568363d9");
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testLowCompression() {
         RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testIndelCompression() {
-        RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f7b9fa44c10bc4b2247813d2b8dc1973");
+        RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "1255245ed4ebeacda90f0dbb4e4da081");
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testFilteredDeletionCompression() {
         String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
-        executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("891bd6dcda66611f343e8ff25f34aaeb")));
+        executeTest("testFilteredDeletionCompression", new WalkerTestSpec(base, Arrays.asList("122e4e60c4412a31d0aeb3cce879e841")));
     }
 
     /**
@@ -61,20 +61,20 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
      * 
      * This bam is simplified to replicate the exact bug with the three provided intervals.
      */
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testAddingReadAfterTailingTheStash() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
-        executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("886b43e1f26ff18425814dc7563931c6")));
+        executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("4b590269cbe3574dbdd5bdc2bc6f5f1c")));
     }
 
     /**
      * Divide by zero bug reported by GdA and users in the forum. Happens when the downsampler goes over a region where all reads get
      * filtered out.
      */
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testDivideByZero() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
-        executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("93ffdc209d4cc0fc4f0169ca9be55cc2")));
+        executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("d8d066304f7c187f182bfb50f39baa0c")));
     }
 
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
new file mode 100644
index 000000000..45e105751
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
@@ -0,0 +1,30 @@
+package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+
+/**
+ * GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
+ * with them.
+ *
+ * However, GenomeLocs are bound to strict validation through the GenomeLocParser and cannot
+ * be created easily for small tasks that do not require the rigors of the GenomeLocParser validation
+ *
+ * SimpleGenomeLoc is a simple utility to create GenomeLocs without going through the parser. Should
+ * only be used outside of the engine.
+ *
+ * User: carneiro
+ * Date: 10/16/12
+ * Time: 2:07 PM
+ */
+public class SimpleGenomeLoc extends GenomeLoc {
+    private boolean finished;
+
+    public SimpleGenomeLoc(String contigName, int contigIndex, int start, int stop, boolean finished) {
+        super(contigName,  contigIndex, start, stop);
+        this.finished = finished;
+    }
+
+    public boolean isFinished() {
+        return finished;
+    }
+}

From 55ac4ba70bfe28cdde85b3efc002813e857ca88e Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Wed, 17 Oct 2012 19:39:03 -0400
Subject: [PATCH 002/236] Added another utility that can convert to
 RemoteFiles. QScripts will now generate remote versions of files if the
 caller has not already passed in remote versions (or the QScript replaces the
 passed in remote references... not good) Instead of having yet another
 plugin, combined QStatusMessenger and RemoteFileConverter under general
 QCommandPlugin trait.

---
 .../sting/queue/QCommandLine.scala            | 53 ++++++++++++-------
 .../sting/queue/QCommandPlugin.scala          |  9 ++++
 .../broadinstitute/sting/queue/QScript.scala  | 25 +++++++++
 .../sting/queue/util/ClassFieldCache.scala    | 11 ++++
 .../queue/util/RemoteFileConverter.scala      | 21 ++++++++
 5 files changed, 100 insertions(+), 19 deletions(-)
 create mode 100644 public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
 create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/RemoteFileConverter.scala

diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 5b84bfd16..2afa66d9c 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -24,7 +24,6 @@
 
 package org.broadinstitute.sting.queue
 
-import function.QFunction
 import java.io.File
 import org.broadinstitute.sting.commandline._
 import org.broadinstitute.sting.queue.util._
@@ -96,18 +95,18 @@ class QCommandLine extends CommandLineProgram with Logging {
     new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
   }
 
-  private lazy val qStatusMessengerPluginManager = {
-    new PluginManager[QStatusMessenger](classOf[QStatusMessenger])
+  private lazy val qCommandPlugin = {
+    new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
   }
 
-  ClassFieldCache.parsingEngine = new ParsingEngine(this)
-
   /**
    * Takes the QScripts passed in, runs their script() methods, retrieves their generated
    * functions, and then builds and runs a QGraph based on the dependencies.
    */
   def execute = {
-    val allStatusMessengers = qStatusMessengerPluginManager.createAllTypes()
+    ClassFieldCache.parsingEngine = this.parser
+
+    val allCommandPlugins = qCommandPlugin.createAllTypes()
 
     if (settings.qSettings.runName == null)
       settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
@@ -115,14 +114,24 @@ class QCommandLine extends CommandLineProgram with Logging {
       settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
     qGraph.initializeWithSettings(settings)
 
-    for (statusMessenger <- allStatusMessengers) {
-      loadArgumentsIntoObject(statusMessenger)
+    for (commandPlugin <- allCommandPlugins) {
+      loadArgumentsIntoObject(commandPlugin)
     }
 
-    for (statusMessenger <- allStatusMessengers) {
-      statusMessenger.started()
+    for (commandPlugin <- allCommandPlugins) {
+      if (commandPlugin.statusMessenger != null)
+        commandPlugin.statusMessenger.started()
     }
 
+    // TODO: Default command plugin argument?
+    val remoteFileConverter = (
+      for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
+        yield commandPlugin.remoteFileConverter
+      ).headOption.getOrElse(null)
+
+    if (remoteFileConverter != null)
+      loadArgumentsIntoObject(remoteFileConverter)
+
     val allQScripts = qScriptPluginManager.createAllTypes()
     for (script <- allQScripts) {
       logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
@@ -137,10 +146,15 @@ class QCommandLine extends CommandLineProgram with Logging {
         case e: Exception =>
           throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
       }
+
+      if (remoteFileConverter != null) {
+        if (remoteFileConverter.convertToRemoteEnabled)
+          script.mkRemoteOutputs(remoteFileConverter)
+      }
+
       script.functions.foreach(qGraph.add(_))
       logger.info("Added " + script.functions.size + " functions")
     }
-
     // Execute the job graph
     qGraph.run()
 
@@ -162,14 +176,16 @@ class QCommandLine extends CommandLineProgram with Logging {
     if (!success) {
       logger.info("Done with errors")
       qGraph.logFailed()
-      for (statusMessenger <- allStatusMessengers)
-        statusMessenger.exit("Done with errors")
+      for (commandPlugin <- allCommandPlugins)
+        if (commandPlugin.statusMessenger != null)
+          commandPlugin.statusMessenger.exit("Done with errors")
       1
     } else {
       if (settings.run) {
         allQScripts.foreach(_.pushOutputs())
-        for (statusMessenger <- allStatusMessengers)
-          statusMessenger.done(allQScripts.map(_.remoteOutputs))
+        for (commandPlugin <- allCommandPlugins)
+          if (commandPlugin.statusMessenger != null)
+            commandPlugin.statusMessenger.done(allQScripts.map(_.remoteOutputs))
       }
       0
     }
@@ -189,7 +205,7 @@ class QCommandLine extends CommandLineProgram with Logging {
   override def getArgumentSources = {
     var plugins = Seq.empty[Class[_]]
     plugins ++= qScriptPluginManager.getPlugins
-    plugins ++= qStatusMessengerPluginManager.getPlugins
+    plugins ++= qCommandPlugin.getPlugins
     plugins.toArray
   }
 
@@ -200,11 +216,10 @@ class QCommandLine extends CommandLineProgram with Logging {
   override def getArgumentSourceName(source: Class[_]) = {
     if (classOf[QScript].isAssignableFrom(source))
       qScriptPluginManager.getName(source.asSubclass(classOf[QScript]))
-    else if (classOf[QStatusMessenger].isAssignableFrom(source))
-      qStatusMessengerPluginManager.getName(source.asSubclass(classOf[QStatusMessenger]))
+    else if (classOf[QCommandPlugin].isAssignableFrom(source))
+      qCommandPlugin.getName(source.asSubclass(classOf[QCommandPlugin]))
     else
       null
-
   }
 
   /**
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
new file mode 100644
index 000000000..499c31554
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
@@ -0,0 +1,9 @@
+package org.broadinstitute.sting.queue
+
+import engine.QStatusMessenger
+import util.RemoteFileConverter
+
+trait QCommandPlugin {
+  def statusMessenger: QStatusMessenger = null
+  def remoteFileConverter: RemoteFileConverter = null
+}
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 2dcfb916c..3df61b1e3 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -108,6 +108,24 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
     functions.foreach( f => add(f) )
   }
 
+  /**
+   * Convert all @Output files to remote output files.
+   * @param remoteFileConverter Converter for files to remote files.
+   */
+  def mkRemoteOutputs(remoteFileConverter: RemoteFileConverter) {
+    for (field <- outputFields) {
+      val fieldFile = ClassFieldCache.getFieldFile(this, field)
+      if (fieldFile != null && !fieldFile.isInstanceOf[RemoteFile]) {
+        val fieldName = ClassFieldCache.fullName(field)
+        val remoteFile = remoteFileConverter.convertToRemote(fieldFile, fieldName)
+        ClassFieldCache.setFieldValue(this, field, remoteFile)
+      }
+    }
+  }
+
+  /**
+   * Pull all remote files to the local disk.
+   */
   def pullInputs() {
     val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
     for (remoteFile <- filterRemoteFiles(inputs)) {
@@ -116,6 +134,9 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
     }
   }
 
+  /**
+   * Push all remote files from the local disk.
+   */
   def pushOutputs() {
     val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
     for (remoteFile <- filterRemoteFiles(outputs)) {
@@ -124,6 +145,10 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
     }
   }
 
+  /**
+   * List out the remote outputs
+   * @return the RemoteFile outputs by argument source
+   */
   def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
     outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala
index 870dd5617..ae3db6860 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/ClassFieldCache.scala
@@ -180,4 +180,15 @@ object ClassFieldCache {
     case unknown => throw new QException("Non-file found.  Try removing the annotation, change the annotation to @Argument, or extend File with FileExtension: %s: %s".format(field.field, unknown))
   }
 
+
+  //
+  // other utilities
+  //
+
+  /**
+   * Retrieves the fullName of the argument
+   * @param field ArgumentSource to check
+   * @return Full name of the argument source
+   */
+  def fullName(field: ArgumentSource) = field.createArgumentDefinitions().get(0).fullName
 }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFileConverter.scala b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFileConverter.scala
new file mode 100644
index 000000000..c77c242d0
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/RemoteFileConverter.scala
@@ -0,0 +1,21 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io.File
+
+trait RemoteFileConverter {
+  type RemoteFileType <: RemoteFile
+
+  /**
+   * If this remote file creator is capable of converting to a remote file.
+   * @return true if ready to convert
+   */
+  def convertToRemoteEnabled: Boolean
+
+  /**
+   * Converts to a remote file
+   * @param file The original file
+   * @param name A "name" to use for the remote file
+   * @return The new version of this remote file.
+   */
+  def convertToRemote(file: File, name: String): RemoteFileType
+}

From 7860ff7981510b67e6f4af4aeff73ef19ee2f245 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Mon, 22 Oct 2012 19:59:15 -0400
Subject: [PATCH 010/236] a) Resolve [#DEV-56] - test data with indels in new
 directory private/testdata/CMITestData/. b) Skeleton (not yet working) of
 fastq-BAM unit test, c) misc bug fixes for QC functions to work (not done
 yet)

---
 .../picard/CollectGcBiasMetrics.scala         |  5 +-
 .../picard/CollectMultipleMetrics.scala       |  4 +-
 .../picard/PicardMetricsFunction.scala        | 53 +++++++++++++++++++
 3 files changed, 58 insertions(+), 4 deletions(-)
 create mode 100644 public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardMetricsFunction.scala

diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
index e783422b3..cba6ce2bb 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
@@ -11,8 +11,8 @@ import java.io.File
  * To change this template use File | Settings | File Templates.
  */
 class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction {
-  analysisName = "CalculateGcMetrics"
-  javaMainClass = "net.sf.picard.analysis.GcBiasSummaryMetrics"
+  analysisName = "CollectGcBiasMetrics"
+  javaMainClass = "net.sf.picard.analysis.CollectGcBiasMetrics"
 
   @Input(doc="The input SAM or BAM files to analyze.  Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
   var input: Seq[File] = Nil
@@ -26,6 +26,7 @@ class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaC
   override def inputBams = input
   override def outputFile = output
   override def commandLine = super.commandLine +
+    required("SUMMARY_OUTPUT=" + output) +
     required("CHART_OUTPUT=" + output+".pdf") +
     required("REFERENCE_SEQUENCE=" + reference) +
     required("ASSUME_SORTED=true")
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
index eab6c79ce..b9af5258c 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
@@ -11,8 +11,8 @@ import java.io.File
  * To change this template use File | Settings | File Templates.
  */
 class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction{
-  analysisName = "CalculateMultipleMetrics"
-  javaMainClass = "net.sf.picard.analysis.CalculateMultipleMetrics"
+  analysisName = "CollectMultipleMetrics"
+  javaMainClass = "net.sf.picard.analysis.CollectMultipleMetrics"
 
   @Input(doc="The input SAM or BAM files to analyze.  Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
   var input: Seq[File] = Nil
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardMetricsFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardMetricsFunction.scala
new file mode 100644
index 000000000..89169e972
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/PicardMetricsFunction.scala
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.queue.extensions.picard
+
+import java.io.File
+import org.broadinstitute.sting.queue.function.JavaCommandLineFunction
+import net.sf.samtools.SAMFileReader.ValidationStringency
+import net.sf.samtools.SAMFileHeader.SortOrder
+
+/**
+ * Wraps a Picard function that operates on BAM files but doesn't output a new BAM file (i.e. QC metric files).
+ * See http://picard.sourceforge.net/ for more info.
+ *
+ * Since the various BAM utilities take slightly different arguments
+ * some values are optional.
+ */
+trait PicardMetricsFunction extends JavaCommandLineFunction {
+  var validationStringency = ValidationStringency.SILENT
+  var maxRecordsInRam: Option[Int] = None
+  var assumeSorted: Option[Boolean] = None
+  protected def inputBams: Seq[File]
+  protected def outputFile: File
+
+  abstract override def commandLine = super.commandLine +
+    repeat("INPUT=", inputBams, spaceSeparated=false) +
+    required("TMP_DIR=" + jobTempDir) +
+    optional("VALIDATION_STRINGENCY=", validationStringency, spaceSeparated=false) +
+    optional("OUTPUT=", outputFile, spaceSeparated=false) +
+    optional("MAX_RECORDS_IN_RAM=", maxRecordsInRam, spaceSeparated=false) +
+    optional("ASSUME_SORTED=", assumeSorted, spaceSeparated=false)
+}

From bbf7a0fb091937691551a686171266852dffccca Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Mon, 22 Oct 2012 16:09:49 -0400
Subject: [PATCH 013/236] Adding integration test to ReduceReads coreduction
 DEV-117 #resolve

---
 .../reducereads/ReduceReadsIntegrationTest.java          | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 89f251ed4..73b7025c3 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -14,6 +14,9 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     final String DIVIDEBYZERO_BAM = validationDataLocation + "ReduceReadsDivideByZeroBug.bam";
     final String DIVIDEBYZERO_L = " -L " + validationDataLocation + "ReduceReadsDivideByZeroBug.intervals";
     final String L = " -L 20:10,100,000-10,120,000 ";
+    final String COREDUCTION_BAM_A = validationDataLocation + "coreduction.test.A.bam";
+    final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
+    final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
 
     private void RRTest(String testName, String args, String md5) {
         String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s ";
@@ -77,5 +80,11 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
         executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("d8d066304f7c187f182bfb50f39baa0c")));
     }
 
+    @Test(enabled = true)
+    public void testCoReduction() {
+        String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
+        executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("")));
+    }
+
 }
 

From 4cd1a923587779b132a3e76835c7aa5cf79c0fe4 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 23 Oct 2012 11:26:26 -0400
Subject: [PATCH 014/236] Updating RR integration tests

Forgot to update the integration tests after merging DEV-117 with optimizations from GATK main repo.
---
 .../reducereads/ReduceReadsIntegrationTest.java    | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 73b7025c3..50500536f 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -26,28 +26,28 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
 
     @Test(enabled = true)
     public void testDefaultCompression() {
-        RRTest("testDefaultCompression ", L, "1f95f3193bd9f120a73c34a0087abaf6");
+        RRTest("testDefaultCompression ", L, "46ea88e32bae3072f5cd68a0db4b55f1");
     }
 
     @Test(enabled = true)
     public void testMultipleIntervals() {
         String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
-        RRTest("testMultipleIntervals ", intervals, "79213d6ac68d56d4d72dcf511223e424");
+        RRTest("testMultipleIntervals ", intervals, "c3784a0b42f5456b705f9b152a4b697a");
     }
 
     @Test(enabled = true)
     public void testHighCompression() {
-        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "dab2aa8e3655139974bbe12a568363d9");
+        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e385eb0ae5768f8507671d5303a212d5");
     }
 
     @Test(enabled = true)
     public void testLowCompression() {
-        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "7c9b4a70c2c90b0a995800aa42852e63");
+        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "6b5546be9363e493b9838542f5dc8cae");
     }
 
     @Test(enabled = true)
     public void testIndelCompression() {
-        RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "1255245ed4ebeacda90f0dbb4e4da081");
+        RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "f6c9ea83608f35f113cf1f62a77ee6d0");
     }
 
     @Test(enabled = true)
@@ -67,7 +67,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     @Test(enabled = true)
     public void testAddingReadAfterTailingTheStash() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
-        executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("4b590269cbe3574dbdd5bdc2bc6f5f1c")));
+        executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(base, Arrays.asList("647b0f0f95730de8e6bc4f74186ad4df")));
     }
 
     /**
@@ -77,7 +77,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     @Test(enabled = true)
     public void testDivideByZero() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s", DIVIDEBYZERO_L, REF, DIVIDEBYZERO_BAM) + " -o %s ";
-        executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("d8d066304f7c187f182bfb50f39baa0c")));
+        executeTest("testDivideByZero", new WalkerTestSpec(base, Arrays.asList("2c87985972dd43ee9dd50b463d93a511")));
     }
 
     @Test(enabled = true)

From 0cce1ae8b2fc568182befeefe3d00cdc92c434c2 Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Tue, 23 Oct 2012 12:38:39 -0400
Subject: [PATCH 015/236] When gathering VCFs, using CombineVariants from the
 current classpath, and not the GATK used to run the command. This was a
 concern for external modules that bundled the engine but not CombineVariants.

---
 .../sting/queue/extensions/gatk/VcfGatherFunction.scala          | 1 -
 1 file changed, 1 deletion(-)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
index fb22554f0..3fb5101d0 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/gatk/VcfGatherFunction.scala
@@ -39,7 +39,6 @@ class VcfGatherFunction extends CombineVariants with GatherFunction with RetryMe
   private lazy val originalGATK = this.originalFunction.asInstanceOf[CommandLineGATK]
 
   override def freezeFieldValues() {
-    this.jarFile = this.originalGATK.jarFile
     this.variant = this.gatherParts.zipWithIndex map { case (input, index) => new TaggedFile(input, "input"+index) }
     this.out = this.originalOutput
     GATKIntervals.copyIntervalArguments(this.originalGATK, this)

From 5fac5bf12eb8c40af4809f0d4a31812a29d3b4a0 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Tue, 23 Oct 2012 14:08:31 -0400
Subject: [PATCH 017/236] Fixed issues with Queue packaging of Picard QC
 classes: separate jar's are needed fromPicard. User needs to specify the
 -picardBase argument to point to input path for jars. > Also, reenable joint
 cleaning as now it works. > DEV-125 #resolve > DEV-90 #resolve

---
 .../sting/queue/extensions/picard/CalculateHsMetrics.scala       | 1 -
 .../sting/queue/extensions/picard/CollectGcBiasMetrics.scala     | 1 -
 .../sting/queue/extensions/picard/CollectMultipleMetrics.scala   | 1 -
 3 files changed, 3 deletions(-)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala
index aa36e29b6..3db498210 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CalculateHsMetrics.scala
@@ -13,7 +13,6 @@ import net.sf.picard.analysis.MetricAccumulationLevel
  */
 class CalculateHsMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction {
   analysisName = "CalculateHsMetrics"
-  javaMainClass = "net.sf.picard.analysis.directed.CalculateHsMetrics"
 
   @Input(doc="The input SAM or BAM files to analyze.  Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
   var input: Seq[File] = Nil
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
index cba6ce2bb..fa655206d 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectGcBiasMetrics.scala
@@ -12,7 +12,6 @@ import java.io.File
  */
 class CollectGcBiasMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction {
   analysisName = "CollectGcBiasMetrics"
-  javaMainClass = "net.sf.picard.analysis.CollectGcBiasMetrics"
 
   @Input(doc="The input SAM or BAM files to analyze.  Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
   var input: Seq[File] = Nil
diff --git a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
index b9af5258c..3695114c4 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/extensions/picard/CollectMultipleMetrics.scala
@@ -12,7 +12,6 @@ import java.io.File
  */
 class CollectMultipleMetrics extends org.broadinstitute.sting.queue.function.JavaCommandLineFunction with PicardMetricsFunction{
   analysisName = "CollectMultipleMetrics"
-  javaMainClass = "net.sf.picard.analysis.CollectMultipleMetrics"
 
   @Input(doc="The input SAM or BAM files to analyze.  Must be coordinate sorted.", shortName = "input", fullName = "input_bam_files", required = true)
   var input: Seq[File] = Nil

From 8dfa24df7b9d163b2a5f46c3bd2d89173efa7b62 Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Tue, 23 Oct 2012 12:34:26 -0400
Subject: [PATCH 018/236] Sending a version of per job status messages. In
 addition to outputs, inputs are passed to QStatusMessenger.done()
 CloneFunction.cloneIndex has a new CloneFunction.cloneCount companion useful
 for display purposes.

---
 .../sting/queue/QCommandLine.scala            | 11 ++++--
 .../broadinstitute/sting/queue/QScript.scala  | 13 +++++--
 .../sting/queue/engine/QGraph.scala           | 34 +++++++++++++++----
 .../sting/queue/engine/QStatusMessenger.scala |  6 +++-
 .../scattergather/CloneFunction.scala         |  1 +
 .../ScatterGatherableFunction.scala           |  1 +
 6 files changed, 54 insertions(+), 12 deletions(-)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 2afa66d9c..65abaf7be 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -123,6 +123,8 @@ class QCommandLine extends CommandLineProgram with Logging {
         commandPlugin.statusMessenger.started()
     }
 
+    qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != null).map(_.statusMessenger).toSeq
+
     // TODO: Default command plugin argument?
     val remoteFileConverter = (
       for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
@@ -178,14 +180,17 @@ class QCommandLine extends CommandLineProgram with Logging {
       qGraph.logFailed()
       for (commandPlugin <- allCommandPlugins)
         if (commandPlugin.statusMessenger != null)
-          commandPlugin.statusMessenger.exit("Done with errors")
+          commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts))
       1
     } else {
       if (settings.run) {
         allQScripts.foreach(_.pushOutputs())
         for (commandPlugin <- allCommandPlugins)
-          if (commandPlugin.statusMessenger != null)
-            commandPlugin.statusMessenger.done(allQScripts.map(_.remoteOutputs))
+          if (commandPlugin.statusMessenger != null) {
+            val allInputs = allQScripts.map(_.remoteInputs)
+            val allOutputs = allQScripts.map(_.remoteOutputs)
+            commandPlugin.statusMessenger.done(allInputs, allOutputs)
+          }
       }
       0
     }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 3df61b1e3..8c834696c 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -149,8 +149,17 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
    * List out the remote outputs
    * @return the RemoteFile outputs by argument source
    */
-  def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] =
-    outputFields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
+  def remoteInputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(inputFields)
+
+  /**
+   * List out the remote outputs
+   * @return the RemoteFile outputs by argument source
+   */
+  def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(outputFields)
+
+  private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = {
+    fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
+  }
 
   private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] =
     fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile])
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
index 2c33596e1..4f7dd665d 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QGraph.scala
@@ -47,6 +47,7 @@ import java.io.{OutputStreamWriter, File}
  */
 class QGraph extends Logging {
   var settings: QGraphSettings = _
+  var messengers: Seq[QStatusMessenger] = Nil
 
   private def dryRun = !settings.run
   private var numMissingValues = 0
@@ -95,7 +96,7 @@ class QGraph extends Logging {
    * The settings aren't necessarily available until after this QGraph object has been constructed, so
    * this function must be called once the QGraphSettings have been filled in.
    *
-   * @param settings
+   * @param settings QGraphSettings
    */
   def initializeWithSettings(settings: QGraphSettings) {
     this.settings = settings
@@ -430,6 +431,7 @@ class QGraph extends Logging {
           val edge = readyJobs.head
           edge.runner = newRunner(edge.function)
           edge.start()
+          messengers.foreach(_.started(jobShortName(edge.function)))
           startedJobs += edge
           readyJobs -= edge
           logNextStatusCounts = true
@@ -465,8 +467,14 @@ class QGraph extends Logging {
         updateStatus()
 
         runningJobs.foreach(edge => edge.status match {
-          case RunnerStatus.DONE => doneJobs += edge
-          case RunnerStatus.FAILED => failedJobs += edge
+          case RunnerStatus.DONE => {
+            doneJobs += edge
+            messengers.foreach(_.done(jobShortName(edge.function)))
+          }
+          case RunnerStatus.FAILED => {
+            failedJobs += edge
+            messengers.foreach(_.exit(jobShortName(edge.function), edge.function.jobErrorLines.mkString("%n".format())))
+          }
           case RunnerStatus.RUNNING => /* do nothing while still running */
         })
 
@@ -493,7 +501,7 @@ class QGraph extends Logging {
         // incremental
         if ( logNextStatusCounts && INCREMENTAL_JOBS_REPORT ) {
           logger.info("Writing incremental jobs reports...")
-          writeJobsReport(false)
+          writeJobsReport(plot = false)
         }
 
         readyJobs ++= getReadyJobs
@@ -516,9 +524,13 @@ class QGraph extends Logging {
   private def nextRunningCheck(lastRunningCheck: Long) =
     ((30 * 1000L) - (System.currentTimeMillis - lastRunningCheck))
 
+  def formattedStatusCounts: String = {
+    "%d Pend, %d Run, %d Fail, %d Done".format(
+      statusCounts.pending, statusCounts.running, statusCounts.failed, statusCounts.done)
+  }
+
   private def logStatusCounts() {
-    logger.info("%d Pend, %d Run, %d Fail, %d Done".format(
-      statusCounts.pending, statusCounts.running, statusCounts.failed, statusCounts.done))
+    logger.info(formattedStatusCounts)
   }
 
   /**
@@ -533,6 +545,16 @@ class QGraph extends Logging {
     traverseFunctions(edge => recheckDone(edge))
   }
 
+  // TODO: Yet another field to add (with overloads) to QFunction?
+  private def jobShortName(function: QFunction): String = {
+    var name = function.analysisName
+    if (function.isInstanceOf[CloneFunction]) {
+      val cloneFunction = function.asInstanceOf[CloneFunction]
+      name += " %d of %d".format(cloneFunction.cloneIndex, cloneFunction.cloneCount)
+    }
+    name
+  }
+
   /**
    * First pass that checks if an edge is done or if it's an intermediate edge if it can be skipped.
    * This function may modify the status of previous edges if it discovers that the edge passed in
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
index eeabe6d1d..c4151dafc 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
@@ -8,6 +8,10 @@ import org.broadinstitute.sting.queue.util.RemoteFile
  */
 trait QStatusMessenger {
   def started()
-  def done(files: Seq[Map[ArgumentSource, Seq[RemoteFile]]])
+  def done(inputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]], outputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]])
   def exit(message: String)
+
+  def started(job: String)
+  def done(job: String)
+  def exit(job: String, message: String)
 }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala
index 91cacbb71..861db3f80 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/CloneFunction.scala
@@ -38,6 +38,7 @@ object CloneFunction {
 class CloneFunction extends CommandLineFunction {
   var originalFunction: ScatterGatherableFunction = _
   var cloneIndex: Int = _
+  var cloneCount: Int = _
 
   private var overriddenFields = Map.empty[ArgumentSource, Any]
   private var withScatterPartCount = 0
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
index 5dd7d4c79..b00437f9f 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/scattergather/ScatterGatherableFunction.scala
@@ -176,6 +176,7 @@ trait ScatterGatherableFunction extends CommandLineFunction {
       cloneFunction.originalFunction = this
       cloneFunction.analysisName = this.analysisName
       cloneFunction.cloneIndex = i
+      cloneFunction.cloneCount = numClones
       cloneFunction.commandDirectory = this.scatterGatherTempDir(dirFormat.format(i))
       cloneFunction.jobOutputFile = if (IOUtils.isSpecialFile(this.jobOutputFile)) this.jobOutputFile else new File(this.jobOutputFile.getName)
       if (this.jobErrorFile != null)

From 596c1723aeec43ebf2feb7ede387b1954910736c Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Thu, 25 Oct 2012 10:35:43 -0400
Subject: [PATCH 024/236] Hidden, unsupported ability of VariantEval to run
 AlleleCount stratification on sites-only VCFs. I'll expose it/add tests on it
 if people think this is generaly useful. User needs to specify total # of
 samples as command line argument since genotypes are not available. Also,
 fixes to large-scale validation script: lower -minIndelFrac threshold or else
 we'll kill most indels since default 0.25 is too high for pools, fix also VE
 stratifications and add one VE run where eval=1KG, comp=pool data and AC
 stratification based on 1KG annotation

---
 .../sting/gatk/walkers/varianteval/VariantEval.java  | 12 ++++++++++++
 .../varianteval/stratifications/AlleleCount.java     |  2 +-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
index a73e125ad..201028d99 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEval.java
@@ -183,6 +183,10 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
     @Argument(fullName="keepAC0", shortName="keepAC0", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
     private boolean keepSitesWithAC0 = false;
 
+    @Hidden
+    @Argument(fullName="numSamples", shortName="numSamples", doc="If provided, modules that track polymorphic sites will not require that a site have AC > 0 when the input eval has genotypes", required=false)
+    private int numSamplesFromArgument = 0;
+
     /**
      * If true, VariantEval will treat -eval 1 -eval 2 as separate tracks from the same underlying
      * variant set, and evaluate the union of the results.  Useful when you want to do -eval chr1.vcf -eval chr2.vcf etc.
@@ -589,6 +593,14 @@ public class VariantEval extends RodWalker<Integer, Integer> implements TreeRedu
     public boolean isSubsettingToSpecificSamples() { return isSubsettingSamples; }
     public Set<String> getSampleNamesForEvaluation() { return sampleNamesForEvaluation; }
 
+    public int getNumberOfSamplesForEvaluation() {
+        if (sampleNamesForEvaluation!= null &&  !sampleNamesForEvaluation.isEmpty())
+            return sampleNamesForEvaluation.size();
+        else {
+            return numSamplesFromArgument;
+        }
+
+    }
     public Set<String> getSampleNamesForStratification() { return sampleNamesForStratification; }
 
     public List<RodBinding<VariantContext>> getComps() { return comps; }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
index e6efd4482..7197fc14c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/AlleleCount.java
@@ -29,7 +29,7 @@ public class AlleleCount extends VariantStratifier {
 
         // There are ploidy x n sample chromosomes
         // TODO -- generalize to handle multiple ploidy
-        nchrom = getVariantEvalWalker().getSampleNamesForEvaluation().size() * getVariantEvalWalker().getSamplePloidy();
+        nchrom = getVariantEvalWalker().getNumberOfSamplesForEvaluation() * getVariantEvalWalker().getSamplePloidy();
         if ( nchrom < 2 )
             throw new UserException.BadArgumentValue("AlleleCount", "AlleleCount stratification requires an eval vcf with at least one sample");
 

From cde4f037d3a79802569d06134358d9c7ed14a8ce Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Thu, 25 Oct 2012 16:18:25 -0400
Subject: [PATCH 026/236] Begin moving XHMM scripts to public

---
 .../queue/qscripts/CNV/xhmmCNVpipeline.scala  | 499 ++++++++++++++++++
 .../sting/queue/util/DoC/package.scala        | 123 +++++
 2 files changed, 622 insertions(+)
 create mode 100644 public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
 create mode 100644 public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala

diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
new file mode 100644
index 000000000..362337c84
--- /dev/null
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
@@ -0,0 +1,499 @@
+package org.broadinstitute.sting.queue.qscripts.CNV
+
+import org.broadinstitute.sting.queue.extensions.gatk._
+import org.broadinstitute.sting.queue.QScript
+import org.broadinstitute.sting.queue.util.VCF_BAM_utilities
+import org.broadinstitute.sting.queue.util.DoC._
+import org.broadinstitute.sting.commandline.Hidden
+import java.io.{PrintStream, PrintWriter}
+import org.broadinstitute.sting.utils.text.XReadLines
+import collection.JavaConversions._
+
+class xhmmCNVpipeline extends QScript {
+  qscript =>
+
+  @Input(doc = "bam input, as .bam or as a list of files", shortName = "I", required = true)
+  var bams: File = _
+
+  @Argument(doc = "gatk jar file", shortName = "J", required = true)
+  var gatkJarFile: File = _
+
+  @Argument(doc = "xhmm executable file", shortName = "xhmmExec", required = true)
+  var xhmmExec: File = _
+
+  @Argument(doc = "Plink/Seq executable file", shortName = "pseqExec", required = true)
+  var pseqExec: File = _
+
+  @Argument(doc = "Plink/Seq SEQDB file (Reference genome sequence)", shortName = "SEQDB", required = true)
+  var pseqSeqDB: String = _
+
+  @Argument(shortName = "R", doc = "ref", required = true)
+  var referenceFile: File = _
+
+  @Argument(shortName = "L", doc = "Intervals", required = false)
+  var intervals: File = _
+
+  @Input(doc = "level of parallelism for BAM DoC.   By default is set to 0 [no scattering].", shortName = "scatter", required = false)
+  var scatterCountInput = 0
+
+  @Input(doc = "Samples to run together for DoC.   By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false)
+  var samplesPerJob = 1
+
+  @Output(doc = "Base name for files to output", shortName = "o", required = true)
+  var outputBase: File = _
+
+  @Input(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
+  var MAX_DEPTH = 20000
+
+  @Hidden
+  @Input(doc = "Number of read-depth bins", shortName = "NUM_BINS", required = false)
+  var NUM_BINS = 200
+
+  @Hidden
+  @Input(doc = "Starting value of read-depth bins", shortName = "START_BIN", required = false)
+  var START_BIN = 1
+
+  @Input(doc = "Minimum read mapping quality", shortName = "MMQ", required = false)
+  var minMappingQuality = 0
+
+  @Input(doc = "Memory (in GB) required for storing the whole matrix in memory", shortName = "wholeMatrixMemory", required = false)
+  var wholeMatrixMemory = -1
+
+  @Argument(shortName = "minTargGC", doc = "Exclude all targets with GC content less than this value", required = false)
+  var minTargGC : Double = 0.1
+
+  @Argument(shortName = "maxTargGC", doc = "Exclude all targets with GC content greater than this value", required = false)
+  var maxTargGC : Double = 0.9
+
+  @Argument(shortName = "minTargRepeats", doc = "Exclude all targets with % of repeat-masked bases less than this value", required = false)
+  var minTargRepeats : Double = 0.0
+
+  @Argument(shortName = "maxTargRepeats", doc = "Exclude all targets with % of repeat-masked bases greater than this value", required = false)
+  var maxTargRepeats : Double = 0.1
+
+  @Argument(shortName = "sampleIDsMap", doc = "File mapping BAM sample IDs to desired sample IDs", required = false)
+  var sampleIDsMap: String = ""
+
+  @Argument(shortName = "sampleIDsMapFromColumn", doc = "Column number of OLD sample IDs to map", required = false)
+  var sampleIDsMapFromColumn = 1
+
+  @Argument(shortName = "sampleIDsMapToColumn", doc = "Column number of NEW sample IDs to map", required = false)
+  var sampleIDsMapToColumn = 2
+
+  @Argument(shortName = "rawFilters", doc = "xhmm command-line parameters to filter targets and samples from raw data", required = false)
+  var targetSampleFiltersString: String = ""
+
+  @Argument(shortName = "PCAnormalize", doc = "xhmm command-line parameters to Normalize data using PCA information", required = false)
+  var PCAnormalizeMethodString: String = ""
+
+  @Argument(shortName = "normalizedFilters", doc = "xhmm command-line parameters to filter targets and samples from PCA-normalized data", required = false)
+  var targetSampleNormalizedFiltersString: String = ""
+
+  @Argument(shortName = "xhmmParams", doc = "xhmm model parameters file", required = true)
+  var xhmmParamsArg: File = _
+
+  @Argument(shortName = "discoverParams", doc = "xhmm command-line parameters for discovery step", required = false)
+  var discoverCommandLineParams: String = ""
+
+  @Argument(shortName = "genotypeParams", doc = "xhmm command-line parameters for genotyping step", required = false)
+  var genotypeCommandLineParams: String = ""
+
+  @Argument(shortName = "genotypeSubsegments", doc = "Should we also genotype all subsegments of the discovered CNV?", required = false)
+  var genotypeSubsegments: Boolean = false
+
+  @Argument(shortName = "maxTargetsInSubsegment", doc = "If genotypeSubsegments, then only consider sub-segments consisting of this number of targets or fewer", required = false)
+  var maxTargetsInSubsegment = 30
+
+  @Argument(shortName = "subsegmentGenotypeThreshold", doc = "If genotypeSubsegments, this is the default genotype quality threshold for the sub-segments", required = false)
+  var subsegmentGenotypeThreshold = 20.0
+
+  @Argument(shortName = "longJobQueue", doc = "Job queue to run the 'long-running' commands", required = false)
+  var longJobQueue: String = ""
+
+
+  val PREPARED_TARGS_SUFFIX: String = ".merged.interval_list"
+
+  val RD_OUTPUT_SUFFIX: String = ".RD.txt"
+
+  val TARGS_GC_SUFFIX = ".locus_GC.txt"
+  val EXTREME_GC_TARGS_SUFFIX = ".extreme_gc_targets.txt"
+
+  val TARGS_REPEAT_COMPLEXITY_SUFFIX = ".locus_complexity.txt"
+  val EXTREME_REPEAT_COMPLEXITY_SUFFIX = ".extreme_complexity_targets.txt"
+
+  val FILTERED_TARGS_SUFFIX: String = ".filtered_targets.txt"
+  val FILTERED_SAMPS_SUFFIX: String =  ".filtered_samples.txt"
+
+
+  trait WholeMatrixMemoryLimit extends CommandLineFunction {
+    // Since loading ALL of the data can take significant memory:
+    if (wholeMatrixMemory < 0) {
+      this.memoryLimit = 24
+    }
+    else {
+      this.memoryLimit = wholeMatrixMemory
+    }
+  }
+
+  trait LongRunTime extends CommandLineFunction {
+    if (longJobQueue != "")
+      this.jobQueue = longJobQueue
+  }
+
+  def script = {
+    val prepTargets = new PrepareTargets(List(qscript.intervals), outputBase.getPath + PREPARED_TARGS_SUFFIX, xhmmExec, referenceFile)
+    add(prepTargets)
+
+    trait CommandLineGATKArgs extends CommandLineGATK {
+      this.intervals :+= prepTargets.out
+      this.jarFile = qscript.gatkJarFile
+      this.reference_sequence = qscript.referenceFile
+      this.logging_level = "INFO"
+    }
+
+    val sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = VCF_BAM_utilities.getMapOfBAMsForSample(VCF_BAM_utilities.parseBAMsInput(bams))
+    val samples: List[String] = sampleToBams.keys.toList
+    Console.out.printf("Samples are %s%n", samples)
+
+    val groups: List[Group] = buildDoCgroups(samples, sampleToBams, samplesPerJob, outputBase)
+    var docs: List[DoC] = List[DoC]()
+    for (group <- groups) {
+      Console.out.printf("Group is %s%n", group)
+      docs ::= new DoC(group.bams, group.DoC_output, MAX_DEPTH, minMappingQuality, scatterCountInput, START_BIN, NUM_BINS, Nil) with CommandLineGATKArgs
+    }
+    addAll(docs)
+
+    val mergeDepths = new MergeGATKdepths(docs.map(u => u.intervalSampleOut), outputBase.getPath + RD_OUTPUT_SUFFIX, "_mean_cvg", xhmmExec, sampleIDsMap, sampleIDsMapFromColumn, sampleIDsMapToColumn, None, false) with WholeMatrixMemoryLimit
+    add(mergeDepths)
+
+    var excludeTargets : List[File] = List[File]()
+    if (minTargGC > 0 || maxTargGC < 1) {
+      val calcGCcontents = new GCContentByInterval with CommandLineGATKArgs
+      calcGCcontents.out = outputBase.getPath + TARGS_GC_SUFFIX
+      add(calcGCcontents)
+
+      val excludeTargetsBasedOnGC = new ExcludeTargetsBasedOnValue(calcGCcontents.out, EXTREME_GC_TARGS_SUFFIX, minTargGC, maxTargGC)
+      add(excludeTargetsBasedOnGC)
+      excludeTargets ::= excludeTargetsBasedOnGC.out
+    }
+
+    class CalculateRepeatComplexity(outFile : String) extends CommandLineFunction {
+      @Input(doc="")
+      var intervals: File = prepTargets.out
+
+      @Output(doc="")
+      var out : File = new File(outFile)
+
+      val regFile : String = outputBase.getPath + ".targets.reg"
+      val locDB : String = outputBase.getPath + ".targets.LOCDB"
+
+      val removeFiles = "rm -f " + regFile + " " + locDB
+      val createRegFile = "cat " + intervals + " | awk 'BEGIN{OFS=\"\\t\"; print \"#CHR\\tBP1\\tBP2\\tID\"} {split($1,a,\":\"); chr=a[1]; if (match(chr,\"chr\")==0) {chr=\"chr\"chr} split(a[2],b,\"-\"); bp1=b[1]; bp2=bp1; if (length(b) > 1) {bp2=b[2]} print chr,bp1,bp2,NR}' > " + regFile
+      val createLOCDB = pseqExec + " . loc-load --locdb " + locDB + " --file " + regFile + " --group targets --out " + locDB + ".loc-load"
+      val calcRepeatMaskedPercent = pseqExec + " . loc-stats --locdb " + locDB + " --group targets --seqdb " + pseqSeqDB + " --out " + locDB + ".loc-stats"
+      val extractRepeatMaskedPercent = "cat " + locDB + ".loc-stats.locstats | awk '{if (NR > 1) print $_}' | sort -k1 -g | awk '{print $10}' | paste " + intervals + " - | awk '{print $1\"\\t\"$2}' > " + out
+
+      var command: String =
+        removeFiles +
+          " && " + createRegFile +
+          " && " + createLOCDB +
+          " && " + calcRepeatMaskedPercent +
+          " && " + extractRepeatMaskedPercent
+
+      def commandLine = command
+
+      override def description = "Calculate the percentage of each target that is repeat-masked in the reference sequence: " + command
+    }
+
+    if (minTargRepeats > 0 || maxTargRepeats < 1) {
+      val calcRepeatComplexity = new CalculateRepeatComplexity(outputBase.getPath + TARGS_REPEAT_COMPLEXITY_SUFFIX)
+      add(calcRepeatComplexity)
+
+      val excludeTargetsBasedOnRepeats = new ExcludeTargetsBasedOnValue(calcRepeatComplexity.out, EXTREME_REPEAT_COMPLEXITY_SUFFIX, minTargRepeats, maxTargRepeats)
+      add(excludeTargetsBasedOnRepeats)
+      excludeTargets ::= excludeTargetsBasedOnRepeats.out
+    }
+
+    val filterCenterDepths = new FilterCenterRawMatrix(mergeDepths.mergedDoC, excludeTargets)
+    add(filterCenterDepths)
+
+    val pca = new PCA(filterCenterDepths.filteredCentered)
+    add(pca)
+
+    val normalize = new Normalize(pca)
+    add(normalize)
+
+    val filterZscore = new FilterAndZscoreNormalized(normalize.normalized)
+    add(filterZscore)
+
+    val filterOriginal = new FilterOriginalData(mergeDepths.mergedDoC, filterCenterDepths, filterZscore)
+    add(filterOriginal)
+
+    val discover = new DiscoverCNVs(filterZscore.filteredZscored, filterOriginal.sameFiltered)
+    add(discover)
+
+    val genotype = new GenotypeCNVs(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
+    add(genotype)
+
+    if (genotypeSubsegments) {
+      val genotypeSegs = new GenotypeCNVandSubsegments(filterZscore.filteredZscored, discover.xcnv, filterOriginal.sameFiltered)
+      add(genotypeSegs)
+    }
+  }
+
+  class ExcludeTargetsBasedOnValue(locus_valueIn : File, outSuffix : String, minVal : Double, maxVal : Double) extends InProcessFunction {
+    @Input(doc="")
+    var locus_value : File = locus_valueIn
+
+    @Output(doc="")
+    var out : File = new File(outputBase.getPath + outSuffix)
+
+    def run = {
+      var outWriter = new PrintWriter(new PrintStream(out))
+      var elems = asScalaIterator(new XReadLines(locus_value))
+
+      while (elems.hasNext) {
+        val line = elems.next
+        val splitLine = line.split("\\s+")
+        val locus = splitLine(0)
+        val locValStr = splitLine(1)
+        try {
+          val locVal = locValStr.toDouble
+          if (locVal < minVal || locVal > maxVal)
+            outWriter.printf("%s%n", locus)
+        }
+        catch {
+          case nfe: NumberFormatException => println("Ignoring non-numeric value " + locValStr + " for locus " + locus)
+          case e: Exception => throw e
+        }
+      }
+
+      outWriter.close
+    }
+  }
+
+  class FilterCenterRawMatrix(inputParam: File, excludeTargetsIn : List[File]) extends CommandLineFunction with WholeMatrixMemoryLimit {
+    @Input(doc = "")
+    val input = inputParam
+
+    @Input(doc = "")
+    val excludeTargets = excludeTargetsIn
+
+    @Output
+    val filteredCentered: File = new File(outputBase.getPath + ".filtered_centered" + RD_OUTPUT_SUFFIX)
+    @Output
+    val filteredTargets: File = new File(filteredCentered.getPath + FILTERED_TARGS_SUFFIX)
+    @Output
+    val filteredSamples: File = new File(filteredCentered.getPath + FILTERED_SAMPS_SUFFIX)
+
+    var command: String =
+      xhmmExec + " --matrix" +
+      " -r " + input +
+      " --centerData --centerType target" +
+      " -o " + filteredCentered +
+      " --outputExcludedTargets " + filteredTargets +
+      " --outputExcludedSamples " + filteredSamples
+    command += excludeTargets.map(u => " --excludeTargets " + u).reduceLeft(_ + "" + _)
+    if (targetSampleFiltersString != "")
+      command += " " + targetSampleFiltersString
+
+    def commandLine = command
+
+    override def description = "Filters samples and targets and then mean-centers the targets: " + command
+  }
+
+  class PCA(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit {
+    @Input(doc = "")
+    val input = inputParam
+
+    val PCAbase: String = outputBase.getPath + ".RD_PCA"
+
+    @Output
+    val outPC: File = new File(PCAbase + ".PC.txt")
+    @Output
+    val outPC_SD: File = new File(PCAbase + ".PC_SD.txt")
+    @Output
+    val outPC_LOADINGS: File = new File(PCAbase + ".PC_LOADINGS.txt")
+
+    var command: String =
+      xhmmExec + " --PCA" +
+      " -r " + input +
+      " --PCAfiles " + PCAbase
+
+    def commandLine = command
+
+    override def description = "Runs PCA on mean-centered data: " + command
+  }
+
+  class Normalize(pca: PCA) extends CommandLineFunction {
+    @Input(doc = "")
+    val input = pca.input
+
+    @Input(doc = "")
+    val inPC = pca.outPC
+
+    @Input(doc = "")
+    val inPC_SD = pca.outPC_SD
+
+    @Input(doc = "")
+    val inPC_LOADINGS = pca.outPC_LOADINGS
+
+    @Output
+    val normalized: File = new File(outputBase.getPath + ".PCA_normalized.txt")
+
+    var command: String =
+      xhmmExec + " --normalize" +
+      " -r " + input +
+      " --PCAfiles " + pca.PCAbase +
+      " --normalizeOutput " + normalized
+    if (PCAnormalizeMethodString != "")
+      command += " " + PCAnormalizeMethodString
+
+    def commandLine = command
+
+    override def description = "Normalizes mean-centered data using PCA information: " + command
+  }
+
+  class FilterAndZscoreNormalized(inputParam: File) extends CommandLineFunction with WholeMatrixMemoryLimit {
+    @Input(doc = "")
+    val input = inputParam
+
+    @Output
+    val filteredZscored: File = new File(outputBase.getPath + ".PCA_normalized.filtered.sample_zscores" + RD_OUTPUT_SUFFIX)
+    @Output
+    val filteredTargets: File = new File(filteredZscored.getPath + FILTERED_TARGS_SUFFIX)
+    @Output
+    val filteredSamples: File = new File(filteredZscored.getPath + FILTERED_SAMPS_SUFFIX)
+
+    var command: String =
+      xhmmExec + " --matrix" +
+      " -r " + input +
+      " --centerData --centerType sample --zScoreData" +
+      " -o " + filteredZscored +
+      " --outputExcludedTargets " + filteredTargets +
+      " --outputExcludedSamples " + filteredSamples
+    if (targetSampleNormalizedFiltersString != "")
+      command += " " + targetSampleNormalizedFiltersString
+
+    def commandLine = command
+
+    override def description = "Filters and z-score centers (by sample) the PCA-normalized data: " + command
+  }
+
+  class FilterOriginalData(inputParam: File, filt1: FilterCenterRawMatrix, filt2: FilterAndZscoreNormalized) extends CommandLineFunction with WholeMatrixMemoryLimit {
+    @Input(doc = "")
+    val input = inputParam
+
+    @Input(doc = "")
+    val targFilters: List[File] = List(filt1.filteredTargets, filt2.filteredTargets).map(u => new File(u))
+
+    @Input(doc = "")
+    val sampFilters: List[File] = List(filt1.filteredSamples, filt2.filteredSamples).map(u => new File(u))
+
+    @Output
+    val sameFiltered: File = new File(outputBase.getPath + ".same_filtered" + RD_OUTPUT_SUFFIX)
+
+    var command: String =
+      xhmmExec + " --matrix" +
+      " -r " + input +
+      targFilters.map(u => " --excludeTargets " + u).reduceLeft(_ + "" + _) +
+      sampFilters.map(u => " --excludeSamples " + u).reduceLeft(_ + "" + _) +
+      " -o " + sameFiltered
+
+    def commandLine = command
+
+    override def description = "Filters original read-depth data to be the same as filtered, normalized data: " + command
+  }
+
+  class DiscoverCNVs(inputParam: File, origRDParam: File) extends CommandLineFunction with LongRunTime {
+    @Input(doc = "")
+    val input = inputParam
+
+    @Input(doc = "")
+    val xhmmParams = xhmmParamsArg
+
+    @Input(doc = "")
+    val origRD = origRDParam
+
+    @Output
+    val xcnv: File = new File(outputBase.getPath + ".xcnv")
+
+    @Output
+    val aux_xcnv: File = new File(outputBase.getPath + ".aux_xcnv")
+
+    val posteriorsBase = outputBase.getPath
+
+    @Output
+    val dipPosteriors: File = new File(posteriorsBase + ".posteriors.DIP.txt")
+
+    @Output
+    val delPosteriors: File = new File(posteriorsBase + ".posteriors.DEL.txt")
+
+    @Output
+    val dupPosteriors: File = new File(posteriorsBase + ".posteriors.DUP.txt")
+
+    var command: String =
+      xhmmExec + " --discover" +
+      " -p " + xhmmParams +
+      " -r " + input +
+      " -R " + origRD +
+      " -c " + xcnv +
+      " -a " + aux_xcnv +
+      " -s " + posteriorsBase +
+      " " + discoverCommandLineParams
+
+    def commandLine = command
+
+    override def description = "Discovers CNVs in normalized data: " + command
+  }
+
+  abstract class BaseGenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends CommandLineFunction with LongRunTime {
+    @Input(doc = "")
+    val input = inputParam
+
+    @Input(doc = "")
+    val xhmmParams = xhmmParamsArg
+
+    @Input(doc = "")
+    val origRD = origRDParam
+
+    @Input(doc = "")
+    val inXcnv = xcnv
+
+    var command: String =
+      xhmmExec + " --genotype" +
+      " -p " + xhmmParams +
+      " -r " + input +
+      " -g " + inXcnv +
+      " -F " + referenceFile +
+      " -R " + origRD +
+      " " + genotypeCommandLineParams
+  }
+
+  class GenotypeCNVs(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam) {
+    @Output
+    val vcf: File = new File(outputBase.getPath + ".vcf")
+
+    command +=
+      " -v " +  vcf
+
+    def commandLine = command
+
+    override def description = "Genotypes discovered CNVs in all samples: " + command
+  }
+
+  class GenotypeCNVandSubsegments(inputParam: File, xcnv: File, origRDParam: File) extends BaseGenotypeCNVs(inputParam, xcnv, origRDParam) {
+    @Output
+    val vcf: File = new File(outputBase.getPath + ".subsegments.vcf")
+
+    command +=
+      " -v " +  vcf +
+      " --subsegments" +
+      " --maxTargetsInSubsegment " + maxTargetsInSubsegment +
+      " --genotypeQualThresholdWhenNoExact " + subsegmentGenotypeThreshold
+
+    def commandLine = command
+
+    override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + command
+  }
+}
\ No newline at end of file
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala b/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala
new file mode 100644
index 000000000..f35db4aa3
--- /dev/null
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala
@@ -0,0 +1,123 @@
+package org.broadinstitute.sting.queue.util
+
+import java.io.File
+import org.broadinstitute.sting.queue.extensions.gatk.{IntervalScatterFunction, CommandLineGATK}
+import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFunction
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType
+import org.broadinstitute.sting.commandline.{Input, Gather, Output}
+import org.broadinstitute.sting.queue.function.CommandLineFunction
+
+package object DoC {
+  class DoC(val bams: List[File], val DoC_output: File, val MAX_DEPTH: Int, val minMappingQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int]) extends CommandLineGATK with ScatterGatherableFunction {
+    val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary"
+
+    // So that the output files of this DoC run get deleted once they're used further downstream:
+    this.isIntermediate = true
+
+    this.analysis_type = "DepthOfCoverage"
+
+    this.input_file = bams
+
+    this.downsample_to_coverage = Some(MAX_DEPTH)
+    this.downsampling_type = DownsampleType.BY_SAMPLE
+
+    this.scatterCount = scatterCountInput
+    this.scatterClass = classOf[IntervalScatterFunction]
+
+    // HACK for DoC to work properly within Queue:
+    @Output
+    @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])
+    var intervalSampleOut: File = new File(DoC_output.getPath + DOC_OUTPUT_SUFFIX)
+
+    override def commandLine = super.commandLine +
+      " --omitDepthOutputAtEachBase" +
+      " --omitLocusTable" +
+      " --minBaseQuality 0" +
+      " --minMappingQuality " + minMappingQuality +
+      " --start " + START_BIN + " --stop " + MAX_DEPTH + " --nBins " + NUM_BINS +
+      (if (!minCoverageCalcs.isEmpty) minCoverageCalcs.map(cov => " --summaryCoverageThreshold " + cov).reduceLeft(_ + "" + _) else "") +
+      " --includeRefNSites" +
+      " -o " + DoC_output
+
+    override def shortDescription = "DoC: " + DoC_output
+  }
+
+  class DoCwithDepthOutputAtEachBase(bams: List[File], DoC_output: File, MAX_DEPTH: Int, minMappingQuality: Int, scatterCountInput: Int, START_BIN: Int, NUM_BINS: Int, minCoverageCalcs: Seq[Int]) extends DoC(bams, DoC_output, MAX_DEPTH: Int, minMappingQuality, scatterCountInput, START_BIN, NUM_BINS, minCoverageCalcs) {
+    // HACK for DoC to work properly within Queue:
+    @Output
+    @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])
+    var outPrefix = DoC_output
+
+    override def commandLine = super.commandLine.replaceAll(" --omitDepthOutputAtEachBase", "")
+  }
+
+  def buildDoCgroups(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]], samplesPerJob: Int, outputBase: File): List[Group] = {
+
+    def buildDoCgroupsHelper(samples: List[String], count: Int): List[Group] = (samples splitAt samplesPerJob) match {
+      case (Nil, y) =>
+        return Nil
+      case (subsamples, remaining) =>
+        return new Group("group" + count, outputBase, subsamples, VCF_BAM_utilities.findBAMsForSamples(subsamples, sampleToBams)) :: buildDoCgroupsHelper(remaining, count + 1)
+    }
+
+    return buildDoCgroupsHelper(samples, 0)
+  }
+
+  // A group has a list of samples and bam files to use for DoC
+  class Group(val name: String, val outputBase: File, val samples: List[String], val bams: List[File]) {
+    // getName() just includes the file name WITHOUT the path:
+    val groupOutputName = name + "." + outputBase.getName
+
+    // Comment this out, so that when jobs are scattered in DoC class below, they do not scatter into outputs at directories that don't exist!!! :
+    //def DoC_output = new File(outputBase.getParentFile(), groupOutputName)
+
+    def DoC_output = new File(groupOutputName)
+
+    override def toString(): String = String.format("[Group %s [%s] with samples %s against bams %s]", name, DoC_output, samples, bams)
+  }
+
+  class MergeGATKdepths(DoCsToCombine: List[File], outFile: String, columnSuffix: String, xhmmExec: File, sampleIDsMap: String, sampleIDsMapFromColumn: Int, sampleIDsMapToColumn: Int, rdPrecisionArg: Option[Int], outputTargetsBySamples: Boolean) extends CommandLineFunction {
+    @Input(doc = "")
+    var inputDoCfiles: List[File] = DoCsToCombine
+
+    @Output
+    val mergedDoC: File = new File(outFile)
+    var command: String =
+      xhmmExec + " --mergeGATKdepths" +
+        inputDoCfiles.map(input => " --GATKdepths " + input).reduceLeft(_ + "" + _) +
+        " --columnSuffix " + columnSuffix +
+        " -o " + mergedDoC
+    if (sampleIDsMap != "")
+      command += " --sampleIDmap " + sampleIDsMap + " --fromID " + sampleIDsMapFromColumn + " --toID " + sampleIDsMapToColumn
+    rdPrecisionArg match {
+      case Some(rdPrecision) => {
+        command += " --rdPrecision " + rdPrecision
+      }
+      case None => {}
+    }
+    if (outputTargetsBySamples)
+      command += " --outputTargetsBySamples"
+
+    def commandLine = command
+
+    override def description = "Combines DoC outputs for multiple samples (at same loci): " + command
+  }
+
+  class PrepareTargets(intervalsIn: List[File], outIntervals: String, val xhmmExec: File, val referenceFile: File) extends CommandLineFunction {
+    @Input(doc = "List of files containing targeted intervals to be prepared and merged")
+    var inIntervals: List[File] = intervalsIn
+
+    @Output(doc = "The merged intervals file to write to")
+    var out: File = new File(outIntervals)
+
+    var command: String =
+      xhmmExec + " --prepareTargets" +
+        " -F " + referenceFile +
+        inIntervals.map(intervFile => " --targets " + intervFile).reduceLeft(_ + "" + _) +
+        " --mergedTargets " + out
+
+    def commandLine = command
+
+    override def description = "Sort all target intervals, merge overlapping ones, and print the resulting interval list: " + command
+  }
+}

From 9af4b34fd8a45d4bd561f29dc337f5676a57e21b Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Fri, 26 Oct 2012 01:21:05 -0400
Subject: [PATCH 027/236] Changed @Input to @Argument for non-File types

---
 .../queue/qscripts/CNV/xhmmCNVpipeline.scala     | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
index 362337c84..8db089484 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
@@ -33,30 +33,30 @@ class xhmmCNVpipeline extends QScript {
   @Argument(shortName = "L", doc = "Intervals", required = false)
   var intervals: File = _
 
-  @Input(doc = "level of parallelism for BAM DoC.   By default is set to 0 [no scattering].", shortName = "scatter", required = false)
+  @Argument(doc = "level of parallelism for BAM DoC.   By default is set to 0 [no scattering].", shortName = "scatter", required = false)
   var scatterCountInput = 0
 
-  @Input(doc = "Samples to run together for DoC.   By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false)
+  @Argument(doc = "Samples to run together for DoC.   By default is set to 1 [one job per sample].", shortName = "samplesPerJob", required = false)
   var samplesPerJob = 1
 
   @Output(doc = "Base name for files to output", shortName = "o", required = true)
   var outputBase: File = _
 
-  @Input(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
+  @Argument(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
   var MAX_DEPTH = 20000
 
   @Hidden
-  @Input(doc = "Number of read-depth bins", shortName = "NUM_BINS", required = false)
+  @Argument(doc = "Number of read-depth bins", shortName = "NUM_BINS", required = false)
   var NUM_BINS = 200
 
   @Hidden
-  @Input(doc = "Starting value of read-depth bins", shortName = "START_BIN", required = false)
+  @Argument(doc = "Starting value of read-depth bins", shortName = "START_BIN", required = false)
   var START_BIN = 1
 
-  @Input(doc = "Minimum read mapping quality", shortName = "MMQ", required = false)
+  @Argument(doc = "Minimum read mapping quality", shortName = "MMQ", required = false)
   var minMappingQuality = 0
 
-  @Input(doc = "Memory (in GB) required for storing the whole matrix in memory", shortName = "wholeMatrixMemory", required = false)
+  @Argument(doc = "Memory (in GB) required for storing the whole matrix in memory", shortName = "wholeMatrixMemory", required = false)
   var wholeMatrixMemory = -1
 
   @Argument(shortName = "minTargGC", doc = "Exclude all targets with GC content less than this value", required = false)
@@ -496,4 +496,4 @@ class xhmmCNVpipeline extends QScript {
 
     override def description = "Genotypes discovered CNVs (and their sub-segments, of up to " + maxTargetsInSubsegment + " targets) in all samples: " + command
   }
-}
\ No newline at end of file
+}

From c8e17a7adf410aa540f6e51091160f56bd4231e2 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Tue, 30 Oct 2012 13:57:54 -0400
Subject: [PATCH 033/236] totally experimental UG feature, to be removed

---
 .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java   | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 3c4a97ec1..d9b46ad36 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -190,6 +190,9 @@ public class UnifiedGenotyperEngine {
                     final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
                     if ( vc != null )
                         results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
+                    else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
+                        results.add(generateEmptyContext(tracker, refContext, null, rawContext));
+
                 }
             }        
         }

From f5697532d63841acfe24f3045e6dbc4612c95f0e Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Wed, 31 Oct 2012 11:49:50 -0400
Subject: [PATCH 035/236] Added mvninstall.queue.all target which includes
 private, along with supporting sub-targets.

---
 build.xml | 32 ++++++++++++++++++++++----------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/build.xml b/build.xml
index c6b1afc56..18ab3e684 100644
--- a/build.xml
+++ b/build.xml
@@ -327,14 +327,18 @@
 
 
     <!-- INIT OVERRIDES: call these targets BEFORE init to override build defaults -->
-    <target name="init.publiconly">
+    <target name="init.build.publiconly">
         <property name="build.target" value="public" />
     </target>
 
-    <target name="init.publicprotectedonly">
+    <target name="init.build.publicprotectedonly">
         <property name="build.target" value="protected" />
     </target>
 
+    <target name="init.build.all">
+        <property name="build.target" value="all" />
+    </target>
+
     <target name="init.javaonly">
         <property name="compile.scala" value="false" />
     </target>
@@ -842,19 +846,23 @@
     <!-- Release-related tasks                                                            -->
     <!-- ******************************************************************************** -->
 
-    <target name="init.buildgatkfull" depends="init.publicprotectedonly, init.javaonly">
+    <target name="init.executable.gatkfull" depends="init.build.publicprotectedonly, init.javaonly">
         <property name="executable" value="GenomeAnalysisTK" />
     </target>
 
-    <target name="init.buildgatklite" depends="init.publiconly, init.javaonly">
+    <target name="init.executable.gatklite" depends="init.build.publiconly, init.javaonly">
         <property name="executable" value="GenomeAnalysisTKLite" />
     </target>
 
-    <target name="init.buildqueuefull" depends="init.publicprotectedonly, init.javaandscala">
+    <target name="init.executable.queueall" depends="init.build.all, init.javaandscala">
         <property name="executable" value="Queue" />
     </target>
 
-    <target name="init.buildqueuelite" depends="init.publiconly, init.javaandscala">
+    <target name="init.executable.queuefull" depends="init.build.publicprotectedonly, init.javaandscala">
+        <property name="executable" value="Queue" />
+    </target>
+
+    <target name="init.executable.queuelite" depends="init.build.publiconly, init.javaandscala">
         <property name="executable" value="QueueLite" />
     </target>
 
@@ -906,13 +914,15 @@
     </target>
 
     <!-- Package specific versions of the GATK/Queue. ALWAYS do an ant clean before invoking these! -->
-    <target name="package.gatk.full" depends="init.buildgatkfull,package" />
+    <target name="package.gatk.full" depends="init.executable.gatkfull,package" />
 
-    <target name="package.gatk.lite" depends="init.buildgatklite,package" />
+    <target name="package.gatk.lite" depends="init.executable.gatklite,package" />
 
-    <target name="package.queue.full" depends="init.buildqueuefull,package" />
+    <target name="package.queue.all" depends="init.executable.queueall,package" />
 
-    <target name="package.queue.lite" depends="init.buildqueuelite,package" />
+    <target name="package.queue.full" depends="init.executable.queuefull,package" />
+
+    <target name="package.queue.lite" depends="init.executable.queuelite,package" />
 
 
     <!-- Release a build. Don't call this target directly. Call one of the specific release targets below -->
@@ -975,6 +985,8 @@
 
     <target name="mvninstall.gatk.lite" depends="package.gatk.lite,mvninstall" />
 
+    <target name="mvninstall.queue.all" depends="package.queue.all,mvninstall" />
+
     <target name="mvninstall.queue.full" depends="package.queue.full,mvninstall" />
 
     <target name="mvninstall.queue.lite" depends="package.queue.lite,mvninstall" />

From 96344c6b62df84d4727bbbd8307d03f88da0f0c1 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 31 Oct 2012 12:35:45 -0400
Subject: [PATCH 036/236] Add note to realigner docs

---
 .../sting/gatk/walkers/indels/IndelRealigner.java               | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 998894fbf..8f2528e23 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -236,6 +236,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
      * then extensions (".bam" or ".sam") will be stripped from the input file names and the provided string value will be pasted on instead; 2) if the
      * value ends with a '.map' (e.g. input_output.map), then the two-column tab-separated file with the specified name must exist and list unique output
      * file name (2nd column) for each input file name (1st column).
+     *
+     * Note that some GATK arguments do NOT work in conjunction with nWayOut (e.g. --disable_bam_indexing).
      */
     @Argument(fullName="nWayOut", shortName="nWayOut", required=false, doc="Generate one output file for each input (-I) bam file")
     protected String N_WAY_OUT = null;

From 9cd04c335c9f5662bb02d1581ec2ad970f7abbd7 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Mon, 29 Oct 2012 19:06:05 -0400
Subject: [PATCH 040/236] Work on GSA-508 / CachingIndexedFastaReader should
 internally upper case bases loading data

-- As one might expect, CachingIndexedFastaSequenceFile now internally upper cases the FASTA reference bases.  This is now done by default, unless requested explicitly to preserve the original bases.
-- This is really the correct place to do this for a variety of reasons.  First, you don't need to work about upper casing bases throughout the code.  Second, the cache is only upper cased once, no matter how often the bases are accessed, which walkers cannot optimize themselves.  Finally, this uses the fastest function for this -- Picard's toUpperCase(byte[]) which is way better than String.toUpperCase()
-- Added unit tests to ensure this functionality works correct.
-- Removing unnecessary upper casing of bases in some core GATK tools, now that RefContext guarentees that the reference bases are all upper case.
-- Added contracts to ensure this is the case.
-- Remove a ton of sh*t from BaseUtils that was so old I had no idea what it was doing any longer, and didn't have any unit tests to ensure it was correct, and wasn't used anywhere in our code
---
 .../sting/gatk/contexts/ReferenceContext.java |  14 +-
 .../gatk/walkers/indels/IndelRealigner.java   |   8 +-
 .../broadinstitute/sting/utils/BaseUtils.java | 215 +-----------------
 .../CachingIndexedFastaSequenceFile.java      | 140 +++++++++---
 ...chingIndexedFastaSequenceFileUnitTest.java |  65 +++++-
 5 files changed, 185 insertions(+), 257 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
index af330bba9..c8bf1e3e8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
@@ -27,7 +27,6 @@ package org.broadinstitute.sting.gatk.contexts;
 
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
-import net.sf.samtools.util.StringUtil;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@@ -39,10 +38,7 @@ import org.broadinstitute.sting.utils.GenomeLocParser;
  * @author hanna
  * @version 0.1
  */
-
 public class ReferenceContext {
-    final public static boolean UPPERCASE_REFERENCE = true;
-
     /**
      * Facilitates creation of new GenomeLocs.
      */
@@ -59,7 +55,8 @@ public class ReferenceContext {
     final private GenomeLoc window;
 
     /**
-     * The bases in the window around the current locus.  If null, then bases haven't been fetched yet
+     * The bases in the window around the current locus.  If null, then bases haven't been fetched yet.
+     * Bases are always upper cased
      */
     private byte[] basesCache = null;
 
@@ -81,7 +78,7 @@ public class ReferenceContext {
          *
          * @return
          */
-        @Ensures("result != null")
+        @Ensures({"result != null", "BaseUtils.isUpperCase(result)"})
         public byte[] getBases();
     }
 
@@ -146,7 +143,6 @@ public class ReferenceContext {
     private void fetchBasesFromProvider() {
         if ( basesCache == null ) {
             basesCache = basesProvider.getBases();
-            if (UPPERCASE_REFERENCE) StringUtil.toUpperCase(basesCache);
         }
     }
 
@@ -176,6 +172,7 @@ public class ReferenceContext {
      * Get the base at the given locus.
      * @return The base at the given locus from the reference.
      */
+    @Ensures("BaseUtils.isUpperCase(result)")
     public byte getBase() {
         return getBases()[(locus.getStart() - window.getStart())];
     }
@@ -185,7 +182,7 @@ public class ReferenceContext {
      * @return All bases available.  If the window is of size [0,0], the array will
      *         contain only the base at the given locus.
      */
-    @Ensures({"result != null", "result.length > 0"})
+    @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"})
     public byte[] getBases() {
         fetchBasesFromProvider();
         return basesCache;
@@ -194,6 +191,7 @@ public class ReferenceContext {
     /**
      * All the bases in the window from the current base forward to the end of the window.
      */
+    @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"})
     public byte[] getForwardBases() {
         final byte[] bases = getBases();
         final int mid = locus.getStart() - window.getStart();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
index 8f2528e23..345f79b2b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/IndelRealigner.java
@@ -25,7 +25,7 @@
 
 package org.broadinstitute.sting.gatk.walkers.indels;
 
-import net.sf.picard.reference.IndexedFastaSequenceFile;
+import com.google.java.contract.Requires;
 import net.sf.samtools.*;
 import net.sf.samtools.util.RuntimeIOException;
 import net.sf.samtools.util.SequenceUtil;
@@ -276,7 +276,7 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
     protected String OUT_SNPS = null;
 
     // fasta reference reader to supplement the edges of the reference sequence
-    private IndexedFastaSequenceFile referenceReader;
+    private CachingIndexedFastaSequenceFile referenceReader;
 
     // the intervals input by the user
     private Iterator<GenomeLoc> intervals = null;
@@ -1603,7 +1603,8 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
 
         public List<GATKSAMRecord> getReads() { return reads; }
 
-        public byte[] getReference(IndexedFastaSequenceFile referenceReader) {
+        @Requires("referenceReader.isUppercasingBases()")
+        public byte[] getReference(CachingIndexedFastaSequenceFile referenceReader) {
             // set up the reference if we haven't done so yet
             if ( reference == null ) {
                 // first, pad the reference to handle deletions in narrow windows (e.g. those with only 1 read)
@@ -1611,7 +1612,6 @@ public class IndelRealigner extends ReadWalker<Integer, Integer> {
                 int padRight = Math.min(loc.getStop()+REFERENCE_PADDING, referenceReader.getSequenceDictionary().getSequence(loc.getContig()).getSequenceLength());
                 loc = getToolkit().getGenomeLocParser().createGenomeLoc(loc.getContig(), padLeft, padRight);
                 reference = referenceReader.getSubsequenceAt(loc.getContig(), loc.getStart(), loc.getStop()).getBases();
-                StringUtil.toUpperCase(reference);
             }
 
             return reference;
diff --git a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
index 69920ece4..53a49d8b2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/BaseUtils.java
@@ -24,33 +24,6 @@ public class BaseUtils {
     public final static byte[] BASES = {'A', 'C', 'G', 'T'};
     public final static byte[] EXTENDED_BASES = {'A', 'C', 'G', 'T', 'N', 'D'};
 
-    public enum Base {
-        A('A', 0),
-        C('C', 1),
-        G('G', 2),
-        T('T', 3);
-
-        byte b;
-        int index;
-
-        private Base(char base, int index) {
-            this.b = (byte) base;
-            this.index = index;
-        }
-
-        public byte getBase() { return b; }
-
-        public char getBaseAsChar() { return (char) b; }
-
-        public int getIndex() { return index; }
-
-        public boolean sameBase(byte o) { return b == o; }
-
-        public boolean sameBase(char o) { return b == (byte) o; }
-
-        public boolean sameBase(int i) { return index == i; }
-    }
-
     static private final int[] baseIndexMap = new int[256];
     static {
         Arrays.fill(baseIndexMap, -1);
@@ -130,6 +103,17 @@ public class BaseUtils {
         return false;
     }
 
+    public static boolean isUpperCase(final byte[] bases) {
+        for ( byte base : bases )
+            if ( ! isUpperCase(base) )
+                return false;
+        return true;
+    }
+
+    public static boolean isUpperCase(final byte base) {
+        return base >= 'A' && base <= 'Z';
+    }
+
     /**
      * Converts a IUPAC nucleotide code to a pair of bases
      *
@@ -271,59 +255,6 @@ public class BaseUtils {
         }
     }
 
-    /**
-     * Converts a base index to a base index representing its cross-talk partner
-     *
-     * @param baseIndex 0, 1, 2, 3
-     * @return 1, 0, 3, 2, or -1 if the index can't be understood
-     */
-    static public int crossTalkPartnerIndex(int baseIndex) {
-        switch (baseIndex) {
-            case 0:
-                return 1; // A -> C
-            case 1:
-                return 0; // C -> A
-            case 2:
-                return 3; // G -> T
-            case 3:
-                return 2; // T -> G
-            default:
-                return -1;
-        }
-    }
-
-    /**
-     * Converts a base to the base representing its cross-talk partner
-     *
-     * @param base [AaCcGgTt]
-     * @return C, A, T, G, or '.' if the base can't be understood
-     */
-    @Deprecated
-    static public char crossTalkPartnerBase(char base) {
-        return (char) baseIndexToSimpleBase(crossTalkPartnerIndex(simpleBaseToBaseIndex(base)));
-    }
-
-    /**
-     * Return the complement of a base index.
-     *
-     * @param baseIndex the base index (0:A, 1:C, 2:G, 3:T)
-     * @return the complementary base index
-     */
-    static public byte complementIndex(int baseIndex) {
-        switch (baseIndex) {
-            case 0:
-                return 3; // a -> t
-            case 1:
-                return 2; // c -> g
-            case 2:
-                return 1; // g -> c
-            case 3:
-                return 0; // t -> a
-            default:
-                return -1; // wtf?
-        }
-    }
-
     /**
      * Return the complement (A <-> T or C <-> G) of a base, or the specified base if it can't be complemented (i.e. an ambiguous base).
      *
@@ -350,7 +281,7 @@ public class BaseUtils {
     }
 
     @Deprecated
-    static public char simpleComplement(char base) {
+    static private char simpleComplement(char base) {
         return (char) simpleComplement((byte) base);
     }
 
@@ -370,22 +301,6 @@ public class BaseUtils {
         return rcbases;
     }
 
-    /**
-     * Complement a byte array of bases (that is, chars casted to bytes, *not* base indices in byte form)
-     *
-     * @param bases the byte array of bases
-     * @return the complement of the base byte array
-     */
-    static public byte[] simpleComplement(byte[] bases) {
-        byte[] rcbases = new byte[bases.length];
-
-        for (int i = 0; i < bases.length; i++) {
-            rcbases[i] = simpleComplement(bases[i]);
-        }
-
-        return rcbases;
-    }
-
     /**
      * Reverse complement a char array of bases
      *
@@ -403,23 +318,6 @@ public class BaseUtils {
         return rcbases;
     }
 
-    /**
-     * Complement a char array of bases
-     *
-     * @param bases the char array of bases
-     * @return the complement of the base char array
-     */
-    @Deprecated
-    static public char[] simpleComplement(char[] bases) {
-        char[] rcbases = new char[bases.length];
-
-        for (int i = 0; i < bases.length; i++) {
-            rcbases[i] = simpleComplement(bases[i]);
-        }
-
-        return rcbases;
-    }
-
     /**
      * Reverse complement a String of bases.  Preserves ambiguous bases.
      *
@@ -431,17 +329,6 @@ public class BaseUtils {
         return new String(simpleReverseComplement(bases.getBytes()));
     }
 
-    /**
-     * Complement a String of bases.  Preserves ambiguous bases.
-     *
-     * @param bases the String of bases
-     * @return the complement of the String
-     */
-    @Deprecated
-    static public String simpleComplement(String bases) {
-        return new String(simpleComplement(bases.getBytes()));
-    }
-
     /**
      * Returns the uppercased version of the bases
      *
@@ -543,82 +430,4 @@ public class BaseUtils {
 
         return randomBaseIndex;
     }
-
-    /**
-     * Return a random base (A, C, G, T).
-     *
-     * @return a random base (A, C, G, T)
-     */
-    @Deprecated
-    static public byte getRandomBase() {
-        return getRandomBase('.');
-    }
-
-    /**
-     * Return a random base, excluding some base.
-     *
-     * @param excludeBase the base to exclude
-     * @return a random base, excluding the one specified (A, C, G, T)
-     */
-    @Deprecated
-    static public byte getRandomBase(char excludeBase) {
-        return BaseUtils.baseIndexToSimpleBase(getRandomBaseIndex(BaseUtils.simpleBaseToBaseIndex(excludeBase)));
-    }
-
-    /**
-     * Computes the smallest period >= minPeriod for the specified string. The period is defined as such p,
-     * that for all  i = 0... seq.length-1,  seq[ i % p ] = seq[i] (or equivalently seq[i] = seq[i+p] for i=0...seq.length-1-p).
-     * The sequence does <i>not</i> have to contain whole number of periods. For instance, "ACACACAC" has a period
-     * of 2 (it has a period of 4 as well), and so does
-     * "ACACA"; similarly, smallest periods of "CTCCTC", "CTCCT", and "CTCC" are all equal to 3. The "trivial" period is
-     * the length of the string itself, and it will always be returned if no smaller period can be found in the specified period range
-     * or if specified minPeriod is greater than the sequence length.
-     *
-     * @param seq
-     * @return
-     */
-    public static int sequencePeriod(byte[] seq, int minPeriod) {
-        int period = (minPeriod > seq.length ? seq.length : minPeriod);
-        // we assume that bases [0,period-1] repeat themselves and check this assumption
-        // until we find correct period
-
-        for (int pos = period; pos < seq.length; pos++) {
-
-            int offset = pos % period; // we are currenlty 'offset' bases into the putative repeat of period 'period'
-            // if our current hypothesis holds, base[pos] must be the same as base[offset]
-
-            if (Character.toUpperCase(seq[pos]) != Character.toUpperCase(seq[offset])) {
-
-                // period we have been trying so far does not work.
-                // two possibilities:
-                // A) offset = 0, i.e. current position pos must be start of the next repeat, but it is not;
-                //      in this case only bases from start up to the current one, inclusive, may form a repeat, if at all;
-                //       so period is at least pos+1 (remember, pos is 0-based), then on the next loop re-entrance
-                //      pos will be autoincremented and we will be checking next base
-                // B) offset != 0, i.e. the current base breaks the repeat, but maybe it starts a new one?
-                //     hence we should first check if it matches the first base of the sequence, and to do that
-                //     we set period to pos  (thus trying the hypothesis that bases from start up to the current one,
-                //     non-inclusive are repeated hereafter), and decrement pos (this will re-test current base against the first base
-                // on the next loop re-entrance after pos is autoincremented)
-                if (offset == 0)
-                    period = pos + 1;
-                else
-                    period = pos--;
-
-            }
-        }
-        return period;
-    }
 }
-
-/* code snippet for testing sequencePeriod():
- * 
- *     	String str = "CCTTG";
-    	int p = 0;
-    	System.out.print("Periods of " + str +" are:");
-    	while ( p < str.length() ) {
-    		p = sequencePeriod(str, p+1);
-        	System.out.print(" "+p);
-    	}
-    	System.out.println(); System.exit(1);
-*/
diff --git a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
index db54851dd..0e8a3ea70 100644
--- a/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFile.java
@@ -29,6 +29,7 @@ import net.sf.picard.reference.FastaSequenceIndex;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.picard.reference.ReferenceSequence;
 import net.sf.samtools.SAMSequenceRecord;
+import net.sf.samtools.util.StringUtil;
 import org.apache.log4j.Priority;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
@@ -40,6 +41,8 @@ import java.util.Arrays;
  * A caching version of the IndexedFastaSequenceFile that avoids going to disk as often as the raw indexer.
  *
  * Thread-safe!  Uses a thread-local cache
+ *
+ * Automatically upper-cases the bases coming in, unless they the flag preserveCase is explicitly set
  */
 public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
     protected static final org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(CachingIndexedFastaSequenceFile.class);
@@ -54,10 +57,15 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
     public static final long DEFAULT_CACHE_SIZE = 1000000;
 
     /** The cache size of this CachingIndexedFastaSequenceFile */
-    final long cacheSize;
+    private final long cacheSize;
 
     /** When we have a cache miss at position X, we load sequence from X - cacheMissBackup */
-    final long cacheMissBackup;
+    private final long cacheMissBackup;
+
+    /**
+     * If true, we will preserve the case of the original base in the genome, not
+     */
+    private final boolean preserveCase;
 
     // information about checking efficiency
     long cacheHits = 0;
@@ -84,37 +92,17 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
     /**
      * Same as general constructor but allows one to override the default cacheSize
      *
-     * @param fasta
-     * @param index
-     * @param cacheSize
+     * @param fasta the file we will read our FASTA sequence from.
+     * @param index the index of the fasta file, used for efficient random access
+     * @param cacheSize the size in bp of the cache we will use for this reader
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
      */
-    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
+    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize, final boolean preserveCase) {
         super(fasta, index);
         if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
         this.cacheSize = cacheSize;
         this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
-    }
-
-    /**
-     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
-     *
-     * @param fasta The file to open.
-     * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
-     * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
-     */
-    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
-        this(fasta, index, DEFAULT_CACHE_SIZE);
-    }
-
-    /**
-     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
-     *
-     * Looks for a index file for fasta on disk
-     *
-     * @param fasta The file to open.
-     */
-    public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
-        this(fasta, DEFAULT_CACHE_SIZE);
+        this.preserveCase = preserveCase;
     }
 
     /**
@@ -124,12 +112,76 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
      * Uses provided cacheSize instead of the default
      *
      * @param fasta The file to open.
+     * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
      */
-    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
+    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize, final boolean preserveCase ) throws FileNotFoundException {
         super(fasta);
         if ( cacheSize < 0 ) throw new IllegalArgumentException("cacheSize must be > 0");
         this.cacheSize = cacheSize;
         this.cacheMissBackup = Math.max(cacheSize / 1000, 1);
+        this.preserveCase = preserveCase;
+    }
+
+//    /**
+//     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+//     *
+//     * @param fasta The file to open.
+//     * @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk.
+//     * @throws java.io.FileNotFoundException If the fasta or any of its supporting files cannot be found.
+//     */
+//    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index) {
+//        this(fasta, index, DEFAULT_CACHE_SIZE);
+//    }
+
+    /**
+     * Same as general constructor but allows one to override the default cacheSize
+     *
+     * By default, this CachingIndexedFastaReader converts all incoming bases to upper case
+     *
+     * @param fasta the file we will read our FASTA sequence from.
+     * @param index the index of the fasta file, used for efficient random access
+     * @param cacheSize the size in bp of the cache we will use for this reader
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final FastaSequenceIndex index, final long cacheSize) {
+        this(fasta, index, cacheSize, false);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk.
+     * This CachingIndexedFastaReader will convert all FASTA bases to upper cases under the hood
+     *
+     * @param fasta The file to open.
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta) throws FileNotFoundException {
+        this(fasta, false);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk
+     *
+     * @param fasta The file to open.
+     * @param preserveCase If true, we will keep the case of the underlying bases in the FASTA, otherwise everything is converted to upper case
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final boolean preserveCase) throws FileNotFoundException {
+        this(fasta, DEFAULT_CACHE_SIZE, preserveCase);
+    }
+
+    /**
+     * Open the given indexed fasta sequence file.  Throw an exception if the file cannot be opened.
+     *
+     * Looks for a index file for fasta on disk
+     * Uses provided cacheSize instead of the default
+     *
+     * @param fasta The file to open.
+     * @param cacheSize the size of the cache to use in this CachingIndexedFastaReader, must be >= 0
+     */
+    public CachingIndexedFastaSequenceFile(final File fasta, final long cacheSize ) throws FileNotFoundException {
+        this(fasta, cacheSize, false);
     }
 
     /**
@@ -168,6 +220,25 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
         return cacheSize;
     }
 
+    /**
+     * Is this CachingIndexedFastaReader keeping the original case of bases in the fasta, or is
+     * everything being made upper case?
+     *
+     * @return true if the bases coming from this reader are in the original case in the fasta, false if they are all upper cased
+     */
+    public boolean isPreservingCase() {
+        return preserveCase;
+    }
+
+    /**
+     * Is uppercasing bases?
+     *
+     * @return true if bases coming from this CachingIndexedFastaSequenceFile are all upper cased, false if this reader are in the original case in the fasta
+     */
+    public boolean isUppercasingBases() {
+        return ! isPreservingCase();
+    }
+
     /**
      * Gets the subsequence of the contig in the range [start,stop]
      *
@@ -177,8 +248,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
      * @param contig Contig whose subsequence to retrieve.
      * @param start inclusive, 1-based start of region.
      * @param stop inclusive, 1-based stop of region.
-     * @return The partial reference sequence associated with this range.
+     * @return The partial reference sequence associated with this range.  If preserveCase is false, then
+     *         all of the bases in the ReferenceSequence returned by this method will be upper cased.
      */
+    @Override
     public ReferenceSequence getSubsequenceAt( final String contig, final long start, final long stop ) {
         final ReferenceSequence result;
         final Cache myCache = cache.get();
@@ -186,6 +259,7 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
         if ( (stop - start) >= cacheSize ) {
             cacheMisses++;
             result = super.getSubsequenceAt(contig, start, stop);
+            if ( ! preserveCase ) StringUtil.toUpperCase(result.getBases());
         } else {
             // todo -- potential optimization is to check if contig.name == contig, as this in generally will be true
             SAMSequenceRecord contigInfo = super.getSequenceDictionary().getSequence(contig);
@@ -198,7 +272,9 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
                 myCache.start = Math.max(start - cacheMissBackup, 0);
                 myCache.stop  = Math.min(start + cacheSize + cacheMissBackup, contigInfo.getSequenceLength());
                 myCache.seq   = super.getSubsequenceAt(contig, myCache.start, myCache.stop);
-                //System.out.printf("New cache at %s %d-%d%n", contig, cacheStart, cacheStop);
+
+                // convert all of the bases in the sequence to upper case if we aren't preserving cases
+                if ( ! preserveCase ) StringUtil.toUpperCase(myCache.seq.getBases());
             } else {
                 cacheHits++;
             }
@@ -215,8 +291,10 @@ public class CachingIndexedFastaSequenceFile extends IndexedFastaSequenceFile {
             }
         }
 
+        // for debugging -- print out our efficiency if requested
         if ( PRINT_EFFICIENCY && (getCacheHits() + getCacheMisses()) % PRINT_FREQUENCY == 0 )
             printEfficiency(Priority.INFO);
+
         return result;
     }
 }
\ No newline at end of file
diff --git a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java
index 736162300..bcd846184 100644
--- a/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/fasta/CachingIndexedFastaSequenceFileUnitTest.java
@@ -30,6 +30,7 @@ import java.util.concurrent.Executors;
 public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
     private File simpleFasta = new File(publicTestDir + "/exampleFASTA.fasta");
     private static final int STEP_SIZE = 1;
+    private final static boolean DEBUG = false;
 
     //private static final List<Integer> QUERY_SIZES = Arrays.asList(1);
     private static final List<Integer> QUERY_SIZES = Arrays.asList(1, 10, 100);
@@ -53,9 +54,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
         return cacheSizeRequested == -1 ? CachingIndexedFastaSequenceFile.DEFAULT_CACHE_SIZE : cacheSizeRequested;
     }
 
-    @Test(dataProvider = "fastas", enabled = true)
+    @Test(dataProvider = "fastas", enabled = true && ! DEBUG)
     public void testCachingIndexedFastaReaderSequential1(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
 
         SAMSequenceRecord contig = caching.getSequenceDictionary().getSequence(0);
         logger.warn(String.format("Checking contig %s length %d with cache size %d and query size %d",
@@ -64,6 +65,8 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
     }
 
     private void testSequential(final CachingIndexedFastaSequenceFile caching, final File fasta, final int querySize) throws FileNotFoundException {
+        Assert.assertTrue(caching.isPreservingCase(), "testSequential only works for case preserving CachingIndexedFastaSequenceFile readers");
+
         final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
 
         SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
@@ -92,10 +95,10 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
     }
 
     // Tests grabbing sequences around a middle cached value.
-    @Test(dataProvider = "fastas", enabled = true)
+    @Test(dataProvider = "fastas", enabled = true && ! DEBUG)
     public void testCachingIndexedFastaReaderTwoStage(File fasta, int cacheSize, int querySize) throws FileNotFoundException {
         final IndexedFastaSequenceFile uncached = new IndexedFastaSequenceFile(fasta);
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
 
         SAMSequenceRecord contig = uncached.getSequenceDictionary().getSequence(0);
 
@@ -123,11 +126,6 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
     @DataProvider(name = "ParallelFastaTest")
     public Object[][] createParallelFastaTest() {
         List<Object[]> params = new ArrayList<Object[]>();
-//        for ( int nt : Arrays.asList(1, 2, 3) ) {
-//            for ( int cacheSize : CACHE_SIZES ) {
-//                params.add(new Object[]{simpleFasta, cacheSize, 10, nt});
-//            }
-//        }
 
         for ( File fasta : Arrays.asList(simpleFasta) ) {
             for ( int cacheSize : CACHE_SIZES ) {
@@ -143,9 +141,9 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
     }
 
 
-    @Test(dataProvider = "ParallelFastaTest", enabled = true, timeOut = 60000)
+    @Test(dataProvider = "ParallelFastaTest", enabled = true && ! DEBUG, timeOut = 60000)
     public void testCachingIndexedFastaReaderParallel(final File fasta, final int cacheSize, final int querySize, final int nt) throws FileNotFoundException, InterruptedException {
-        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize));
+        final CachingIndexedFastaSequenceFile caching = new CachingIndexedFastaSequenceFile(fasta, getCacheSize(cacheSize), true);
 
         logger.warn(String.format("Parallel caching index fasta reader test cacheSize %d querySize %d nt %d", caching.getCacheSize(), querySize, nt));
         for ( int iterations = 0; iterations < 1; iterations++ ) {
@@ -163,4 +161,49 @@ public class CachingIndexedFastaSequenceFileUnitTest extends BaseTest {
             executor.shutdownNow();
         }
     }
+
+    // make sure some bases are lower case and some are upper case
+    @Test(enabled = true)
+    public void testMixedCasesInExample() throws FileNotFoundException, InterruptedException {
+        final IndexedFastaSequenceFile original = new IndexedFastaSequenceFile(new File(exampleFASTA));
+        final CachingIndexedFastaSequenceFile casePreserving = new CachingIndexedFastaSequenceFile(new File(exampleFASTA), true);
+        final CachingIndexedFastaSequenceFile allUpper = new CachingIndexedFastaSequenceFile(new File(exampleFASTA));
+
+        int nMixedCase = 0;
+        for ( SAMSequenceRecord contig : original.getSequenceDictionary().getSequences() ) {
+            nMixedCase += testCases(original, casePreserving, allUpper, contig.getSequenceName(), -1, -1);
+
+            final int step = 100;
+            for ( int lastPos = step; lastPos < contig.getSequenceLength(); lastPos += step ) {
+                testCases(original, casePreserving, allUpper, contig.getSequenceName(), lastPos - step, lastPos);
+            }
+        }
+
+        Assert.assertTrue(nMixedCase > 0, "No mixed cases sequences found in file.  Unexpected test state");
+    }
+
+    private int testCases(final IndexedFastaSequenceFile original,
+                          final IndexedFastaSequenceFile casePreserving,
+                          final IndexedFastaSequenceFile allUpper,
+                          final String contig, final int start, final int stop ) {
+        final String orig = fetchBaseString(original, contig, start, stop);
+        final String keptCase = fetchBaseString(casePreserving, contig, start, stop);
+        final String upperCase = fetchBaseString(allUpper, contig, start, stop).toUpperCase();
+
+        final String origToUpper = orig.toUpperCase();
+        if ( ! orig.equals(origToUpper) ) {
+            Assert.assertEquals(keptCase, orig, "Case preserving operation not equal to the original case for contig " + contig);
+            Assert.assertEquals(upperCase, origToUpper, "All upper case reader not equal to the uppercase of original case for contig " + contig);
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    private String fetchBaseString(final IndexedFastaSequenceFile reader, final String contig, final int start, final int stop) {
+        if ( start == -1 )
+            return new String(reader.getSequence(contig).getBases());
+        else
+            return new String(reader.getSubsequenceAt(contig, start, stop).getBases());
+    }
 }

From 1444cd753bfcdb0084afdadae3e7c45d257d8f91 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 30 Oct 2012 16:58:55 -0400
Subject: [PATCH 041/236] Bugfix for GSA-647 HaplotypeCaller misses good
 variant because the active region doesn't trigger for an exome

-- The logic for determining active regions was a bit broken in the HC when intervals were used in the system
-- TraverseActiveRegions now uses the AllLocus view, since we always want to see all reference sites, not just those covered.  Simplifies logic of TAR
-- Non-overlapping intervals are always treated as separate objects for determing active / inactive state.  This means that each exon will stand on its own when deciding if it should be active or inactive
-- Misc. cleanup, docs of some TAR infrastructure to make it safer and easier to debug in the future.
-- Committing the SingleExomeCalling script that I used to find this problem, and will continue to use in evaluating calling of a single exome with the HC
-- Make sure to get all of the reads into the set of potentially active reads, even for genomic locations that themselves don't overlap the engine intervals but may have reads that overlap the regions
-- Remove excessively expensive calls to check bases are upper cased in ReferenceContext
-- Update md5s after a lot of manual review and discussion with Ryan
---
 .../haplotypecaller/HaplotypeCaller.java      |  16 +-
 .../HaplotypeCallerIntegrationTest.java       |   6 +-
 .../sting/gatk/contexts/ReferenceContext.java |  10 +-
 .../traversals/TraverseActiveRegions.java     | 199 +++++++++---------
 .../targets/FindCoveredIntervals.java         |   2 +-
 .../utils/activeregion/ActiveRegion.java      |  20 +-
 .../utils/activeregion/ActivityProfile.java   |  54 ++++-
 .../activeregion/ActivityProfileResult.java   |  52 ++++-
 .../activeregion/ActivityProfileUnitTest.java |   2 +-
 9 files changed, 226 insertions(+), 135 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 5aba23faa..a185ba6af 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -26,7 +26,6 @@
 package org.broadinstitute.sting.gatk.walkers.haplotypecaller;
 
 import com.google.java.contract.Ensures;
-import net.sf.picard.reference.IndexedFastaSequenceFile;
 import org.broadinstitute.sting.commandline.*;
 import org.broadinstitute.sting.gatk.CommandLineGATK;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
@@ -41,7 +40,10 @@ import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
 import org.broadinstitute.sting.gatk.walkers.*;
 import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatible;
-import org.broadinstitute.sting.gatk.walkers.genotyper.*;
+import org.broadinstitute.sting.gatk.walkers.genotyper.GenotypeLikelihoodsCalculationModel;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
+import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
@@ -212,7 +214,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
     private VariantAnnotatorEngine annotationEngine;
 
     // fasta reference reader to supplement the edges of the reference sequence
-    private IndexedFastaSequenceFile referenceReader;
+    private CachingIndexedFastaSequenceFile referenceReader;
 
     // reference base padding size
     private static final int REFERENCE_PADDING = 900;
@@ -324,15 +326,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
                 }
             }
             if( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ) {
-                return new ActivityProfileResult(1.0);
+                return new ActivityProfileResult(ref.getLocus(), 1.0);
             }
         }
 
         if( USE_ALLELES_TRIGGER ) {
-            return new ActivityProfileResult( tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
+            return new ActivityProfileResult( ref.getLocus(), tracker.getValues(UG_engine.getUAC().alleles, ref.getLocus()).size() > 0 ? 1.0 : 0.0 );
         }
 
-        if( context == null ) { return new ActivityProfileResult(0.0); }
+        if( context == null ) { return new ActivityProfileResult(ref.getLocus(), 0.0); }
 
         final List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
         noCall.add(Allele.NO_CALL);
@@ -369,7 +371,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
         final VariantCallContext vcOut = UG_engine_simple_genotyper.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.INDEL);
         final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
 
-        return new ActivityProfileResult( isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
+        return new ActivityProfileResult( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > 6.0 ? ActivityProfileResult.ActivityProfileResultState.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileResult.ActivityProfileResultState.NONE, averageHQSoftClips.mean() );
     }
 
     //---------------------------------------------------------------------------------------------------------------
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index 86f3748ce..d00f5b61d 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleSymbolic() {
-        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "16013a9203367c3d1c4ce1dcdc81ef4a");
+        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4");
     }
 
     private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -69,8 +69,8 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void HCTestProblematicReadsModifiedInActiveRegions() {
-        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("c306140ad28515ee06c603c225217939"));
+        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56"));
         executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
index c8bf1e3e8..34627b973 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/contexts/ReferenceContext.java
@@ -78,7 +78,7 @@ public class ReferenceContext {
          *
          * @return
          */
-        @Ensures({"result != null", "BaseUtils.isUpperCase(result)"})
+        @Ensures({"result != null"})
         public byte[] getBases();
     }
 
@@ -143,6 +143,9 @@ public class ReferenceContext {
     private void fetchBasesFromProvider() {
         if ( basesCache == null ) {
             basesCache = basesProvider.getBases();
+
+            // must be an assertion that only runs when the bases are fetch to run in a reasonable amount of time
+            assert BaseUtils.isUpperCase(basesCache);
         }
     }
 
@@ -172,7 +175,6 @@ public class ReferenceContext {
      * Get the base at the given locus.
      * @return The base at the given locus from the reference.
      */
-    @Ensures("BaseUtils.isUpperCase(result)")
     public byte getBase() {
         return getBases()[(locus.getStart() - window.getStart())];
     }
@@ -182,7 +184,7 @@ public class ReferenceContext {
      * @return All bases available.  If the window is of size [0,0], the array will
      *         contain only the base at the given locus.
      */
-    @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"})
+    @Ensures({"result != null", "result.length > 0"})
     public byte[] getBases() {
         fetchBasesFromProvider();
         return basesCache;
@@ -191,7 +193,7 @@ public class ReferenceContext {
     /**
      * All the bases in the window from the current base forward to the end of the window.
      */
-    @Ensures({"result != null", "result.length > 0", "BaseUtils.isUpperCase(result)"})
+    @Ensures({"result != null", "result.length > 0"})
     public byte[] getForwardBases() {
         final byte[] bases = getBases();
         final int mid = locus.getStart() - window.getStart();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 5d38df0f5..a2c37944a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -11,7 +11,6 @@ import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
 import org.broadinstitute.sting.gatk.walkers.DataSource;
 import org.broadinstitute.sting.gatk.walkers.Walker;
 import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfile;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
@@ -46,99 +45,127 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                        T sum) {
         logger.debug(String.format("TraverseActiveRegion.traverse: Shard is %s", dataProvider));
 
-        final LocusView locusView = getLocusView( walker, dataProvider );
-        final GenomeLocSortedSet initialIntervals = engine.getIntervals();
+        final LocusView locusView = new AllLocusView(dataProvider);
 
         final LocusReferenceView referenceView = new LocusReferenceView( walker, dataProvider );
         final int activeRegionExtension = walker.getClass().getAnnotation(ActiveRegionExtension.class).extension();
         final int maxRegionSize = walker.getClass().getAnnotation(ActiveRegionExtension.class).maxRegion();
 
-        if ( locusView.hasNext() ) { // trivial optimization to avoid unnecessary processing when there's nothing here at all
-            int minStart = Integer.MAX_VALUE;
-            ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
+        int minStart = Integer.MAX_VALUE;
+        final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
+        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
 
-            ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
+        ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
 
-            // We keep processing while the next reference location is within the interval
-            GenomeLoc prevLoc = null;
-            while( locusView.hasNext() ) {
-                final AlignmentContext locus = locusView.next();
-                GenomeLoc location = locus.getLocation();
+        // We keep processing while the next reference location is within the interval
+        GenomeLoc prevLoc = null;
+        while( locusView.hasNext() ) {
+            final AlignmentContext locus = locusView.next();
+            final GenomeLoc location = locus.getLocation();
 
-                if(prevLoc != null) {
-                    // fill in the active / inactive labels from the stop of the previous location to the start of this location
-                    // TODO refactor to separate function
-                    for(int iii = prevLoc.getStop() + 1; iii < location.getStart(); iii++ ) {
-                        final GenomeLoc fakeLoc = engine.getGenomeLocParser().createGenomeLoc(prevLoc.getContig(), iii, iii);
-                        if( initialIntervals == null || initialIntervals.overlaps( fakeLoc ) ) {
-                            profile.add(fakeLoc, new ActivityProfileResult( walker.hasPresetActiveRegions() && walker.presetActiveRegions.overlaps(fakeLoc) ? 1.0 : 0.0 ));
-                        }
-                    }
+            // Grab all the previously unseen reads from this pileup and add them to the massive read list
+            // Note that this must occur before we leave because we are outside the intervals because
+            // reads may occur outside our intervals but overlap them in the future
+            // TODO -- this whole HashSet logic should be changed to a linked list of reads with
+            // TODO -- subsequent pass over them to find the ones overlapping the active regions
+            for( final PileupElement p : locus.getBasePileup() ) {
+                final GATKSAMRecord read = p.getRead();
+                if( !myReads.contains(read) ) {
+                    myReads.add(read);
                 }
 
-                dataProvider.getShard().getReadMetrics().incrementNumIterations();
-
-                // create reference context. Note that if we have a pileup of "extended events", the context will
-                // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
-                final ReferenceContext refContext = referenceView.getReferenceContext(location);
-
-                // Iterate forward to get all reference ordered data covering this location
-                final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
-
-                // Call the walkers isActive function for this locus and add them to the list to be integrated later
-                if( initialIntervals == null || initialIntervals.overlaps( location ) ) {
-                    profile.add(location, walkerActiveProb(walker, tracker, refContext, locus, location));
-                }
-
-                // Grab all the previously unseen reads from this pileup and add them to the massive read list
-                for( final PileupElement p : locus.getBasePileup() ) {
-                    final GATKSAMRecord read = p.getRead();
-                    if( !myReads.contains(read) ) {
-                        myReads.add(read);
-                    }
-
-                    // If this is the last pileup for this shard calculate the minimum alignment start so that we know
-                    // which active regions in the work queue are now safe to process
-                    minStart = Math.min(minStart, read.getAlignmentStart());
-                }
-
-                prevLoc = location;
-
-                printProgress(locus.getLocation());
+                // If this is the last pileup for this shard calculate the minimum alignment start so that we know
+                // which active regions in the work queue are now safe to process
+                minStart = Math.min(minStart, read.getAlignmentStart());
             }
 
-            updateCumulativeMetrics(dataProvider.getShard());
+            // skip this location -- it's not part of our engine intervals
+            // TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
+            if ( outsideEngineIntervals(location) )
+                continue;
 
-            // Take the individual isActive calls and integrate them into contiguous active regions and
-            // add these blocks of work to the work queue
-            // band-pass filter the list of isActive probabilities and turn into active regions
-            final ActivityProfile bandPassFiltered = profile.bandPassFilter();
-            final List<ActiveRegion> activeRegions = bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize );
-
-            // add active regions to queue of regions to process
-            // first check if can merge active regions over shard boundaries
-            if( !activeRegions.isEmpty() ) {
-                if( !workQueue.isEmpty() ) {
-                    final ActiveRegion last = workQueue.getLast();
-                    final ActiveRegion first = activeRegions.get(0);
-                    if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
-                        workQueue.removeLast();
-                        activeRegions.remove(first);
-                        workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
-                    }
-                }
-                workQueue.addAll( activeRegions );
+            if ( prevLoc != null && location.getStart() != prevLoc.getStop() + 1 ) {
+                // we've move across some interval boundary, restart profile
+                profile = incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
             }
 
-            logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
+            dataProvider.getShard().getReadMetrics().incrementNumIterations();
 
-            // now go and process all of the active regions
-            sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
+            // create reference context. Note that if we have a pileup of "extended events", the context will
+            // hold the (longest) stretch of deleted reference bases (if deletions are present in the pileup).
+            final ReferenceContext refContext = referenceView.getReferenceContext(location);
+
+            // Iterate forward to get all reference ordered data covering this location
+            final RefMetaDataTracker tracker = referenceOrderedDataView.getReferenceOrderedDataAtLocus(locus.getLocation(), refContext);
+
+            // Call the walkers isActive function for this locus and add them to the list to be integrated later
+            profile.add(walkerActiveProb(walker, tracker, refContext, locus, location));
+
+            prevLoc = location;
+
+            printProgress(locus.getLocation());
         }
 
+        updateCumulativeMetrics(dataProvider.getShard());
+
+        if ( ! profile.isEmpty() )
+            incorporateActiveRegions(profile, activeRegions, activeRegionExtension, maxRegionSize);
+
+        // add active regions to queue of regions to process
+        // first check if can merge active regions over shard boundaries
+        if( !activeRegions.isEmpty() ) {
+            if( !workQueue.isEmpty() ) {
+                final ActiveRegion last = workQueue.getLast();
+                final ActiveRegion first = activeRegions.get(0);
+                if( last.isActive == first.isActive && last.getLocation().contiguousP(first.getLocation()) && last.getLocation().size() + first.getLocation().size() <= maxRegionSize ) {
+                    workQueue.removeLast();
+                    activeRegions.remove(first);
+                    workQueue.add( new ActiveRegion(last.getLocation().union(first.getLocation()), first.isActive, this.engine.getGenomeLocParser(), activeRegionExtension) );
+                }
+            }
+            workQueue.addAll( activeRegions );
+        }
+
+        logger.debug("Integrated " + profile.size() + " isActive calls into " + activeRegions.size() + " regions." );
+
+        // now go and process all of the active regions
+        sum = processActiveRegions(walker, sum, minStart, dataProvider.getLocus().getContig());
+
         return sum;
     }
 
+    /**
+     * Is the loc outside of the intervals being requested for processing by the GATK?
+     * @param loc
+     * @return
+     */
+    private boolean outsideEngineIntervals(final GenomeLoc loc) {
+        return engine.getIntervals() != null && ! engine.getIntervals().overlaps(loc);
+    }
+
+    /**
+     * Take the individual isActive calls and integrate them into contiguous active regions and
+     * add these blocks of work to the work queue
+     * band-pass filter the list of isActive probabilities and turn into active regions
+     *
+     * @param profile
+     * @param activeRegions
+     * @param activeRegionExtension
+     * @param maxRegionSize
+     * @return
+     */
+    private ActivityProfile incorporateActiveRegions(final ActivityProfile profile,
+                                                     final List<ActiveRegion> activeRegions,
+                                                     final int activeRegionExtension,
+                                                     final int maxRegionSize) {
+        if ( profile.isEmpty() )
+            throw new IllegalStateException("trying to incorporate an empty active profile " + profile);
+
+        final ActivityProfile bandPassFiltered = profile.bandPassFilter();
+        activeRegions.addAll(bandPassFiltered.createActiveRegions( activeRegionExtension, maxRegionSize ));
+        return new ActivityProfile( engine.getGenomeLocParser(), profile.hasPresetRegions() );
+    }
+
 
     // --------------------------------------------------------------------------------
     //
@@ -150,7 +177,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                                           final RefMetaDataTracker tracker, final ReferenceContext refContext,
                                           final AlignmentContext locus, final GenomeLoc location) {
         if ( walker.hasPresetActiveRegions() ) {
-            return new ActivityProfileResult(walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
+            return new ActivityProfileResult(location, walker.presetActiveRegions.overlaps(location) ? 1.0 : 0.0);
         } else {
             return walker.isActive( tracker, refContext, locus );
         }
@@ -250,30 +277,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
         return walker.reduce( x, sum );
     }
 
-    // --------------------------------------------------------------------------------
-    //
-    // engine interaction code
-    //
-    // --------------------------------------------------------------------------------
-
-    /**
-     * Gets the best view of loci for this walker given the available data.
-     * @param walker walker to interrogate.
-     * @param dataProvider Data which which to drive the locus view.
-     * @return A view of the locus data, where one iteration of the locus view maps to one iteration of the traversal.
-     */
-    private LocusView getLocusView( final Walker<M,T> walker, final LocusShardDataProvider dataProvider ) {
-        final DataSource dataSource = WalkerManager.getWalkerDataSource(walker);
-        if( dataSource == DataSource.READS )
-            return new CoveredLocusView(dataProvider);
-        else if( dataSource == DataSource.REFERENCE ) //|| ! GenomeAnalysisEngine.instance.getArguments().enableRodWalkers )
-            return new AllLocusView(dataProvider);
-        else if( dataSource == DataSource.REFERENCE_ORDERED_DATA )
-            return new RodLocusView(dataProvider);
-        else
-            throw new UnsupportedOperationException("Unsupported traversal type: " + dataSource);
-    }
-
     /**
      * Special function called in LinearMicroScheduler to empty out the work queue.
      * Ugly for now but will be cleaned up when we push this functionality more into the engine
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
index e17c6cdb7..85b7159e8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/diagnostics/targets/FindCoveredIntervals.java
@@ -57,7 +57,7 @@ public class FindCoveredIntervals extends ActiveRegionWalker<GenomeLoc, Long> {
         int depth = ThresHolder.DEFAULTS.getFilteredCoverage(context.getBasePileup());
 
         // note the linear probability scale
-        return new ActivityProfileResult(Math.min(depth / coverageThreshold, 1));
+        return new ActivityProfileResult(ref.getLocus(), Math.min(depth / coverageThreshold, 1));
 
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
index decc54d47..0d12d53cc 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegion.java
@@ -1,11 +1,11 @@
 package org.broadinstitute.sting.utils.activeregion;
 
-import net.sf.picard.reference.IndexedFastaSequenceFile;
-import net.sf.samtools.util.StringUtil;
+import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.HasGenomeLocation;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.ArrayList;
@@ -54,27 +54,31 @@ public class ActiveRegion implements HasGenomeLocation {
 
     public ArrayList<GATKSAMRecord> getReads() { return reads; }
 
-    public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader ) {
         return getActiveRegionReference(referenceReader, 0);
     }
 
-    public byte[] getActiveRegionReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getActiveRegionReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
         return getReference( referenceReader, padding, extendedLoc );
     }
 
-    public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader ) {
         return getFullReference(referenceReader, 0);
     }
 
-    public byte[] getFullReference( final IndexedFastaSequenceFile referenceReader, final int padding ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    public byte[] getFullReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding ) {
         return getReference( referenceReader, padding, fullExtentReferenceLoc );
     }
 
-    private byte[] getReference( final IndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
+    @Requires("referenceReader.isUppercasingBases()")
+    private byte[] getReference( final CachingIndexedFastaSequenceFile referenceReader, final int padding, final GenomeLoc genomeLoc ) {
         final byte[] reference =  referenceReader.getSubsequenceAt( genomeLoc.getContig(),
                 Math.max(1, genomeLoc.getStart() - padding),
                 Math.min(referenceReader.getSequenceDictionary().getSequence(genomeLoc.getContig()).getSequenceLength(), genomeLoc.getStop() + padding) ).getBases();
-        StringUtil.toUpperCase(reference);
         return reference;
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
index 73f3cc487..e96eb843d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
@@ -24,11 +24,11 @@
 
 package org.broadinstitute.sting.utils.activeregion;
 
+import com.google.java.contract.Requires;
 import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
 import java.util.ArrayList;
 import java.util.Collections;
@@ -45,6 +45,7 @@ public class ActivityProfile {
     final GenomeLocParser parser;
     final boolean presetRegions;
     GenomeLoc regionStartLoc = null;
+    GenomeLoc regionStopLoc = null;
     final List<ActivityProfileResult> isActiveList;
     private static final int FILTER_SIZE = 80;
     private static final double[] GaussianKernel;
@@ -71,19 +72,49 @@ public class ActivityProfile {
         this.regionStartLoc = regionStartLoc;
     }
 
-    public void add(final GenomeLoc loc, final ActivityProfileResult result) {
-        if ( loc.size() != 1 )
-            throw new ReviewedStingException("Bad add call to ActivityProfile: loc " + loc + " size != 1" );
-        isActiveList.add(result);
-        if( regionStartLoc == null ) {
+    @Override
+    public String toString() {
+        return "ActivityProfile{" +
+                "start=" + regionStartLoc +
+                ", stop=" + regionStopLoc +
+                '}';
+    }
+
+    /**
+     * Add the next ActivityProfileResult to this profile.
+     *
+     * Must be contiguous with the previously added result, or an IllegalArgumentException will be thrown
+     *
+     * @param result a well-formed ActivityProfileResult result to incorporate into this profile
+     */
+    @Requires("result != null")
+    public void add(final ActivityProfileResult result) {
+        final GenomeLoc loc = result.getLoc();
+
+        if ( regionStartLoc == null ) {
             regionStartLoc = loc;
+            regionStopLoc = loc;
+        } else {
+            if ( regionStopLoc.getStart() != loc.getStart() - 1 )
+                throw new IllegalArgumentException("Bad add call to ActivityProfile: loc " + loc + " not immediate after last loc " + regionStopLoc );
+            regionStopLoc = loc;
         }
+
+        isActiveList.add(result);
     }
 
     public int size() {
         return isActiveList.size();
     }
 
+    public boolean isEmpty() {
+        return isActiveList.isEmpty();
+    }
+
+    public boolean hasPresetRegions() {
+        return presetRegions;
+    }
+
     /**
      * Band pass this ActivityProfile, producing a new profile that's band pass filtered
      * @return a new ActivityProfile that's the band-pass filtered version of this profile
@@ -104,14 +135,21 @@ public class ActivityProfile {
             }
             iii++;
         }
-        final double[] filteredProbArray = new double[activeProbArray.length];
+
+        final double[] filteredProbArray;
         if( !presetRegions ) {
+            // if we aren't using preset regions, actually apply the band pass filter for activeProbArray into filteredProbArray
+            filteredProbArray = new double[activeProbArray.length];
             for( iii = 0; iii < activeProbArray.length; iii++ ) {
                 final double[] kernel = ArrayUtils.subarray(GaussianKernel, Math.max(FILTER_SIZE-iii, 0), Math.min(GaussianKernel.length,FILTER_SIZE + activeProbArray.length - iii));
                 final double[] activeProbSubArray = ArrayUtils.subarray(activeProbArray, Math.max(0,iii - FILTER_SIZE), Math.min(activeProbArray.length,iii + FILTER_SIZE + 1));
                 filteredProbArray[iii] = MathUtils.dotProduct(activeProbSubArray, kernel);
             }
+        } else {
+            // otherwise we simply use the activeProbArray directly
+            filteredProbArray = activeProbArray;
         }
+
         iii = 0;
         for( final double prob : filteredProbArray ) {
             final ActivityProfileResult result = isActiveList.get(iii++);
@@ -119,6 +157,7 @@ public class ActivityProfile {
             result.resultState = ActivityProfileResult.ActivityProfileResultState.NONE;
             result.resultValue = null;
         }
+
         return new ActivityProfile(parser, presetRegions, isActiveList, regionStartLoc);
     }
 
@@ -166,6 +205,7 @@ public class ActivityProfile {
     private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize) {
         return createActiveRegion(isActive, curStart, curEnd, activeRegionExtension, maxRegionSize, new ArrayList<ActiveRegion>());
     }
+
     private final List<ActiveRegion> createActiveRegion(final boolean isActive, final int curStart, final int curEnd, final int activeRegionExtension, final int maxRegionSize, final List<ActiveRegion> returnList) {
         if( !isActive || curEnd - curStart < maxRegionSize ) {
             final GenomeLoc loc = parser.createGenomeLoc(regionStartLoc.getContig(), regionStartLoc.getStart() + curStart, regionStartLoc.getStart() + curEnd);
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
index 8dc29aa3c..273c2e785 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfileResult.java
@@ -1,12 +1,16 @@
 package org.broadinstitute.sting.utils.activeregion;
 
+import com.google.java.contract.Ensures;
+import com.google.java.contract.Requires;
+import org.broadinstitute.sting.utils.GenomeLoc;
+
 /**
  * Created with IntelliJ IDEA.
  * User: rpoplin
  * Date: 7/27/12
  */
-
 public class ActivityProfileResult {
+    private GenomeLoc loc;
     public double isActiveProb;
     public ActivityProfileResultState resultState;
     public Number resultValue;
@@ -16,16 +20,52 @@ public class ActivityProfileResult {
         HIGH_QUALITY_SOFT_CLIPS
     }
 
-    public ActivityProfileResult( final double isActiveProb ) {
-        this.isActiveProb = isActiveProb;
-        this.resultState = ActivityProfileResultState.NONE;
-        this.resultValue = null;
+    /**
+     * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb
+     *
+     * @param loc the position of the result profile (for debugging purposes)
+     * @param isActiveProb the probability of being active (between 0 and 1)
+     */
+    @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
+    public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb ) {
+        this(loc, isActiveProb, ActivityProfileResultState.NONE, null);
     }
 
-    public ActivityProfileResult( final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
+    /**
+     * Create a new ActivityProfileResult at loc with probability of being active of isActiveProb that maintains some
+     * information about the result state and value (TODO RYAN -- what do these mean?)
+     *
+     * @param loc the position of the result profile (for debugging purposes)
+     * @param isActiveProb the probability of being active (between 0 and 1)
+     */
+    @Requires({"loc != null", "isActiveProb >= 0.0 && isActiveProb <= 1.0"})
+    public ActivityProfileResult( final GenomeLoc loc, final double isActiveProb, final ActivityProfileResultState resultState, final Number resultValue ) {
+        // make sure the location of that activity profile is 1
+        if ( loc.size() != 1 )
+            throw new IllegalArgumentException("Location for an ActivityProfileResult must have to size 1 bp but saw " + loc);
+
+        this.loc = loc;
         this.isActiveProb = isActiveProb;
         this.resultState = resultState;
         this.resultValue = resultValue;
     }
 
+    /**
+     * Get the genome loc associated with the ActivityProfileResult
+     * @return the location of this result
+     */
+    @Ensures("result != null")
+    public GenomeLoc getLoc() {
+        return loc;
+    }
+
+    @Override
+    public String toString() {
+        return "ActivityProfileResult{" +
+                "loc=" + loc +
+                ", isActiveProb=" + isActiveProb +
+                ", resultState=" + resultState +
+                ", resultValue=" + resultValue +
+                '}';
+    }
 }
diff --git a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
index f7c564c74..57dd19888 100644
--- a/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/activeregion/ActivityProfileUnitTest.java
@@ -123,7 +123,7 @@ public class ActivityProfileUnitTest extends BaseTest {
         for ( int i = 0; i < cfg.probs.size(); i++ ) {
             double p = cfg.probs.get(i);
             GenomeLoc loc = genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart() + i, cfg.regionStart.getStart() + i);
-            profile.add(loc, new ActivityProfileResult(p));
+            profile.add(new ActivityProfileResult(loc, p));
         }
         Assert.assertEquals(profile.regionStartLoc, genomeLocParser.createGenomeLoc(cfg.regionStart.getContig(), cfg.regionStart.getStart(), cfg.regionStart.getStart() ));
 

From 872abddfcec844f2217fafb3ac3f4451b9cc844b Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 31 Oct 2012 19:52:25 -0400
Subject: [PATCH 043/236] Add custom TestNGTestTransformer that adds a maximum
 test runtime of 10 minutes to all testng tests

-- Closes GSA-494 / Add maximum runtime for integration tests, running them in timeout thread
-- Needed to debug locking issues
-- Needed to debug excessively long running integrationtests
-- Added build.xml maximum runtime for all testng tests of 10 hours.  We will ultimately fail the build if it goes on for more than 10 hours
---
 build.xml                                     |  4 +-
 .../sting/TestNGTestTransformer.java          | 37 +++++++++++++++++++
 2 files changed, 40 insertions(+), 1 deletion(-)
 create mode 100644 public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java

diff --git a/build.xml b/build.xml
index c6b1afc56..7702be7e4 100644
--- a/build.xml
+++ b/build.xml
@@ -1174,14 +1174,16 @@
             <echo message="" />
             <echo message="Sting: Running @{testtype} test cases!"/>
 
+	    <!-- no test is allowed to run for more than 10 hours -->
             <taskdef resource="testngtasks" classpath="${testng.jar}"/>
             <testng outputDir="@{outputdir}"
                     classpathref="${testng.classpath}"
                     haltOnFailure="false" failureProperty="test.failure"
                     verbose="2"
+		            timeout="36000000"
                     workingDir="${basedir}"
                     useDefaultListeners="false"
-                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
+                    listeners="org.testng.reporters.FailedReporter,org.testng.reporters.JUnitXMLReporter,org.broadinstitute.sting.TestNGTestTransformer,org.broadinstitute.sting.StingTextReporter,org.uncommons.reportng.HTMLReporter">
                 <jvmarg value="-Xmx${test.maxmemory}" />
                 <jvmarg value="-ea" />
                 <jvmarg value="-Djava.awt.headless=true" />
diff --git a/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java b/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
new file mode 100644
index 000000000..6a1a37de9
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/TestNGTestTransformer.java
@@ -0,0 +1,37 @@
+package org.broadinstitute.sting;
+
+import org.apache.log4j.Logger;
+import org.testng.IAnnotationTransformer;
+import org.testng.annotations.ITestAnnotation;
+
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Method;
+
+/**
+ * Provide default @Test values for GATK testng tests.
+ *
+ * Currently only sets the maximum runtime to 10 minutes, if it's not been specified.
+ *
+ * See http://beust.com/weblog/2006/10/18/annotation-transformers-in-java/
+ *
+ * @author depristo
+ * @since 10/31/12
+ * @version 0.1
+ */
+public class TestNGTestTransformer implements IAnnotationTransformer {
+    public static final long DEFAULT_TIMEOUT = 1000 * 60 * 10; // 10 minutes max per test
+
+    final static Logger logger = Logger.getLogger(TestNGTestTransformer.class);
+
+    public void transform(ITestAnnotation annotation,
+                          Class testClass,
+                          Constructor testConstructor,
+                          Method testMethod)
+    {
+        if ( annotation.getTimeOut() == 0 ) {
+            logger.warn("test " + testMethod.toString() + " has no specified timeout, adding default timeout " + DEFAULT_TIMEOUT / 1000 / 60 + " minutes");
+            annotation.setTimeOut(DEFAULT_TIMEOUT);
+        }
+    }
+}
+

From 386b45e94db43e5e68b2b0fec3cac8fd97d1d6fc Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Thu, 1 Nov 2012 15:44:41 -0400
Subject: [PATCH 044/236] This VE eval module isn't useful anymore.

---
 .../evaluators/VariantQualityScore.java       | 249 ------------------
 1 file changed, 249 deletions(-)
 delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
deleted file mode 100755
index 347ca56b8..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/evaluators/VariantQualityScore.java
+++ /dev/null
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
-
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.varianteval.util.Analysis;
-import org.broadinstitute.sting.gatk.walkers.varianteval.util.DataPoint;
-import org.broadinstitute.sting.utils.collections.Pair;
-import org.broadinstitute.sting.utils.variantcontext.Allele;
-import org.broadinstitute.sting.utils.variantcontext.VariantContext;
-import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-
-/**
- * @author rpoplin
- * @since Apr 6, 2010
- */
-
-//@Analysis(name = "Variant Quality Score", description = "Shows various stats of sets of variants binned by variant quality score")
-@Deprecated
-public class VariantQualityScore {
-    // TODO - this should really be a stratification
-
-//    public class VariantQualityScore extends VariantEvaluator {
-//
-//    // a mapping from quality score histogram bin to Ti/Tv ratio
-//    @DataPoint(description = "the Ti/Tv ratio broken out by variant quality")
-//    TiTvStats titvStats = null;
-//
-//    @DataPoint(description = "average variant quality for each allele count")
-//    AlleleCountStats alleleCountStats = null;
-//
-//    static class TiTvStats extends TableType {
-//        final static int NUM_BINS = 20;
-//        final HashMap<Integer, Pair<Long,Long>> qualByIsTransition = new HashMap<Integer, Pair<Long,Long>>(); // A hashMap holds all the qualities until we are able to bin them appropriately
-//        final long transitionByQuality[] = new long[NUM_BINS];
-//        final long transversionByQuality[] = new long[NUM_BINS];
-//        final double titvByQuality[] = new double[NUM_BINS]; // the final ti/tv sets that get reported out
-//
-//        public Object[] getRowKeys() {
-//            return new String[]{"sample"};
-//        }
-//
-//        public Object[] getColumnKeys() {
-//            final String columnKeys[] = new String[NUM_BINS];
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                columnKeys[iii] = "titvBin" + iii;
-//            }
-//            return columnKeys;
-//        }
-//
-//        public String getCell(int x, int y) {
-//            return String.valueOf(titvByQuality[y]);
-//        }
-//
-//        public String toString() {
-//            StringBuffer returnString = new StringBuffer();
-//            // output the ti/tv array
-//            returnString.append("titvByQuality: ");
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                returnString.append(titvByQuality[iii]);
-//                returnString.append(" ");
-//            }
-//            return returnString.toString();
-//        }
-//
-//        public void incrValue( final double qual, final boolean isTransition ) {
-//            final Integer qualKey = Math.round((float) qual);
-//            final long numTransition = (isTransition ? 1L : 0L);
-//            final long numTransversion = (isTransition ? 0L : 1L);
-//            if( qualByIsTransition.containsKey(qualKey) ) {
-//                Pair<Long,Long> transitionPair = qualByIsTransition.get(qualKey);
-//                transitionPair.set(transitionPair.getFirst() + numTransition, transitionPair.getSecond() + numTransversion);
-//                qualByIsTransition.put(qualKey, transitionPair);
-//            } else {
-//                qualByIsTransition.put(qualKey, new Pair<Long,Long>(numTransition,numTransversion));
-//            }
-//        }
-//
-//        public void organizeTiTvTables() {
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                transitionByQuality[iii] = 0L;
-//                transversionByQuality[iii] = 0L;
-//                titvByQuality[iii] = 0.0;
-//            }
-//
-//            int maxQual = 0;
-//
-//            // Calculate the maximum quality score in order to normalize and histogram
-//            for( final Integer qual : qualByIsTransition.keySet() ) {
-//                if( qual > maxQual ) {
-//                    maxQual = qual;
-//                }
-//            }
-//
-//            final double binSize = ((double)maxQual) / ((double) (NUM_BINS-1));
-//
-//            for( final Integer qual : qualByIsTransition.keySet() ) {
-//                final int index = (int)Math.floor( ((double) qual) / binSize );
-//                if( index >= 0 ) { // BUGBUG: why is there overflow here?
-//                    Pair<Long,Long> transitionPair = qualByIsTransition.get(qual);
-//                    transitionByQuality[index] += transitionPair.getFirst();
-//                    transversionByQuality[index] += transitionPair.getSecond();
-//                }
-//            }
-//
-//            for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//                if( transitionByQuality[iii] + transversionByQuality[iii] > 800L ) { // need to have a sufficient number of variants to get a useful Ti/Tv ratio
-//                    titvByQuality[iii] = ((double) transitionByQuality[iii]) / ((double) transversionByQuality[iii]);
-//                } else {
-//                    titvByQuality[iii] = 0.0;
-//                }
-//            }
-//
-//        }
-//    }
-//
-//    class AlleleCountStats extends TableType {
-//        final HashMap<Integer, ArrayList<Double>> qualityListMap = new HashMap<Integer, ArrayList<Double>>();
-//        final HashMap<Integer, Double> qualityMap = new HashMap<Integer, Double>();
-//
-//        public Object[] getRowKeys() {
-//            final int NUM_BINS = qualityListMap.keySet().size();
-//            final String rowKeys[] = new String[NUM_BINS];
-//            int iii = 0;
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                rowKeys[iii] = "AC" + key;
-//                iii++;
-//            }
-//            return rowKeys;
-//
-//        }
-//
-//        public Object[] getColumnKeys() {
-//            return new String[]{"alleleCount","avgQual"};
-//        }
-//
-//        public String getCell(int x, int y) {
-//            int iii = 0;
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                if(iii == x) {
-//                    if(y == 0) { return String.valueOf(key); }
-//                    else { return String.valueOf(qualityMap.get(key)); }
-//                }
-//                iii++;
-//            }
-//            return null;
-//        }
-//
-//        public String toString() {
-//            String returnString = "";
-//            // output the quality map
-//            returnString += "AlleleCountStats: ";
-//            //for( int iii = 0; iii < NUM_BINS; iii++ ) {
-//            //    returnString += titvByQuality[iii] + " ";
-//            //}
-//            return returnString;
-//        }
-//
-//        public void incrValue( final double qual, final int alleleCount ) {
-//            ArrayList<Double> list = qualityListMap.get(alleleCount);
-//            if(list==null) { list = new ArrayList<Double>(); }
-//            list.add(qual);
-//            qualityListMap.put(alleleCount, list);
-//        }
-//
-//        public void organizeAlleleCountTables() {
-//            for( final Integer key : qualityListMap.keySet() ) {
-//                final ArrayList<Double> list = qualityListMap.get(key);
-//                double meanQual = 0.0;
-//                final double numQuals = (double)list.size();
-//                for( Double qual : list ) {
-//                    meanQual += qual / numQuals;
-//                }
-//                qualityMap.put(key, meanQual);
-//            }
-//        }
-//    }
-//
-//    //public VariantQualityScore(VariantEvalWalker parent) {
-//        //super(parent);
-//    //}
-//
-//    public String getName() {
-//        return "VariantQualityScore";
-//    }
-//
-//    public int getComparisonOrder() {
-//        return 1;   // we only need to see each eval track
-//    }
-//
-//    public String toString() {
-//        return getName();
-//    }
-//
-//    public String update1(VariantContext eval, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-//        final String interesting = null;
-//
-//        if( eval != null && eval.isSNP() && eval.isBiallelic() && eval.isPolymorphicInSamples() ) { //BUGBUG: only counting biallelic sites (revisit what to do with triallelic sites)
-//            if( titvStats == null ) { titvStats = new TiTvStats(); }
-//            titvStats.incrValue(eval.getPhredScaledQual(), VariantContextUtils.isTransition(eval));
-//
-//            if( alleleCountStats == null ) { alleleCountStats = new AlleleCountStats(); }
-//            int alternateAlleleCount = 0;
-//            for (final Allele a : eval.getAlternateAlleles()) {
-//                alternateAlleleCount += eval.getCalledChrCount(a);
-//            }
-//            alleleCountStats.incrValue(eval.getPhredScaledQual(), alternateAlleleCount);
-//        }
-//
-//        return interesting; // This module doesn't capture any interesting sites, so return null
-//    }
-//
-//    public void finalizeEvaluation() {
-//        if( titvStats != null ) {
-//            titvStats.organizeTiTvTables();
-//        }
-//        if( alleleCountStats != null ) {
-//            alleleCountStats.organizeAlleleCountTables();
-//        }
-//    }
-}
\ No newline at end of file

From 6185e8c43255d7213f4d4c464c71ff3b5e1a09d4 Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Thu, 1 Nov 2012 17:48:58 -0400
Subject: [PATCH 046/236] Allow large-scale tests 5 hours each to run

---
 .../walkers/genotyper/UnifiedGenotyperLargeScaleTest.java   | 6 +++---
 .../gatk/walkers/indels/IndelRealignerLargeScaleTest.java   | 4 ++--
 .../indels/RealignerTargetCreatorLargeScaleTest.java        | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java
index 109088875..c5a5dcc21 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperLargeScaleTest.java
@@ -7,7 +7,7 @@ import java.util.ArrayList;
 
 public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
 
-    @Test
+    @Test( timeOut = 18000000 )
     public void testUnifiedGenotyperWholeGenome() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-R " + hg18Reference +
@@ -22,7 +22,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
         executeTest("testUnifiedGenotyperWholeGenome", spec);
     }
 
-    @Test
+    @Test( timeOut = 18000000 )
     public void testUnifiedGenotyperWholeExome() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-R " + hg18Reference +
@@ -37,7 +37,7 @@ public class UnifiedGenotyperLargeScaleTest extends WalkerTest {
         executeTest("testUnifiedGenotyperWholeExome", spec);
     }
 
-    @Test
+    @Test( timeOut = 18000000 )
     public void testUnifiedGenotyperWGParallel() {
         WalkerTestSpec spec = new WalkerTestSpec(
                 "-R " + hg18Reference +
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java
index 4526fc0d7..2dd5a66fd 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/IndelRealignerLargeScaleTest.java
@@ -6,7 +6,7 @@ import org.testng.annotations.Test;
 import java.util.ArrayList;
 
 public class IndelRealignerLargeScaleTest extends WalkerTest {
-    @Test
+    @Test( timeOut = 18000000 )
     public void testHighCoverage() {
         WalkerTestSpec spec = new WalkerTestSpec(
 
@@ -21,7 +21,7 @@ public class IndelRealignerLargeScaleTest extends WalkerTest {
         executeTest("testIndelRealignerHighCoverage", spec);
     }
 
-    @Test
+    @Test( timeOut = 18000000 )
     public void testRealigner() {
         WalkerTestSpec spec1 = new WalkerTestSpec(
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java
index 3203ee100..e32afd06b 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/indels/RealignerTargetCreatorLargeScaleTest.java
@@ -6,7 +6,7 @@ import org.testng.annotations.Test;
 import java.util.ArrayList;
 
 public class RealignerTargetCreatorLargeScaleTest extends WalkerTest {
-    @Test
+    @Test( timeOut = 18000000 )
     public void testRealignerTargetCreator() {
 
         WalkerTestSpec spec1 = new WalkerTestSpec(

From f8a0a947e34cc7f3874e6adf43175cda5b2d7e26 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 2 Nov 2012 09:09:32 -0400
Subject: [PATCH 047/236] Critical bugfix for GSA-652 / Multi-threaded VCF ->
 BCF writing produces invalid intermediate file that fails on merging

-- New tribble library now uses 64 bit sizes.  The 26K VCF has so much data that low-level tribble block indices where overflowing their int size values.  This includes a to-be-committed tribble jar that fixes this problem
-- See https://jira.broadinstitute.org/browse/GSA-652
-- Minor cleanup of error messages that were useful on the way to solving this monster problem
---
 .../utils/codecs/vcf/AbstractVCFCodec.java    |   2 +-
 .../writer/BCF2FieldWriterManager.java        |   1 -
 .../{tribble-110.jar => tribble-119.jar}      | Bin 313966 -> 319935 bytes
 .../{tribble-110.xml => tribble-119.xml}      |   2 +-
 4 files changed, 2 insertions(+), 3 deletions(-)
 rename settings/repository/org.broad/{tribble-110.jar => tribble-119.jar} (78%)
 rename settings/repository/org.broad/{tribble-110.xml => tribble-119.xml} (79%)

diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
index 043e5e185..652f7f96f 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/AbstractVCFCodec.java
@@ -587,7 +587,7 @@ public abstract class AbstractVCFCodec extends AsciiFeatureCodec<VariantContext>
 
         int nParts = ParsingUtils.split(str, genotypeParts, VCFConstants.FIELD_SEPARATOR_CHAR);
         if ( nParts != genotypeParts.length )
-            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records", lineNo);
+            generateException("there are " + (nParts-1) + " genotypes while the header requires that " + (genotypeParts.length-1) + " genotypes be present for all records at " + chr + ":" + pos, lineNo);
 
         ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nParts);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
index 7b8224568..9c63a69e7 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java
@@ -76,7 +76,6 @@ public class BCF2FieldWriterManager {
         if ( map.containsKey(field) )
             throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
         map.put(field, writer);
-        if ( logger.isDebugEnabled() ) logger.debug(writer);
     }
 
     // -----------------------------------------------------------------
diff --git a/settings/repository/org.broad/tribble-110.jar b/settings/repository/org.broad/tribble-119.jar
similarity index 78%
rename from settings/repository/org.broad/tribble-110.jar
rename to settings/repository/org.broad/tribble-119.jar
index f8e312ad94d7c96eba2e9093a65a2470204911fb..ab456938aba23540eb8aedbac454841f220c6bba 100644
GIT binary patch
delta 34968
zcmcG%2Y6J~);GTPnUkEE$w^2iJ){y6QXvHhgqF}dp%V~EfB>Ne2ne<TMO3gL18l(p
zViZu3CK(WEil`upy<8RSdhfMfuh#<R|6BXaq@efxf8YB&9}jcRKD(^l)?RI|z4Psc
z@Z-;ghm;Mm=}sa$(b54^OYaG3XJ3N%+Nb7@zo$oaUQzDUyw15D3p?iK=MO9Fn%6Zs
zqqL@GZdLN2`P15G20MeDP37D6M)|iBb@l$Gt$KTsMUDy_C=+V}tk6e+4Z0QjIOv_=
z5YBGi;23y)2+{Q+o^8isJRij-yH_}=HBB(6+({ZubWj{kveQ*gnoL*o#}p?X75u0)
zXsSWeoD@V8?KGX=Rs5LYBqz<}9J3fiwL!D_V~&%;X)ec9*l8Ze%(v45gK7*~D9Aat
zdVbZYs><n}fpG;H=Hi;U)wN}nwY61?<_n4)I=gadWrroT)pI*6s+w7~tb;Gf^SzjG
z;MKTX;aUG}>p)Q@D1N96nk(W&H@Wv;qIbT|U<Cz_uC1IlXIN#;n98Yht5ltf3|fp1
zpyqzdrd8F{Rxg;3@7e={<4c6dlP4oYq+Ick2=}Z=aLU_4MUse7k+v~ox_R<njPg94
zu*9y<UQmtpb>)-@S+U&~?g_m!EVr8=t!l0yF-VZTYHroMs`<5x1v#cy&8?nSjXntq
zTvWAq$=q5&!#pR`Q{>bkBE_>u3--7Z({0I=;t`)kdMe_g(@jdIB__pF0$^TMYtT}Y
zuAyZnB~p?}eW<TV?Wvy}IZ!W36=XPvqQAw-CM~CHO}dV*=khn0)RX#g=tj>yLnA$9
zQLZSHN@xg2xzS9ANyBM`AoAQ35eXy+^9)ML_AI`=lgG0xTrNnlMP=ABGRFyu{x9=@
znx`$8S5rN=N{)1C2{P{?5fI#BrsTe-v;m%V&qd1l*NYCGX(Nk0XOiOFalkbKO1QX(
z2<hA}yqySP7?9EiUrWh`5K&xKwwFZK5z@-NWIITDmMh>81s)`0A5v|Uil+-D31bpK
z=@d%Y6iXebEn=c92$Y6+{uf8>D8o&HGFv2OH6~^wEeBs3<s#G`V64WRLInYc&*jG0
zhh^o|lVk7jBNT)-nXce^3PDUrJ+&&<kC3xGR?qcP=y8Au#M&@b&$yjY48%=^i0*{w
z&J;{V=xi6&*p5hSgY0>fkJ^GU6a~m><60X#kV_p=lEJn^>3p<~dtxHS%Edh?!Y`6U
z1EOsPbup-`LB%8t>PFp>q;vm!;MWtqv#Fu}i{6F(Z+i!@fj~2pO3{Vh0N>}z&bj}e
z_D*%qn!;WPm!KWi6wbl$@?b{08I>@Nu3<+gyqu>p!r`UJy-2gLiOChwAac+U)%Iko
zT;n7}`zFCbA?RU3i%Ez^E}jH6(yA>kUa-D4jKEuSBiWJl6zSRZ6xCE~AaWCpXok0r
z#u^>I8k;7Y(^}TU=-Ll3+5pN>t_SbHEYuNpMICOM*sPMk#!3PeMp<ja!YEGTN*GuA
z`?@d{gTccXl9la>_EL;m3(cyq25%NvTy?X-3u&w+gxmvYAVAuICs(2{XX6U3`5UBj
z#oQ<BdoV&EV0dyL83y-J6Ib*P6tHRSYmF6a7?oitlgZ<6^^X&|I*loz5A56M9k28O
zuGrzW&U&PteQY~*^R4t;Eb{C~y}(-Q!#Po(zMZ0)CHv^W@Kw6l(Qqo4+omfTl}HA{
zZnC*)C6>lbh`bpcTSdL_-H&cljb<7Uh?p`OiDGC)WAg{lDAi?-8?9Ep4QMq6AzL#V
z!1=jHOap?+c9{aP_C=T909Nyj;@D&whg6Nmw-hF7`Ya6n83X#W=Gk{P#`9oXQ~qa-
zXmwz8>VGs#fEb>>T?DfL@x*ufW6uG`bF%VbE!I<}8=i!WH<WCV?|t|sFQ9lWC}q0o
zSq&ihnTLEpzR6zYlSh{1Mc6FBcNV%t7z0i$S#s<$BoqD>d-BIcd0O`k^9&gpDnIxU
zP=4<CM~GG8mp&OTE7W$}&ps%L6Srv!O6sKt-ufDgiGRI3Vbd}^=O#xS2%7w>u!`8H
z{NgL3Py5X(Qa1YUn~H(f{Of@t>7LM85%T+o^}quoX7u&tmesfWLT%^u@P(>sG+$`W
z;+aM;OLOo1F!<sLpwJ1=g=@$49^wR*$aP>g+B-pT^PHHm0uB{&Tu1)w#E;I5bmVSm
zXmDcox^NWeb~l5%ADDmLav^$nzPkSI?%~waq*fGaP%mcYr3Ur(2?YJ9)TA(SnOLa^
zfsYzAzy!(+;*V5=27AsQ=;!(6hPU;>gEI%qu=%#g^;=q}BQY2)WP~7w%&lNJc=9q2
zrwF7(dNSPEW25k`IG9*Lx>!jLaTA4!n<-YTqG+U9bTW=&6n|%K%kTtJ;uNRXz~^~2
z2ylCW+|21CJv-ewZicMrpSVpyvX*CSAEY)&1^$Nj7gqpCWdKNg9EEvt6@sb;;-}S<
zQB|Zz>5dH#P=NMC!YxsH1UU{-5dPoRS!KzNj|{9Q^9dBgifroIT@)hjrbw}lqJ@Xr
zi4Cej%+vtCv1^4`t!b)jg%B;;slh9x41{bL%rvY>7D?KH&Sc>ks0bE1c!{D}*zpg7
z22q6vJ|hPs#PSb>R<P<=vNtVF#;Do>%ig<yCSLITuwtaU!2zCjrxUE_E(hIB>zs5C
zt>-uoKQ{1VBR@6;k)(S$wAqQr7Jl5vpZ5pR1N0z=wi>j}poa|F?nHy{veU!-uCvo4
zoaIse*kPxg{IQEa9&=Kdn;s9MCulc6o-}BW6SKLOAN!oNpAOh4an%Zi%K`uX7^89T
zrWH<liVhl7Z;;oCkvZ(dKt0XXAK}n5{5Z-zITl36>4cL`(z6CV=Y;rk%An`%beeO#
z!1cdq&`a4)$V+Dpdf86roEWWFxY}pz^ePv4oI82mpw|p~-Jmx-3sVygTzgZtEf^7R
z8uXSyZzIvO|CR&#JLtQodi5B{NP9d-SI^d;LuH;}_b1BY-6GaA_14p4$I>{H#?vyC
zHR)aIXV7~ly-$BLL4n7ZAit2W!OTp^3IhSzq}6n*$+X)iuHV2i`(PSk&^so5Kp*nj
z>?c3&AUesKfqIPR%x&HCKcWi|#*0n5NFST@5q*NMT$4WKug|#1c>3I+FHD+8^G#Yn
zw|nyITd!Xo5NFbt^p#0p(^m%l-K2leKMnfEq;Kgv&%)d9&KXbNbJ-u1lyA~4+>{>~
z#J}h#gZ^#O&-5P*h)KWDubzE#(gIBSjeeIEH;cBOYu7~EFEQ|s=&~pOn(m$p#nHC$
z^oQrWH3RVe-P3h#s%-()$d5N_k)9L#GY>qlHc^X)x-eK!=*+5GXkLqI%N8uIW(~_z
zd1KcDL3jOKxJ!Yv0^*PZIFG?oxQ`LRo4MLR^~O-*t?{rS?@$#UhHvK39M7E5A3qA;
zm<giLP{Ldx-GQ|6Gy!;54zYzrGhVl4S=k4nLU?M|jSv$(r`C0FPeLs7TBOlcGzD)C
zahsS7mZ{b-`77h8Rh+ntDyR}ArXto-E4bt|d~vP{nvS$8ggcSefG&dX&M;`EL9>J~
zAPV6zyMbjM6`$LbpdkUdXb#FUX$<xCc`m}3ay-jDTklEiI}zy?i8P5@Z;{AgU+cM5
zO)=amZVQNJPzmq}Xj6lZm3zecDh$BP^+{>~m|h1%6<A1%RJVuvfCZrYi`DQqb(Yf@
ztd+)c6ju<=oiz!4Kxb>YlS_K|+P@Sap|f(en2B`IXB!H*L7knaX{LG~X8m?EP2#x2
zlql!~B@KnZn(Wx{AoWIU06tRcsf{a@V<V_7;_a^V9c1V4+YV8?9?%Oj%8yWHxytUP
ztavYBvUB$BXt)<qIj5-=zv`j6364e^bjZJu0SnM@67ducG)WeTlqr&^qezBIoI*22
zDkd(C=7<bhBr<8a$f6rXjv9C#wGp7v%YhggxY$6t77{0OIrpC?gJb-Yk%sAOU<BjA
za?|xBprYx%vAZ5(toV)MHCHo4&<F&?H-MKh@+=U($-Q}j^nKi&V5@8Gx6mVED8k|?
z>cCNlDG$<a_6roaqu~M~TAilw?89Je#2@vPzu7ZtL*Mp=Ime*_xuC*^yE@fVBp0ox
z&h=DOPpzTcb}7$A6oA@tywufmYC~F&0*ED5=x6~@dZ~IZryJEf7V5{HSW}$NZ%*eo
zr}G=5?n&RcpmF7`0O~D7nnpVzZn!ZpHo6P^a3QTgTr6#aRLm3rUH;Rsa*TgiTa#(m
zFR_4A8)dqv6^Qgr!wkClH_DD}HkJZ{_A-ET0~<kwZ3?pU2o;yRx_PPlam3ny7ydOi
zz1bRb{;7VB;ra!WRXiEyMxIN&(|8n5lhYbZ9>9Ql>al}b4Lw2y<*vw{&^&t`--l8m
zkX>T1-g#tpZc6IMtC}fGENlS0B)RWtf2x~f{C&5REo2bZToCet^`LCIr_>qlZtBOP
zX8q9q1dC%BmPKW63oT{-;_FW--rRzKta>W3G^|FjU@}c4DBw?E%Q%YR+I*1L&{}+H
zbO&9HcO2qN!R{v`oQd}#ki{;*s59hfDfYB-ZS6P&g6NO~-^x0pby~kgixw<OuBlwK
zxO)D~WH$08SJoy^U0z#tVCUAqIdvA54&3_K`EVuXdnO$#vgG|u$7UH}X}zbGqQb_1
zQ67;I={f(a?rN%(RggKJhlf90d|DSrJ>Aaa;W_tAAIGy;yU$@-PI(TU87^M%7-wfX
zj$s0iL#{pQx#nz+(tT2XFTW~098@tJ8dRbC0|!hw%bZvj%MH4gMgHrYh`HW@m>cYL
zqZ3%af<IPro|~N1j&A11Dm&f6@6~p?)lRo@;_aMw4Tsh`F^YE@be9v@bGMz=+36n6
zyq>>3PTD{lIkd^3agyU9|7~W`FOo&S&2&FM9&plww3Xww@#7(eyWL>f??I~Lm`Awe
zqjuWC@12~y%b>>$dfcEVJjYHa9XS2+co7&_yI@Y$d>J-P#L8a%^-xjN%z%o4mC3tw
zr|9Ns{mONogRhKopa!(tAbBA5)m&i!9@gq^&zSQy%}uZJ#CZ|qIo}Dt^VRkzS>NeI
z-94qRrB!vJJtpm?8WVzeM_3=Ps=YoVIXly!eI`quCX1P@4V9QwPhJy<RRbJOHR&)-
z5)@Fo2<BiEo4RPhHH)hjnON?VOnRD*aG3!HJ!8^QI_Am$z8#8mqT?vr1}ne>TRLgd
zvjq9@lu6IiX_H={7g<tjQ)tplbOyaJ=`2k(>18@+(kmQ#mCfYmO?r)9H|Y(QAKzjD
z@hY0^xo25;GS>^4@Lfxw^a|43Ppz76(%-mAy{N>q^7WH#{%;JgU0WDlVT=AhwWfFT
zl)TZoZGf`9!-SM!S>5MW&Y#(#&#cNtqpKD!shU5ns@tGUS^0w&F3)~q>v|yc&3xhU
zM5KF4-&!j&<!f)+(&e7FY=%dBJJ-!tQ>Kj?PZL5cmvn-uM<}aAXT&pIv1jck;U$eZ
zvWE4~>JQV;C`by?)D<C1p0Pv3?}jgi(cK559632}Y*skgys!vE90)iF)z4)dBDeh@
zIhvqarr{(&O@=y}0*c#4LAPYMSRZu2f*{lr@C6UQubyIGJ<5#cWFly_)H2njH-i({
z2q(}D;>Kbw3%Y(l+d%ScDBcSV4T9FWg#yIhY-KmzLXp`n@Z^v!WM;cU5!g&Y%4|1Y
z>;?C5WJ8;EDJxsJMTU8-EMrn3#;ha8sR+xo3#`OlG4kErc=b?COSbB4Y+5g0%esP^
zS)y)#fPz)jj5Abo2Uz{qN)YKl(%0F?dc+58SB<q54A0gJ4p9W!6j;HAvCaYU0d*9V
z9l)`j1CCH+d2Xa0=??HxRB>Q@U>!we2Nt-ZT`~0(TWrJz#v1h$7jM*2T(*HSt-ByD
z8gcOu=s10EyuA)$iyh{*AbcYQ17#0Q-a(F7!&Xm;o$ZnK(A;el5*xUmk{|-^pajHZ
z`(hv}BE}uWscjIB4+XsN>`;4wRr?`It*5r}p+R*NmId1#S_Bh7nwQd1H@ZHaW<YF7
zz^L&M-vopq%%qJ5fI$t!P!9pc9}e3_8I;|TFaV6A9H<?|;Dx=gu!n%5je?5c9uM9(
z1^U1Yh#%FEhUU<0$ngsxQrAFGTuAF6DN9;H+aV$Cf{gqWWTa;x2|WuM|01NH*CDFB
z3(4XO*aUeUtb(L81o%4;jWGb(fKD<P&pBw?5O7!nP)lLj<kWSP<5sGY9iW_ODMw-=
z8*$)kHS+>Rzd#>40--}dbVjLuwg%}MtwgmXFyk=@1)$s>rCJ3-Xd;Mb<|T59KiIf5
zUcj7@X3&Ij*fxX&7K^BUh;oOEuwTiD3}?XxW8+`;K5MOwEiTQM(VeQ3)^gMu+o}0*
zl|_`v$Pa_2cpZ4lma4~dLIayHF(4<Du_>Z+U{0t5f%XxdjY#7twJWw;FGo(Ovmn+U
znsbOU#^;0v6~sEMaI6k2@?7_RJ9k7yXEV}_wMPaqqzZj{=iuB(Gcq_bATlu0Sx=cN
z>!ajY<!snNfvcQiRS?X(N30zv#+JdbPGBny?PG({Cg6_5qP!QZb2Iw11*3OA6x0VX
zeOrMJ+c5PHVT!k-L>=AdrbqC(1HrBI7_3=5>###1Q+r5Eyuxn*%1uEi82aZxsz4}+
z2cmJf9#<2`boKE@E%tFB<2Xz-#|=k09S|P?Zt}E7qnXnY3PeNKlDi5)1MOV}r0oZ6
zhk!ye5pn>)SSY6*flEC9`4rfInw-#Rh0<HGWjsf)hC3VoiAuy}3Kn?u6yjIX*z7zP
z#aORRz;9k7I`qVpG6i>nAZEF`;hA$KUhHXx7i{NYCcsQ$K5&((E9<ntEg806M7eni
zEUJ7!6|;B;qc4g<uI+<$XCdPMOw)s#RoaZELmEe=3eMucg5y@Cxj9z<cR<+B0H9uf
z-5|AJ@P7t}N5KYC!mFv>65t94u7MoQgCrnmwx|4(*p|ZWfngsW({vVv545@%9%O~m
z|8YmK6{`Pby<LgS-l0N#ZmOW%xk0p%HK8I<P7V=HWniCl@ZdEj5RiY_EJt&Ch;+L|
zfZP@$TDc*94se3e4s?R!4zkl=CZ0p>G>qTFIdTLOr;+>^#jPF9k1_lhYo~FXYrF$y
zoN|6l<mgHKfJ1_y$#%M$-|$Hg?2{m<k{?r@KyCOVOoKl{BEDyEBpeb1J0u9I<_C~;
zjzM$116qj+VacpLn`Bn*M2y@KD*8mLe4Wq<^?x2n*RnheX|ue3DODzgiLLH-Y*@o?
zE$|YG=mHa{3HJuuRkhhBP-iix6+uBYLG_pVY=iA+xyhCT6PAD*P!2hvNE<ZI1PfSc
zg8kreGp*vak4&rSI%Mi#Ld(0&gp!7+HT+mh*BR9I4u*WE38Hbg2_A5dN$dIH;T#+I
zv5{_&4;6?kx!NVt`4KL+CF1e9OGK;vetnRh=1mC~w`r*<%33WTDW!tR3Fx4VS?Owh
zVf!30neydG5u42$O_tCW3F;B0jM;vnEeGNV3vDb6a)Hn&5$)yz)+&uvnJuCp2sX-@
zIgR(e(N7*rE{3?7sGDh6Otwphl(*TEoC0CtS2B^^l8G2{h|1Rul(8p>aW7SaB?84*
zHSoq?jIRn8D&wtm4T345pkN&uvt-pOPJwa-AmQ`uW1+6Cs!kSj9QEYXD|$ezvBEtQ
z;<u9l`9)9Yd4xncB&7HRFQgYeR)>rO0S0foFLS+{g7FF7<zi2P&E(imq2MO_DGU-G
zr*MgI4zLIm!2w5sD-vDBMlpXm5gpBeAOvDKU?LF9fnZb;2eS>NDgF#;{7m4_R>+nZ
z>qaOPp(Kt9^S6qUWA&y%90&D}K=}>Odk2JzNC*~5U>{kqd36ElFM(VzT=9f)pkU*H
z!Q~KBCSnU>61dz|^f0*felV?*U{`0rsNMh*`UI>d8&KWMc`&Ap^ZmJ+0)a}-19)3Z
zMiQL{1#XK8=|Z~|_14t{6;XiWGE9V`1V*zi6o73pgL*LIfy|E0um+TE5eX|>FM~=z
zX$ve2M8z^ZN<=h6!dCwU2}>CXRY|6?jD#(ihC(?FBjQKq{|<*;H8e0EUO+yBcPRC5
zNt3-tW5nyN6kSEW#(!~&uCCjyJLItp;jqZ^*$h!0Wd$>OiKQ-8>in9G*Yt&y$G;p^
z>f%6QmkDEq>3w&k7-Luk^2)_mzF>T%*l7`Z`C_%`toC%b4*p?SA{JVrXIpiwyf|B=
z%B!E$1LcW%!YzCD(jvTw+h}d#dY8`=)2wYzd9g-xvMPvOC<Zu{A!O9_oL`56y${vo
z)`ztixp$A=jsZEm_1B4En#xmk_3;1P5=?YUunfCV^l<Nk_~-;?OmJdW%I!?`W0ez{
z1K*SR<7zw8{8*~s7|{G|<%wo6(GQ_2J5&4+ruZQ=3(g1pp6!IdG>6N;1ud5S&mf1x
z5sW_;I+-%_qt*%HIcO<Aui+G!Y~V=cgj9V!KW=a`VP^KRg2AnXebLT_nsi!au%RZC
zqgESqD@+WqP1RZkkvH8U!E>S3F5A1sqw;&V80vAn8e$Vv+Kh>OzgjqC#Yzz>i&uz!
zZ7qWe9MS--J$81L%UVBp+o7KSi3KID6pwVwXTAV&hglFah&2e5n85u!W)gQWH(CrH
zb0^(pg4(v{yglUkIigrj+9_hN`-5$1+4m+fwga{lO_=5Gfs2M^Wb2X4`~*BEpFGS@
z5ZcHVCTT*z-OTn3IF;qoW`nkvbRXSs^7f)ZTlr&~3CVW53A#{c&?8*>QG<4H%1#s7
z&totunS|X$2JoZ_Wuw6J(9$H??`9EW%cs3wSXq9s#byIBcnz^zR(lmfzwi$9l)N1+
z--{GxHd`7jW|R!#l>*Y<WI^bxEDr*NqbsJdP%$G<!`p4#429VZi-|};ksCmISTOBi
zu^Xk7gV|VohqsXo*3IoM<^%_!s6!iOh2A2d+y=El4x-}%xCqd$Sk#@4nz=m|zw3Zl
z_Rk=i+kxi?HCx_7n#*bnJ7=LS8tZ0Q(*j;5%!IO!P%ySSLSV0N#fE~g>|@{?u5g6V
z3h)T9kq9_49akE36j;SDs5~)8D7GB-`#4#18>nCFSUoZ@JCd0h^NLZc0>r97s4|}^
z_GK$jI(`DU1=M<|+)0NhS#dkwhEZBO2XdiFGas{HFyFF5vAUPq?j43rgtQ}+-q_Z5
zO)YeziD*p5an6Ljo4?HD&!Z;d2y|nR2|7X-vD9qf7_5|s;@Jy>Q-T5PgIVnhlSe<W
ztN~zI1Hq04LGvDnwLJu-$H1&N77S?|AV4|)7$W88;95K(SAoZsLEPyHU;*H6NdVXj
zl0jEUUTnqD0CFiR$V0_LA=EJq*8wh7RiyzIhj<?dv*e#vga!ditywLgfDR4FWxxT$
zpdnb+Lk+_I#Bc#+?N>@{Kwj`FPGd$k0&Bettf#mcrUr4BEwQP!M|>EaKaz1qWg6wf
z+7|ptA)}@Ss#&SDk*ZC+Vah9M42Fc?<E~^zYs8lhr46v)Utm%8k2Z*FEkBvgJ?nN9
zLRjNpwp>`QwU!l|M6kSTpC0IyQb3-3!h6GG;%HR6-!ZmM)bYs?ss_r=9tI?ZC8z^7
zrvwepJxuw$`S-l6JSmdJL-MARB0C`|)_#}@cq4FLu_G31Y`$Z|7U<gsF!V$D;z=<<
zbdf2~iVp5u#XW@WOxg+ujfcfX`cO!^C^myI5p-B56ro`Ho_;_ZDzW^I!BWpQoQ>E5
zzaC>b1oZ4*2(e@2!}JsV8?&DYL(I>ZTOH!aSyh6?&QGArOT0PsAMBO0-`e~1&p7%9
zgZCVwc}-hDezkzeub&l#qK!A<IdRZ(mkQ~AZNhDkd~TB$PKm;9J(x9RviDCGKe7$V
zF(}u`rJ;HAy8}P+?3B-MI1A}iXi!IkI>}+riv;=GNWH(jWtR=k$E!3`zWKbED{YTy
z-9wxUYG+j~>OFVCG^jZp<+{@%KG;Q_mB$b}2ZhQfKM?KY#nYmn9X>{EIP5BOUl6Hw
z5I_?otGmy+KrVYhJll#F8Do$O`@Di8<ZWa0<UWyzv}~$jXqO8E9FFcpU>pdx1Rk<h
zur9>l%^|-7P%Ew-sc`ECB|86-tu4HOxrkp;w~7c@*W$P?r0|v@)`;3k(||j+%H8;~
z9DXdkPHYCj>8ljZ;L6m%5LRLIxIx{LP+Z$^KNB1sR4D-H7%0nL5~=QkWV38IP2EaE
zm(sDqGBB-Kss@W}cWdlcH<({T(Y7SOMwgg?CpR{mJ%+j20o>#(Tb$Rd#cYI$<#7_=
zR&DU)Cf|)tbCV0Dc}6793}+xY_+b?oI@AnhJ+mnp-I@fp+jK%}IwP(~LANHMu(7p;
zpgyd|MWWSdm>3qadE9jaJ|4*irI!d<@~{?RE%nFF_?P;P2ljvUJlwPSmv1fgvU8%1
zyPp&0!o3by4<WK0aH6^D<0+0g$d7t{c=>UNA8_{>$?iTQ+1+O(yZel!qe0-1mb;Ig
zPBM&Vomfq$*a66L__5IoFky1aOLjWLiD%*RgHsA_u5JvSGuZJbkv)$R+4U!p&O4xj
zycPuW=^OCZaiY&iYs+bEt+cne+}loihu#gM_vn3o{Ech)fD3&X1aFrMLEsS|^WzhP
zKIKM!#%Z6!0mn{XIx*5;K}G+XJ&^Wt!rz17cXPp@e+Getf5Z8|W%%DYnf7olH059L
zwsC@o?6uR+{QeJ*-!B~cl^?&cBNtqFerKZxT0Dd<@#8W-{@_Q0fjukO*vM8rwGsiF
zycny+%J#2{r9$^U@~T*&>j8rH%axg08@W!`GUWI-L@Sx_jwqLZds9?6Q5^pR4Pkiu
zy(LbF0QwqD_B!7Y|3vtA^i=--u6QqKwy>K}-KH3jN>9FE>i`$S3f{|tUB!eQ8;=<@
zlYO7Yo3II1o7kbh$z&^y$wbnmM_I6g&xawLctZsl_P&Uz2ok0tf=vOtkSRh%n4qZs
z{YuA-9o28j(0;=Qj2Sp(^q9U=`t}=V3YRc}r^EV{4xi%BGOl#gpwi)e`k5k}6NCNn
zeMXEOK8Artc-9Sz2rxw?jhEj~vqj3hF|gvCIIJ7dCahyPMuEL_Q^5RS2xN<aYuAb)
z$kkJemZ5z#v!{uJBhjX4P2&v_Z;Awwh_Nz75{e2wh9Oc!8&jl;wuVSEMY?E*aWqAS
z$TUTk$mX*Z^5VO~g)U=b))YCkgsJTKz}X@fXD8%qQj4~mqP^%~V4Jg~OpA&$MLwH*
zmNM`HQD}&c+>uU(=qy*(*dje86T{@*Iz3Si-6h(}x@nm0V;_oQSCQyqimvb#6Gft%
zDY}aun?4dzuAZWo3B5Sm6s4lKDf)<PnO6cZ-|q&II|n0Ad><{`W{AGB;#Zt+xoFec
zd3Sy!KGS5#XwC4fniVBIpNJ)%19Q?mQ$}=^ql&Z?nfWPNdtoU|f90QwHIqjIXW2f2
zgG)UBcoI)M;@BxbgVIloIULP6quxLp^#<msH;_lYfjW4zr`BxjVF;QlOFk3DVxGM2
zGtn+>KGH0Lo2g8^TqIM~$-=Sa7cp19@R?}qo`)pM=)=-m37AnS6g={=q437y;f$>)
z%>GJ?(MTrHzzjUuIGuwxr)ipHfo7-{f9a#3SsY69K@&V}R>>~^L(#r}S(N>uxWC&!
z7aP#uqAx`DSZ=Uo7rD8y!8bzzosOKX(Qr1f*uan`vpx#$kcY9j0UBulc8e_bY2{jh
zTpF)PJm*U97veB#SoozF;N}`eVdi4MAYf1gj4ogfx4@(rfWa6*x2m{X!4_^)aRiSX
zHm>u(osC1cQ_X<}INQ|B+ZyY)VkV>h+fgEjv`aL?pf$fk;AZ5A)u4Y0UL4l2iP5Mx
z5D8DqvaiI!Fz8fviwE<_w;{W{?`zT8{RnpLA628sqq7tE!u=0`;MRuCoahB0G`90;
zSUqO~KF8gIvC~vr(kP(Pp!E=j);FMGHiJ9{ZBRZ}>-9@~G-l&(V7`ic@vwmxL%~y0
ztNR0ZQNhVT-%i0ts8hMCvzLmFvriOUp^5{LSS-*&sN2QZMN~5FX4=Uih_xrEJMUi}
zhJ^}xN8)Dc<oh^51&6UWLbrCc#|FgM)8{1I8e?B@h<eRAL?ymF^;9|@yO<Lj%eUQ(
z4a$HkAzWDivQ2NuHhs{(zOH`t)W4}fJq@U*f%P=VOM{Q^Xc!Y4!0M`w?HGcD!!o)T
z!Uw$Zpk$Rm0v-nG*F6QS_<Ho@4)kCT*zbF=%=s~C3bHPS?r%r-O%USm!=Pm$7tagN
z><8HFx)*N_JplaGRERgfSho2<VUi9Q$FbkF4Rgwv%wmfL?IA!z6c^>4KPwwHq1BYe
zQeeboid4QN8!yoqgX%i`M(MvHqeY1xK?sZpPhQ-|@MfE1!4VpQkr_HX7ux%<oc)yf
z2!-Y1ILGjM8i85aK_UEAW_`hY>(-mb$da^PMR(6bUVj5B=bMd)$$o_X#ZrLoFiGS%
zCX&2ZvVgj~&^114b%R>K+*ymjasCy^Z{>OnId}oVo$?a384tq<l)@)`z1j_6Pa;vn
zD38NC3%wd6G#Xn1BN^UT;cbKAwG!*3I|P$0fewfjKSH%>=2OM)P2RX~#0ATA=e_*B
zkiyQfD+2aR^4|X=_<w93=^`Jl{RMCrEQE7pM@kl*s4aNY(LaQY?1}B)UT}{uf!BX2
zwHCc8Ui6XG4dPJ3AS4Y&(hzDRhEkds2JiXd)Jcq>B2l)nr}n5AxAAjrm}4T$>61{y
zRkEAVhKLGztI&pt8S*Wm^%ZkuoTd#3sDZ$;P|nb_wFyfoNL)jqVi~1~<p6gBbrIK5
zS8+WJ5jV;Zn>H3qa=J|$r;MSYKh$6QJ7@#{dRbEIx_3m141Ed;YOt>TXcO_9>{_0P
zm-%+BCtfvnZJr|m6PpMbK3;xd*K*XDw2oN%p*PN<ed7dtdjyKKm0csW?xKTS8ll}0
z##^w~A-)U{)=cUm!y~m>qKCXbQmcq8!9plSj^0pw`heW_1s?a4KSXMC#Xxy=6u=CX
z_eE*f1ebv+jRfs2gK$1dCPiy62d5!_I<)n6utl_$zeH;tmBsYP`|Dqy(FW>m8_$Vf
zAmS7Wmy~_=5VwFCa|%6Yr&CULPcrBQgGFa}Cb9l_)<(v8ta6=RHrQj{NoV;1>3J#(
z$5ZLNlf9JSEgu9u0)jIfl{mxO@LG3Z1-=WfcPB6w@ZaY*MCK^^z)2s{M+RN6)5oxl
z#w&T*pwFCG$Dca@^b0;a42gL%eZ`+&J3y5G&Y%C_D*wrkZw&g@&a(1U`rZkQhQK_P
zehl(CsI#ja<mJiirT&XSzZxtlcVaQQBTLAIbcve_F}cvqO@*l3jT-R4*<OTxf}rfA
zUP1#HVdIC+j{pD^fkD6=14AY3LBb)Nb`fM3CI^EJ5yClI*+r;bgc*?P1daqd;S(Ii
z>CtGMh~bvK&7oMkhy$!-(HcV{;_V`VAt%~Jl3gSlA_Ze4+88jT?~c{lgo(B|3_NK5
z^r~fJme*84nDj!9H?^oY43UnB!DvGeH(0#Pz|*V6Yq_=n0q?e7yd9IYBq7#X5<cX&
zaCr{Lq2!umZBV{#-r|`cAvHc{d&n~I!4I6aWKo}4iy-#Ttz2BILLrD+gd@rQWk`vZ
zE<Z`uhQ|kSYX7Rr+9ivs;E%er8V>ta(~*6_yam!dOpBBoUKg>Vr)+pv#BI7y*ge~x
zicRU(vm3_A5Lr-c43Q02vl##}X2Iy%MexfM)WLHyAp$3;?Q;Jcw#Zn6Q&j~)(`Hpo
zn=^XBB2*%10Cy+TBVSc{wajty;tV}*(~sC_9-g9wgh6J5&eMM8wHOeH)gsrkVqc<M
zo1&$JiT0-GAYj;@CGz=EAWuf=aq`4g5e-Sckmc!4qBB2=44=#l!$vZckZz(ohT9Z9
zsG84VM8WP2u~a@ZO^cV+ZM4=A!Y56a8kQ{G7gl3W>EKir$kF|pV0pfc*4Z{o^pl|j
z^(dK?swHL568%jvfK?#a#rbisDF)(G45B(Vd2cr{DQMIX>;mkY(@w5S)jCBE6Q$ht
z;iecN$}o<m7%4`{%6p;0G)^&;idkYbL);-N_iG*Hh_+gB@GODDC$q#@Q;ZYiTgv*X
zzywp2i;k=pVJCW)m}rVgVyG#u5|d4FwU}bEamy4Hs9#iyshCJpOcPUa%5u>BnkBW#
zGjO6idES!6waK$8msTZDTQI-2dghV^OBN?jQ<$-M!Mv))?V*b$kFLBX**Ck%rkF0O
zJo$ITXP6Kpo)HwDJY&J!xeKnTnw~tj635@+sGE$%nOr%4dNRyZ^OJoiOCqZc)}>}x
z!!gqovqZHiW{Wwdm@BaL&ic-L9=)Do0lsogQ6nIj4Qtle>cz?P7t|&%R+z>0Eavkk
z$^JFbKG`|GDtU3`HS&fJM4YVsK%^?QjVY*ZiiJX!#@eD}Mw)gcB)I9YJ8lkW)b6|=
zr)lGCa#=5^*ef!%f49;z2JzW=V^-zj;Z@6O<#62=EjMIo=|&*VzhDV@O7BiJgLv>N
ztLHBsQnlPOvMgV=zAU=Sht6xI^20a5NVfbSIyvnA12eL8uAbtVbf-g(%+$M1fn+)v
zbe#=EPR3G%S|M!dGPCi;TjVTl1Z3fWyyfvY2cOn1NG$kxTO|YeO>BIcg!L}Vab~u+
zK3mHYZZ2^xcpsrE4Z52S86prWe=88|cBr-!RHvSR(--36U_clH&0sS0u<6ic=0Z=Y
zg;;Y9Wb@?^ZZ<$Kk+8aNhIIHSQg%XB+K<$ykoOtnIEB0~An!}a`x(MtlH0ACSOZSk
z7p#NTI~~=;0l*>_aX{i(Qwv1l*3i7JLF#6(>SZdl6J1?~6deGqR8~*FX0HmoHGqd}
z1x84Ry&qe6hzZ2Gsui>nqS8&sF`9z^a8o-R7BJLaUT|llH#Xm<;5Qumc|#1ktpVd@
zH|X}pJSYa;1`o~|&)d%W70C|XZbPj+Lp@Lv3%`!6Y`7$1*Mx;i4O*{X#awIk-;xI%
zygP>^o+NC-0S3dm#|Mj#A+h-ERIuU?VW$OO&4F!FV`<s{oQ<%YK3I$x>oKYtpn4ix
z=x))(w7+QLLkjADZ=vU}TbTA&E!_R57AA3b|Eh`DX5@~`YddIh3P~-U{LueTYe0Z~
z<2A@PUT~((!Dds&UkuoD&6*R`IAB4I0~Q1{lo+r`2h(&P+&#ccRw??*j6AKQ8+un>
zIo}GAAFCgt0-h6_nqX@q@D-rTs}Sv8!*=!S5MAC-Ray#v-^MEYHdfi!S0&)9*|MQr
zEbw?Udc!yidovrTw?OC87{%oKdD=KNKG2+z{tlwwLy7mXnm&Z(^COi15CY3bun>RT
zth+^OZJ+>T*ZYBW?5EI)_igd!dJ@UjfTB>Du{gHks3h-B-FAr=!<yfa`yu3E`#g@-
z(grsi87+igh@pYxO({pH8}B-HAD){9y{bn&^=uj_Y#*v|vT)b3;Cur(-@=0PooZv2
z1*CC&QX0o6g?EzCMhDCpkATS7fIJ)B(DzjRS}qC(q5hu*&M);U56$pWNiiQIE8R?Q
z<JG$;5YIlvdR8t^SKnCuF!h5DZ_hnVCibKdhLv4l^U2QJPyM&hI;h^*lp3I63YNJB
z7CVnn6g0``Sh$78x(2cPO@Nzo4A!764>?3b5d}ZzD>4p4Ec*S|iBLk}#M2^mM0u<e
zCvVEUH1asNfEzl>M&om0^{BwZG+L)vefW-sO!Q_<j+=XSn8v~sucn3Ri{;4k6Nc(%
z5X}EzqJ9Bc{S|2OJLvjlpu`_kf&*!g7HJ%;%M+nR-Xv_;P{Rf%%))0e-|vVZ`d9?h
zXCj2Y6ruDF5vHbxH`peESnmX(<vV@cXLu_g)HM&J{&7UIVW1Am>vfo?WRU#bP+l1=
zer}x6pP|U^#iViEJ`64I?f(R3@g!z67&QC<AO+Atq%-@nVz3RbaB~O+{z1VwO^mg5
zr~#_A8(0Y(J#5g^h*kd&F<B-m!SfQ`foD(ySXyB7d|Vpvoh+{S(03wQ!EMU(O>&mP
z2+WRa93J=@FL1PEf~(y6ny9|&$sKnEDHf4xoGi|p2a^ix?oPF-XswgyF&s?HYqL+&
zA{dhZ7YFq4+)mRF02(3^PoUst8hM09z_g>#&NT^D`_rsu<s5eV+jSKZt>Or%Hd|29
zW}u`WpDb$|BZ*pjA`Z%WYg#N4o6(2b#+j;Bv%;fNtLD+74*?{82dT0H=d1ZTA?9{%
z)Yze;;WYUWU44KexhBz!-Nj*iPa;l(vwJ1pY{tlgI8gyWQ}Lb#GsbkhtMHCT>P6ts
z2+9)S&CM9>zS`+q`r~7h_>eRdWf^%RZen6_&ON4Tac%o)&}$Z-`QzzhsG@*RKPAy<
zgT~ltEXzjY9BAoyJ5As>dm8b3lEL1kP8!W0lbvkLwo^qAS~=C{SXxCh_!ACB)7Yi7
zimLf@b`ZKZiyw30yBWaFg#2%~6ZC05=UTw9__`~D7CEpi7W1PPhxUNgmcZJ(YPw9x
z7SY+QmdtOA=?@`XP}ub9#Y+C3&+@kwXU9RIJ~Fwl$n^9v`pPLb-RgP*cOB|)WG&u(
z7D4H%;(MVg7b%@nR^*5r&*Ie9((?#(9c_n}rIb7QevU9b^KQv-x405(DK=_HErH-a
zuWB5$LfoHKIv*l-rMfW-&fG&6ESlMYE5b3M%BGYKzBWWRwL#TB*4Hru+d|EhV5p9R
z=FhKM)CcorTU@nRUWkSiT@$1wI9RyWYoRF1e5cka%2l<jrfM4W#^(2SEtjRgVLK;$
zX7`!q^vc@GvPBD)R!`@Pwj!!o&+cD2t#-kp<xSD4S2U|{^U>|2^c>vS6#;o%wc1yn
z)pbNiWn@m0`7=Z~us!X+^}lbkW?(M#`IYl9jn2^vmMof9WzAn?3)qaVta6yJAZ1}Z
zl=@M<po5h+nH$`J`_=)o-eK(g>X}vZr&P6HTsaTd!dXYgGWpUkZ1D<;Ynj`RO;)xv
z&XLqIhlO31g16YpmQHPP_AF(f2_}(&lO;a)WF4wsfk8K#5TjQZmVYu!&Fr1Ly3r|l
zjdC?pPG*MB6WQlbfNM$Cv-8+m+MtwVsE2AS<;Xo+q+Rkw3irZ3cA)?pR7x!ygN|Do
zcKSZpA%GL{kVQ5tosoB}7G15YoDouw*JgMM>yxGJTkNZ;4Yvr}=&D*z`?9p4uqHf}
zdt>zk&s*L|dEK`nHf3@3wU`-PSuhBr4subosJ7q2C6#kA(*YTSpxntR+i;@sUmw}R
z<V*XsM7g4mrn^S_OMz_$WZ)u|ak5K-wqD+mfqQTc%t>`aX=?*=!@^Oh+IYwD1BicZ
z5fcidp7mxgYN%}>jhJ*iv+=bMC78$-u`&l)Nh^2p(YAxo?$k%1`Y@~yyZUh8!wFd=
z=pdPv`iH|mY+PpXl)W>;@r-~d7l|68l#jK=k%~YwtuuOgsGmhlqOXvj%T>VTfPELt
zLlivJ6*3GAq!qYPJ%t{E#~C<}O9R^s)2(>;E91aSc8(s3jg3g1+{1N9QBeX}B64kq
zDVl|%m|{JG1su31AEJ1R`{baiEYMp%4s1euYX$pgJer!0!7HR>sGKSA*GYv2*cOUu
znrgfyHCOl=Zz2soix{+4Fre~LLyda6G5UPg(1t{IZ|Q<w`OgyeRB6RX<D(=z+TEd`
zS(;c+y!qr`AKz${;m!Ox1S4VaXk;IvgkioxM!cnCXF@Q^QcbgzF5GQp4J>n3_7!8v
zMW|M4S5(R#HGBxBKMqJugAdqX>%xVhf7t@QE$pvb&>j_ZXx0LEh;MZM*DV+Z_ipQ6
znn4hd7$cJcj#6T|O+QLW<+lBlJbHAw?*B*`J=zj|cs`+_@y>8zGfYeJHHXKUJ?#0G
ztupSahB;$7fKQ&WLy-oqcr}%R?+<`ZKk!O^FLMgDZptmqJEu_l)oul6b=OW<M_0T#
zgSC#WLRd5yx9rGIKVb#_l;pYRu^8UyZ|!}kT+@@SGP{;*Zw9E-;H9$;r<@NX;$JCm
z739fvE42{$vRjL|8oWB2?}~(Tu#FjaHZ9@MQgAT%Q?QS)jg|+osli}3;UHRM&<*^3
zV-S-_eyrq-H$j2on`>4%nNo7bTaiolT%py-o7QV_g5(n`wEH~US4D7Bv%FhZYU?xu
z;tKp_Z;&}VwPJ70Ds7^$1FhA;XLWOv>K2TBSOC^}VO$-ld>TM#JJF*iw3waPBQqh<
z?ebI(ijozpwQE8-Z9klLd?%$l$-l4G+6?b$(6c6X_~220iqAK9H?h^%mk&AD^W`za
zOw1Y{FVayy?F<R~44-z!x$Ku|mr3X7X$JcWKVHR=XTB4rr}y4lwb?c?SU$N;%al25
zwZ1)hsq#PTOfc{MH=POo3qw{HWVIIARamN5OL4az?r1y%EYGaf(%}mD!&)smjn}tj
zo`{6YmUT=lQq{xeG3!)sk^E>KZ1@b-vLnp!L-d{Ym9y{A;#xJ;IvWz{966<{9zC+@
zIB#E|X%Rr=04}c#>*~4Y$F=4+QX58Yt2O;NC)`B&K=1&DAb-C@`wXqWc&9eP&8@!{
zv}zPqGS`HY4IqNJwhIk?04r%Q7K;N4dT%(O4nv~|CIo*k-{9#{EqR0I2(ZH-Sn~}z
zHekkfz#NUi>xgZ}F-Q%7IyM1rpCbWzCDO(LvQeR&Ie7T2qI`9Zm8~4vcuO^h;3t-7
zKtle5_g$}oUO4$TO8bKy7a=uYEiO?mUcXT=ANR95I|YrZ02$`<c_DXeNkvTqy9NEc
zuh}St_<Dn5Tu=e5@x6pbFfW=er{Aq*!eV;Y-C9A&ZMY0(N5e@b!f;`b=kC_pxu-$x
z<Pl`n&<Trl8pcTn>*%dUkSX}|#?hLNa21*qiqYUP!VP#xXO3vaOh62GIE1vze83f*
zLq|(t1GTsX`xmhBRkcwB0zDn9(@v7ttkV*)NwR63mL7kLmojk+O|cy$8)|z!;R+Vj
zWSnS`@2u0Z-NWIcFdjPRM5u$@bau^X3v}hFv?+GNQ^}4V?Vwfht(Be8p}B~I*Dz3l
zk%ZA;Qe(#^!8FSk1;qft79gM14P3HxiDICoHSYl&=xb0`gSf#00Mp%UphtVa<@2lf
z*W?e(#MJxbJ=&QdMJ`7>Ztr_5W`%#9nOwG&^S5cOWZ5C?tUmjI_76=A_F}yUXpZ4(
zR0*4x&uquVu<viz+F<ka@^)>!<yk!8VXdp8)qjM^oBP7Qux|n%6dCBwcA%?A`0)&1
z<aN|e$N1y8K_{F*$CLc=EI*#($EhIR?&QY{{P1sio@GD5vviK*UV-(E$uIUeUG$m*
z^y_sFz2U?b{#)4Nq<8oMf57MIJ<x32?G*&m*#{i?p_3f~`EijSA7gh8JBqx;S;~(T
z`kWuw=S<;!&J_A8@c8%E5b_s%1XFm!Gll+P>$c?#kmVEJ^-Q5}`0*`2zBA~1JMVfH
z(2xB6ub|G{41<2+8vxPwfAbrApegJ#Sj0YqDeN=&CjF+|1$)sYgDzw1(__Av=$-k9
zc1jbUdciR=g^L=Xbu`nJz-I3GR`imO?!p{k1%E2VW7=4qZ|ss|9@DNBuX<m_OXIDy
zH?>~!_T5@?Bpm8Zpwn<>0A-+ed{0JSaFdvxBlt`sgciR340#4pe?w@du+dr*I9p)~
zT?8mEI=(F$UIo0(YKkC!m?GGx6JVPv1V>FoD}IDBun}s{m0kTJ3=yfE_TaIHy08fu
zXHWyY^w@_lUL=?zQ6yoH*A&Tw{jC(Pvkk4oW-FiX_~9|F5L^t`&X`P9_%sWSv@Ag;
z$dY(?SPM}uiw}TDing43KgVsOhZO6SD|Y~4OZRF~_7QLnME@sPdxY2R)e582u!9Ik
z57gTZ4=lv1ls_OJgu#B|2%NGMBcZeGzEw+=m-lLyH|+<XA1sr8vqgIg_G=@A^t>Rt
zvUgpO{N*XF^^j65-rm4(J}TB1IHZA$_E&EVuX<yI)tgWL3(#%OGZa6T*7)uUmaNL~
z94W6psHI>Zam7I`x;4A{TM|E0IHq=d!R!Q;Z~sB9H}d`apqAZ%^H~yXeq+9T)fVP*
z`KmQ+7(<^`*#^6aYBvf=7B8&>oAFn~dp1@@7#5ZJj(RPj6KA&A`A&#HkEsF{&t@^+
zDqzC!b2W<KXfK!t_vNE{t#z|{b~mfX=_}@E8zW`1SBolYYSg}Fnf;Bj#%&s4SArKP
z5melcP6W$&UTt-!$|8G|Ju0xO$QkA2a|gzL+zQMWWJLu=Ir&lx6JoEc+!kw&K19(h
z_uAmHk#Pt_{T4cd{fe`|te3H4@CuBdud4dlQd$R&SB0MOTH;$F`h%obfd9+{HwlDP
zm;;`|_vPLJHD@+Ld=>j95PrU;Dixf6E<(&`_BD>wJ`jACCpd0CLYf<NUtO#edKqdy
zj!41}2&c#3H7ta#b78CNd<Y(=-gs~k$5(D?cn9+-CI@u0R*g5OG|ksLX!pC#<}0|d
z6~QnFaw`Naaa$7xSbW--8^$W6^9cl4#P=i0-~M7w*r8HkE^>86gSc=|Ryj)R6$i=5
z+};9Vq0a>j&&6h7f>d*&S$nf?n?uJ$EJm1F<Wl+NVXfmpN0E-!z(fye+w#_H%|}n#
zvbSEmpcCY>ZW{?N<vT|riuCN{dP>;Q@NRZ;lwOG-H-e{n`qNrB_YzpF8Q=Lz&<Wrr
zjI=nc2R@2ZgNWms%l#<56tWa9dV_e<8#|G+(32vt5T0aR%~&vLe^757w(1*y1M6q)
zo%PZc_*tHu+#9~@gB9iFeDW+&CLV#;M-GC?MS)R)DOE)*YS<Y~Q=(uOh&oI`kZhs?
zd*f)=j)ph=ZQp!ED|Am1*!~qb^a$}~nP69LzDL=BP?!MMP)5f|@IB)8*+Jerz%i7O
z6;(I(Xt{D6<<kVy=~hNP{0ddyY^q1tfN6kL1GXC2JZ}Ro!1Ue#2r43tV>HCzhKNKR
z_uaK`(Ph3SjF)}QGg`Hqu|_%Gfj<~v5AYnPNHDf0S~a1uQ4_Fs*t;|mH0v%<6L!{R
zf~1Z(^g~lh7%8nSRg5TBsb%ZPb(Fv+-c}(7pGZ<@5sN^)a4WRXaa0-@QCWfma!I?n
z8w#7PcD085A5Y05fwEyb=XUY7RWgR|9-xGd`jZq&uoRl>Yb%Re>oL51eFe7=lBQ|R
zKHpgYzJw>YunUW^yc9xOp@nH{=HQur>w-OqX||w58&$1E;zN8@2<%lVFf#g>mefQ(
zMfzW4)|d@)opSOqt+QAoHyqQF!4?jpXtf{9J76N#!#-z#K*g)h1;XaiNp+5~qkrQ>
z^vAN|eNr9xz6Bu*C!$EdOs#+cH(us*t@qi~)gZj%1E#Ag?{Qp<Yu3`jW-aAI8_XYs
z+MMK;-gsP#QRtz#k0?@^EoK5EzmJ(<qer#G$vmCw00{OT;4v-0f)TFVht<&{aX$s<
zPvF`Z$A-<=;=wTMd_53y+=574IEhpn6EbV+m2pjUa|0i{lM8-C?L{dZ%lb5HaRD?h
zZZXd=4j_dBL7Zs(!@gF$idMkEmHK317t%`|x(37qw383u^5S}Gr{kDs)*;H<484I;
zIL9H%AF|3O5<(NfN7NO9g<k6Dh9B=~|G9l$D$15729ixcT@-LA6r<pVIR-mB<6&2x
zK)GTPypgY>5^=R^Dl1HUt9%_siRZA7uhAT`Mvg;|U~btER)U?@@n|=92^SV#A~wd>
zKrOWWMsXUt2XlkfJu!nE@|BZXRPRnhvoVSeuoQmduq^LqVy$(C?+(m}SZiB&o45cw
zTSYKpc5!vBr($I3hF3jxM<-?avsx#2j~x_`qpz-V>?4QchkFrtNrKYxy^0NcvY8R<
zYF!pXcOx+s^D+&zemVwV2IYvEa2lOOqs1KPu5)RIn1`!87tqb3hSrM3R3~a_hgjM;
zC^JCVcVYe7;4+d9NyW`enGj`7ste2Iaf}5_qM&+%K=C!~b6V1m6~nH&JWfdzc$v4I
zKnHKU#9k)%mEd=YLh$^RVtCY^#3rbPnyhv+op7~&CSX`j+#$ks61{i>y~NGkv(~(|
zTX!w?r?IaJ|JNo`=)ztq6M80@`!Q#{0z=dusupU;E~<Y8H0RE~;|@*Sd5r>S5t9Jy
zJOYPN*zm%Z%ggS<b6V7h|1me3c~~{}Y1kH6-V;z*J^^!S=7Y>iWpYh!{Ms0zwK1NO
z)3DItz>d4+o}EZ#{Z2YD7xy|rQ?P~ClkN)wy?7vq?=IxWHe6iIxwbn&aUW(e>k-cN
zC_i@aC$6pDMGxTOLeB8G1JktIiN}-b&guvTT*n8lB50q3+$qX1#!*l49RLS|FaRj)
z<(NYZ;4s$?<JS?^!k*!Zk8<dk-KU-6(rV839E@modY*%)oiL$6DYer}PKbx_y0x=r
zn#oSMuh6Ru!g9SuD3{*A`BZzO`|TN4OwZ7}xH8Ai?zgY7@9it}H-kPf=tH?;J@)Vh
z-)Bp8^zGMs>;OSxqO52IUsaN^r?n1p!f9;??+$wZaax-!oJS1ht836j@2D5GABET>
zbGM0(-sNZDhipKz#h&>?@|m-+)qOHUkC7QKYvH*{@q?WNPQE5)@l(Fo8he}{@wL>S
z1C)sy#A?(f)P9w#E#F4{cfN}HQ=Bg|aYXi8F7zE=TaBDJ5@*nlGHj<vjr3hZ{j<7$
zns1)2^_`W%rTaUC9s(zYOI-Fc>t6j>=M4IT@6Rby$KhDh!eKH2ofFsM*p#{{KMvJ8
zz?&M!($t;z5WM8@SF|zmjcFn>=wnj^^06(uaQKh7zWR!mJj*G93}Hei6(LYWMW`Xd
z_`?Mq6xE3c92j9mG+0ELBASL9B1Y+=Ciq()>>lOk<r$*2Pa_Q$i8wYULg5vdpI6W^
zFE5V^!Z9+1Jh%fpa??f;?p^e%maO4a6&zNrT`gH5VRL@{HEp#V`-b+Y9Py^M(9*)+
zm7gBa+JywGz09iVydyYfwEVc87VjzhqpSSnEo~!|)Rah(Bzrs|47ulRt>cgsU_WnU
zuz3n?#4?QY?L=4#<NzQy8y7jAO^X85g6Ydpd|S#S@3{9vT!@^Lt;M>17c?@hu_W*T
zpe=*IXDyn?EY7-m8-{(542x2o0x@H*(wYCd3U6a!w<y3m^&QBfcQRxwgC%b}I2@P+
zlXNFPg2v;ZH&ZfQ5hGCB`gb(!>LZ3ZRx0EUK6a+#oepJmk|LU&eJwzJ&^afgPaMJy
zK3Y?Outm~1trBTd5rb2(><$9SYZ~qkF{lc6G83>cjo8algMh{tjkBqb3EoNk(GJy}
zLiY1A!My%=ePKQ_3V=pKNE`#J&sa5z+^^)uewoesRp{>*|HYu$psC!kLe<6Q9pjiv
zG_|o~EckNAtiHAA*c`yXGRBiT=D&pv_gi9)c-XD$0j!ChfQFT;CgSfiu$~y`Zx2G2
z#nFEY8w0x!K$(RVL#PSH1(&ubcxG{*rP@Fo3Jg$FACG{V`p{uHn15^~Zsxo@8a~!7
zDT^`9+QOU+38ez`vXc8h6?=KpRcn$_O(rIlokcZdZKDu@$EMiVKpu%;(jAw`#Kb24
zhHN^t-dbO`tt+PB!?~YakSQ!Jm8(?(C8oUzxmh(D$4Uy6T<CQcK8!~LxVmKS;0r93
zML>Xw+(BU+vU(i_7ct91&;)p1Gr-Bsnre_1HVhj}KIB^l;H*kobHo=(z~cZbBAk3T
z4>dtt)C{8TgD8ADC+0+Cpx>+r&)H8gn`s7L1!6TX&WjtqeJxc=FyC0^n;|$d`lcwM
znALbktUCaTCofhUb!&%9L5huBK3$d!J#$CH*&~!vo`t`6)rMj0&9cNXZlfiWuExCI
z0o4unJc_$$fVdkLvUM~;;KnHYfsGmBUikTMQT<>`_yFuf+yJx-1Ql{X1S^NE+|><X
z$_+~g6M!yiw)v8VG0>Cw)Zq;LR$`WTdCl<+0*7t_nG%fzz{+2Vyo?=T{I_vZ)!Lu&
z8x>*m0$Hzv4r~p2fp3QSlG;@GceS;Iu*QKGyrbeLEO0AA?TjL;pd2dz0+)NKfqy02
zm1a9i=@9M5Hy>`6fYox?gMeBN32HhZ->QMuZJh&{9%d+7<F>38s=)PZwNNE8i;Zko
zR;V4|%Wb*Xlw}t}>{I8AE>~sYZrFZwc_4%7pkj^40QI&<A{|+0sjpCVFKZt3jM3$x
z>ekh~HC!fl!CjorEq5zZ!ipUY!<k0d6lqRJ4@vC8;jG7?|2~dGSx?~b&2F5Yd6G_v
zy>v$Gr*q;!<HSw|1>-x)aiboFnMrpb=5jsGoH9A@2JQTIu%iIA9l>*L1Cc(c@@c&O
zUTK`zSJ<wLVdiP<#JWff)_4O<35KG0@b#Nvs2j3jdI0Tf03O&HQ<%tW5cw=yfJFl>
z$vZGB=^zzcK?8*sq#joa^JM>-2HpeXD-H^w6CNMl!~;10)AtjGf>ELoWx$c_&~``q
za*p!lY#&Z3hp6xXb#!%dbv{Hzhp0<Ebv;0-u3}d=_1(RGBK0_c3uX>b47bk$Y8^@}
z!Yp(HOD_RM><y57{R!*zrFiq^jvYc~3GzIFD^C8?{5D9I!>6^L6zko3L3`c6&ds7P
zwSF3g5Num?lzaN)pu@$lwGFrmV9DQcPQc;a_IIs8`_nb*)-`1(E+gM7-)O#T%Kr9^
zc2k&jpG0MYR%rd%rGxj{s;rP#Xtv-;1uAmR#T#D%*Wh0f-(BX(8<^yYF<W`3M%!Ms
zsdI04|7YIlf1*GCB)M(`{%XqJkr8rEYujb3g1Paw%}y)mXlE<+9c>(*WvjDFp9}cn
z>8a?{)BxoUP?KnD?JaC?TV(xtOz*BjTWF58D|K{=Enl4>TyuJI&`l`9zX9^(PFtM3
zSYgZZ_NlO~_Z54u%64ap<xnvDR@+5i?C?8nXMC;+@tbUsx|N~R7Ta|pYP&1;rmyaZ
zhwdM5$cv{$j7)yQ7UcbSr!B{KFI(7No5$Ceolo0Z1zFuX^1N-TuY#1bwsS&(dSS`v
zJMPN>*UjL+;VJW8#?_nWpVeL7>~pq9GE|lxFFki5F9^=8L6#!~vUo0DXO|tGgU8@E
zPum>c<ezQ%K`QUp(~De>wIX_~72n2Y!Bd&nJ*T&l3-WY__kN*2-pcCHkTCs~P%D_w
zMsF2j1@rQBcc>b`dpB)ZI0ZAxzij#Ozin+c^|l4dkms>e_-%<^ZL<nY>Z>pE4cBW!
z^?laGq~6sd^h?6ZuzIBalW!1j9H;-`%dnzcf7!|)%O>fatgN?B(*L8YuH1gp+hut$
z2<JtsW0K$Bia|M4p_fEk882Of8yT%&_6q%FUsHa+Sr7B|D0G8<wfGaBCt7&0SL%k`
zwo!Ll5T4wq4-M4$Pr1rJ?$e7J|Cp=nalc-yW=~#szuw*o?Y>{{XoWt%U+->((jL&q
zSt0iWdX*LW>H&ScLj0z`(<+BOhzmOTq^{iZpdP0!%<$~%7@__~n|w@nSt<YIlwH{>
zr6xU5mOP~Uu5cZ^6)7WfRSN!Co8&Ud46W&}wPCXsOU(bTZsmyfzTBp)e!!Re@rmc>
zm4M6gFUy}5@4A%(^R3)bE}6Z}-=5NKsP%j&f32QZLj&cFNL4itH?NxaI?D1dy#=hV
zx~LL+3M1qPJ9Sg#Zt-{FTIBwtD{^;l*4MI!Q0wSHh)r**EpbxJ$fH0;{w1|2@17*X
zw(G6sg@=3%b5`Fq9nSvj@DDYr>0jN-<x@E8Fx8+Ie`2mF6NVyTD(6b`oShsc4{!I6
z#jD%Vs%g_zk_>%VPgPLXv~xW980z7l-*cFO)vcT|%YqU*Jy_oIupi2O4+F~NIld(8
z57b%1nfB=Pr#hl@{Ifh@tVY$X{1Jblf!ninj!Tx;`5`T+10?q%t35c{n5Wuv&9P1G
z{Ow_nIRC%8l_!=oX0_uKWVkGO#E)EMj{wx^Wt`qtu6qQ-*y4}ifrj??!@;|5W&R2o
zwnOi*{;6=OKkCcf_y^9SNU<(|fx4ArSF7BfyVr)uj$3`%8~-qfi*=Z$e|0N84_Mic
zZV8eX{LuUVG5@8T!-@D;*pwS>Dc;N3OI7EaC15wi|1XrjDGfQLFmm1ZL7`<sgcf1L
zZVaSqwHzPXf!+`DX9$y@>_8itx%vU{8@=-VV{ix0DscQ4GID_6>aaQg)e%{;Q;*WV
zZc44$si&)^HT_|cE0^iS*BW6Vw_U!y(?7%i+=+(m`K&P+e~cqu5iMD=3n>e~<P=%6
zOV6}oAK=*Ae)Q#!_DuPQA^))plgyowzjJB~$*}!;1pa7>OZM=S5SRAOO}OciZ+nDE
zwCP{n$|vpq?7F<mhD$E9cKI9k&|ZL(kt$u?EuC}az_z$zx(O=g>|9f^rYold6|J!7
z;LyhbyiH0Y@JL1Pth5ggBkh+wD@|Rldhv1H;<NHQE|-^YrKu}a&t(Zy7CwO)<+=W@
z=G?+8U>yGnS~Tg-0gZ*?)D)=vE$(G)k>8lunBTgmRes~IWWA?*3XZbyaTe^qHT|nw
zxv{dT&?cNwmxFivhqGcgTE4Q{N=L79t<Ik0=v*vvOd8+l>Xg6l_OIHYCy_R@DNWt>
zYV~txP1Q#(&=LL>HMIak{}HGF!zx<<n^O~UD}4d5`YpH8_)gP5&1S&rv-OvuFYqs~
z1;`tlip9v;d;C3Gu?HYuy4eDW>thSmh&J6B%cnk?{?)CV3o^kiQ#Yz6@70r3e)?kX
z`3F!X|5{etV?(q2xPi5mT)P)_D_mE|neiZJi2GAdKKZ%&au&!n|5}pltF01zA7YG5
z+=yfB-S+v|SBqN{o0^Rg{a~A_rtuO*YX%xG3<U8^ZIOS%D=I(!{-3<C&xe=u&JEpE
zjOOsKB`ZyNlXG`eW6|O|w-zwBzHPMucSXg@bBBFx{%FRRCEI}k{A<~U13#(!o|=OJ
za<$jjh!vgx7BCuS3jVc}4L1Ga1D}bz3cDTf=f{mBuY^MI$FI0WbMk+)@}Kw-W>S`6
zRP$Th>(L_r_#hcpkJJDDYd)+Y<l8%5#BLD(I=0BaC$urYOO`n_r(E`wuac%4Hm>Z%
zOG&B_`fSQGPoWQtDJ>5nHC0l#@|xa_+2i;mgSETSoW?0kyZ98aylw>NO_fOp{lvDz
zK}3J5qO;}FgMPfcgQG7_;N02j>N0r&X{sf`AML#M6ZDjS>}u)%SGTfm2Iuc@VfG}9
zllB1?alPI_K3eYw{0v8@Y_g)23v{Gck5GVH_(=Pq;$64$*ZZ2~XEd;?xZev{8BaE*
z1<3F3(E{ZiUJNn!xrG;D3xGc#=0d5BPKAGFqfL?<v(eX%bN)EF;$h8^)x?##k|wu8
z_9<!lSGO|cgjJ~G1>9V!keBl}ZAe{7Cw6^k<u5y|w^i+Do%uj85&v2e$D4lkfj*$Q
z`2M+AYe1Oe*w*ManG$Vlo!cA*n5TSyOTE2o7+Q&6OLG6LuZW!dw4Rc}dB1pYYtQe{
z8UD3gVgsWr@Sbhmo$|{gm<uJFy#F++c1Qct_6~B&c%&&<FMm;T@<;@3)y1zR!h99e
zm_J7Lcn<k_gj(q7O#{k!kgM`{k>4Eg^Onm;P;Y#Jicgm3b8L3m`x#%wg*mS+9|HM_
ze=XPL=t5tCnk1VcFLXmG7Wk~LJpBwne&3@hhg06{#or)O{fs+`#7?D6iKa|D?W?az
z{l8N7`>=0Q0lPeQRFCnE8bez*xG6DEepCqUmHVg0qFMLhEaU&jC|S4ECdD~htgJq!
z2dQyxVM_34#k+3hdz`nzHc)<e%s+1597D|~u2=B|viou0*xz#W-3@EO3HaAiwjcSR
z%G*wEyA`*FJbv5{=*V#%kUEu^AQ#+YbI3mcO(F2VKkobXNziouwIsZ@-7@clKF~tj
z>rY@E$M55qRC(lte|f)#Xxn|q733KC;4X}ddD2JH1MF3^`(ax7*K%rqIW4!H)YIgN
zZ{hJcVlN8KJ>es{|E&2j_XG;?uTzTwy8AUQ)JF~DsgYgJMWOrrYbknd|G?Ll_-FNY
zD(_FL;}6^kF2ui<?9l#+FYoow>aDGA-uEo}^WLY3KWEF5YoGN|sV39Ol~{AclqJvW
zDQrYZR^!=ZIk{3M=^kTcg$c!1+n(nzs0A5T^a>lU_Ww2jSM2|eRc19O>=ZKSU9Ak?
zrRz>P;gmmZKBq+wu+paF>7jD#bN&fG%V}H2b6Pt#vshEzfBdEu=}=Vo*K*W)PvWd4
zssnN6g;CWI9r)M2#S9#pCbvBgO9c}EdGUEvF>eOPw3Df)^$07v+i8xT#nEZ<#!=|a
z@sW6(IqDlp_kE!OM**3C?OH&un#YAw<(_fK|KVwLMb+~S0NQ5O=)5>$<hFba(%y39
za6anKp~TIiMKbh7J>mb`^s3`9;Vw`sv;ZuuHhph2a2gw&$Uuf7hJ?^d&g^oKF%}T3
zeNF&Pc-#jPQJ5~!&B8iezk-Dgr3nZ!O{(ko^bTO|0bwl^F9FZv0_(kh0$7-Uee)Y+
z<fB;-Va*m6ZbTaUz~$O31WcnKj2b)NSAo>aP6wXTCOCc0NoX;EWD{_o91MVpoyavT
w(;KG(6Xl1K(1EGx|4sr!aVkVye)^rMEOKH2-mGjOUClsv3>eXmH-RQh07eh;yZ`_I

delta 30841
zcmbV#31AdO^KbR+?9T3HLXr)+6W9PDkOM*>0Rn_@-w<wsAOr|<6A7Roy5I#WBDhEk
zilCz4fuNCW01;3`1w;^UQNi>3Jn{HEQQoh5b~g!r@BQC<(Vgk(qq@7gy1K3z4!#)s
z-TK%@rGsp`o5)Ete8$@)>l$S`OL6bh@yfz=-Dc*s%bk+fA-6-jg4}j_L)sU1DD<}K
zQCT^s!aHEz)Hban-I4CP_S|mW+f8(J|E-0(`@JGZC8|rs?RG2mk>iqXrOrpZ6WNHX
zzad^67waO5b@|JWB>JNU$N0Zbit5+TO^v9~ppI_RsFRB#sk0lGBEEFtOII%FW>7Id
zba#`BN;s#;pdJSG6y%;Wb6&*=`Sd_h;QvXCu5O!>E98>{#1PS|IwGw^_>Z4S^~bhu
z>_5?Dkr?7Xyr-xCLeuuP81ne9{yNToXrNO*by|;VVp0tCHYtjtP0FHdlbTYFNf~^J
zp+5cxe~t5(#(Uf*Wl|nz_4Ti79H*vHke=;#?~C_8pYD(kj}S%v_nRgAA~7u@K^}&P
zl#_Hc@?hk#c$!Hzq=;g&vI8Wt_LEi?Aln|&pGKyQ8spjsccNBg#~7liHO1j6-fDz<
z0-n&E5-ACtBsVBa!IRG_OhsWD9yMx$R8#T>`!5al@6O@@*lWnK8?~&C9jHl^TdzZR
zxI;IkQ?p=)rmDwf++lP4bmX+aFN3^30ON`Nqa)+;j`IJ%JF+I$5*=m&PZnZl17D0c
zUjpM4b8`;aPp-18oEmbw0u)i#I};I6N6=GeH3F|{mk^$6LU5`stVC<9fTpl)O`2;p
zoV15&COC#+xceBmE9+U(vunt#>(xSTw|aASg?n`U1G!w*upeH;9L$(;I~e_NH;ydy
zaT#8*{{8E9;s|%*z{<7(045o0GF*mEbY3?GZK1WTLTg^ItNyriSDLtdD&y<*qJ?|W
zI#D~yN29uKxRB?f1v_X@9n@1nL+(=TYt_MKv5r9YLqete6LT+GSrgm*M^oJ7iSeiA
zmgqz!x;)=hOXRI7y#d`(y|&%JAm{n0pm7k;FP%IVplY*dc$5XHd_XIYmAdnaUxM{k
zPTHzjdD&BX22)p0`z=^Hcg7#VyxFtYS$WUQzb#mJ=Ynp*)X7C!F!koOR~a$P`c7<m
z|Ej0#MEvXPKYwkifAY<Z{mYg&>pZ}XMaXth1m(DSVcgV~@;KFw^YZzzJzqMwselSO
z)zP5N1{GDGTfRhyF8=X1-qkgRx|-mPjST8$QZaQmsKlfm)YGJ1)IG@2dUL7?H8rS@
zNqwmwKcpMf-@k5eAOEbI-rkUIbI6}M+G6ism(mh!A_C*$wIT+LD+SpY@2wyk^KZI2
zdo**Zc<eb6%Rxaa$R%#17;zINiJR-~s$|qjVj!RghQ|yh8ULlAhg77Qairmg0U{^X
zpL|P>k0GnwB33C#7O|!TA$d`1CoQ(GxC}r#13=+J0P4U!3wzLkpN1_`v8qsy(>Ie{
zdonFfuO(NZolCM)<LqD$PolQP0eM0`5^E?*+(nJWT8bBbY9iLFaTHr%!!ybPLu{^Q
zlm+IOp>UQ(Es?STXDQ%K85l|v;IkD{cHmac>mq>G8@dJ_!}!huM<;TV;3en;Ht%e>
zJ%$eUyB~-rdLUlL+^;3xy{@(YsavCbf4Z>nx4Ef1-R`10XtkT}q&1v(7hl%$WgTDc
zjvzm+=hQuJTsH7U^5eY`w2?M(>OO<+H)yj#54dq$-R`6<eBbJ%2f4~a{P3`ow(-Lw
z{P3uoVtlkcf_Bhje0kiUC)`-aC;76|O;6D-8#TFkxtn&=(*`|bP_-NA+2aN_YPiDy
zr)v4Km&d*@g7(vMZaP5E8+4FK{*Xb3o%90Nc#(TNV$e&xA6_=-6(_ywrjBWJj614z
z(s6FEou_repw|pKY0&F(f=i@T+g4@UB9U{-pwkAufl_~$JF4|Fn6CfFJ4X2jte&MG
zL{EO#Lrvs&{}xI9?^YijJ%UD>G>R6Xok?#}PlMhv>1}$)#K|^1xXXH5d#Vqhnsg)G
zWYQ2CYSKblWYQA8^rQX;oiXWMdXM*HPx(WQEzxf-OY;}68P)E6`aqB<GU+URXwv)i
z5uS2Q`k0^2af?xO-k?uRnnklsnnSn9=z&^_zht#N*`!ao)@Ss&L0_2kCH>2wf1C6b
zeJ#s3X>tCw>%IQ`yT<2?qHh??w<Zmu!6vQXL4C*2zNZTY{b16M^b^ox($DlC|AV74
z>?Zv}zxsD|Z{c^WO>q83pYy<em+b~<UH$tWNwAHgKV(U+p5x!XHU$sA`d?bx+%|{i
z%JUnwxM$Y&)8v%bfx)+K>R2uPUkM+$61GSQ4qJ^mlOlB>0(T;yPu=^fyT;f;-2uqc
zXdr$VvPuJ~;e0+~u>eHT(S#*JxeH~ZXf!t67%-zWl=JQ{&C1>b`ReWs!^BvB>4vtx
zameg|UC5G28BM^QQ#>T*b){-B%;QSIB>Q0Ba%v)7i58QPw&X8vIT=q@Svkt4AYDM(
zB`mJPps5B;GpIrcgQgQMS6zbiXXWSC6=*0xEt-M0%>SZ;Q=W-5RvOna{%qN#_gIu$
ze9$Bwy~PJ3gQMqB)#Xga6ORR)F`@)y1ZtU!iH-5!DJy&oD4DS$qRB2iVAb(tidc#i
zjcK4rP{3ITillBdkLD|c3=Vc<$EsASb*V!GmvM(y*+BHhUB@ySOv3DdhO2q{3%aS9
zK_aRGOza9C5o-QI26`i!10L)?N~8F;m(m0nOMfffM9?8>IvCQ8_W^2$JUf!<HPp=0
zobzg_MK_4f8D;yaWtl1sP-bd?TJ53MPjA2U1hQHmr6|7DP?j%Ca6UTJiBXY{QIbfd
zSP+X>G@(q<l=6j_x{7ofC7RP{kwN1`E1D!)({zzVvqerAMO|^4Ee3sP;N}H%4PA@L
zV<P`p6V9285=KlZYXE=JD9uxE$u(vW7CyvZz$6pgn>^r`p$hPTBdyVAZ=y%UV5G&f
zl*3tjDcAq~#x&8!AGxVdo4lNZ5OY1yA!0rGH56wxYF|SgYA9vsekv%-wc6Tp0#xW<
zzA2+yTL@?sIOW<x7g?<COXvo5Pl9}SCrY`T?_ADzF6TSY=l^8W{P0#<4vLvaOEHTb
z5Dl)wR<h9=2*dMe8S;{7D_yTB2Q&Kzf;cCHpcFEl`bBCEvWYjLHi4hsFvK9=AC#R`
z4`qUu17(7IOhbLq7o*<2pE{O#It8fnK|I;i0$ZDATD?u<{zw0u!~F~9iMTS%doV){
z_YB73QF2?zWH|y-LeX}L8oZy{mU(<#pj~u5_%vEIf?$w{4QvsXwZ~>sBQcxe#5_tA
zb07}Qr50ko8VfU#B%JQN$9U`+{}>hLOhyTljgxGR24E9Kpf1i9wB<1tlkoR*N2)z-
z&2vG$d20WF$T<wr;;J`C_uz9RINu()^Em8THPp=#@54L?r$imkvHt_?D9&QIwjktN
zX(gUCT18jjo{aor9O>hcZi)M|m`wpF?kKq$N;Q|R`}F$09xNw*#UTN?`lHR2hVc4_
zJrZ9%>yh`}mSo<4_o-Nw@~0l@Xl0E(GQ*Oy(M|X-9$@19-gk9R!><1GSJGb(|9M;&
zwQ}cmwzeA8%8#zIHT4fZ)xvcE-0^v=%t8N>Q$xiI{#Q<2<=TtY-3LLf*5B!Lj*{l5
zT+uaYS_~2YT7-J>=bdr-ZxvC#*IZDd7PukERT;F<jeW4ljhw|U<Xpqq*SaAVF5$~k
zCtb()Wlp-@NjGqz&mag&E9gcymilIcZee-nRwu1=(kiYB5f3t%8@Pms=b<|dTEp3p
z^48Ki&gp$OC;fa`&wwE2d05QzkYxDxa^6OM+{EqfbJG2M-^^tX7_`Nptp+{l_nl~3
zz448)!e930690uWP5p1YIYI_jiD=WHffynNhMuc)-^vv_+C1bhf4kDJzq82sFhP!K
zN1gmD-pT0Nj<%Wf2#Xm|YD`D~P*WbI?I!rdY>-@YlOCh7g6s<ylut#5F=fI0YpN<1
zn4s{n{uSF}qYZl8q$lV}gLaxMN5Tk`iB|1sH<~vC&6@NyJ!4WeJ!{Y&lWHhnQZ4O;
z;L@zr#Io(j*i3qkt~6PkH0dCx4)NtMy<pOdbi|~W=qMkoWhT8$ubA{I9W&`TZ5O1s
znNl&$q!TpOq%PD|{?JcL_y7IwR)5<29sK^rE#<0jZQbMxzuP*(yp&x1!TSlqpLq5T
zhrOzD&di1W2hUFQu}y%Pi^dp03UWP0&w6otJQX6JH=Z-=84>}~_-xW3op3#7Kyf%A
z<FR2vC%h9aI^v08bPB=<*#M%mV#yXDolODuJ>*cZ3KcNRkkkagnqsN>zzPX6(LsO^
zqhJIn0!X1|`k-Gl7KS{tR<!bUV_t**uzI*+*xExlr(jJKd<Hvf$h8U5#O`d5yM`h*
zQCzmiR0%WN6N$t|ir^tN8Y^~3M;)N3A=x>#6kVKV2Nbb8$Ch8D=ji#hFoZ!(n_Os*
zvllwj?As_J$IiKhJ_ny`NqU?;&Vkdd$VhW+qxc+0zNfJ#u7=`^oJmF!Mv;(aY@_5H
z1C0_pAuj=WN!!WE<-60I+h}l(6DK1-TVr-cd$*G-$!Wt0S?G#$Mdxm%MoGpqlv+b+
z+bIn>*~sy6Ruh#K!O5mbrbPo@Y)-T@-|D@Vybw#$q9e9p8gcd<jL3kH5}@Yj8%SG+
zBk*Cx40&xgffW)~(5)np0?Nc9<YHah0`&zDy}IB4EnzKz)pYU=fK)pO^f?$}=n$O9
zrI4V;;4B^s{ymOXLxf!i@pL1^)2)y;9|kvm1hsZRgxm=sau4|YF`P4}A-$c2IQA(m
z083v8*=q@1q@_Zm>x4-@NQ29T7c7TqV+}aoEufYXti00)(6F>(G)@4vVO+hD+Jl@v
zTnykjYMd^Ba^sliJdj3+u#mC{r~us#P~v<S`WGgY2Uz_uq`_#RV~Ve1a(N)EaoBr9
zk#b;=yJ#5oE=Bjl6~dy-WaBUL7&JosP0b7%Da1u;Y0#)K291W`kc6y0$chy)zmpLc
z3%Fn;OtkX+SP|=E<g2q1r&HY?&_YASma~ScF*m>yXkEO47Hs}_A}88Wk4kf*nb2Yi
z9XZi1l`!HA5O<N&y18<q-T6t*Xpnk~u{qHZ`AM$m92Y+%=^!xw!{;+w#!N0W<IE&y
zTm%D{tWPV9%#AbSBAL+Ru$MBdsu#{Ds(}v#%eI#0E*J&YVH@8K%JD-~TaS%<57t0p
zwoC?_u=MvqWV;`WxEVFJ;Aty82!qHrlsyg(99rbXXgwadHc_)F4(vP#a#OH|6OfAF
zNn5D#sU$BCGpi?p%gCvTphla@>kAsxfvL?H054+#hBX-}2L^fvl_O<fxHkj*KoCe9
zNQ6_7a$(4$X&Or1!8LYVf->i3<q6A-rG*A1e5^>}VFXbLgycwpOR*5YllBLCx(fZX
z`jm{{3~RZOQ;hXv7U~A?Y3a$Vp;m0&v6!nx1CQ6E0e6@N+>mxHerKs*%%){xwlL*W
z_X(qU9SJ?9np>Ke-D(b@syoe*%sf5V_YJ!<rN|Kp5px+jDs~A~Z2jLW7O09q?mPu&
zKDIBzyBesoPLUiO*tsQ14P~3^1D`z~G*SA#jEzu+(#^-N{;OvsQO`)fIFafv`FCvf
z>pzaPTr%R9hkqT4{W{dCjHuFkSWnzDS&VZkxhn9SQ+#f!q%3?uG?T?i!XaOa6K*92
z-BYuu$c1x&f7vVsad{&d5ic71{4g`JtDvtN>)+2w{oPD8P8!7b!EWsJp?n#}=V2*d
zhVx~FlSXodQ7%x$7`}|<>~VZ4<I8v_UBUMWe3{6XEBP|X4O%Pb%M^Z`>SmY7G)`9V
zWjbG=YR@oeX5hJaF<HpDiK3U1XA%tRph{+8#Oi+tBHFci^4CPM)7O&bu(XlS!iqt&
zOeoXy1lcAmgm(x_0!76HKdlOydRo$AlXXEh`Yc5gRJx9q87%)?Pd9+e@?|-#U>gxC
z-9*=-Qd<+d;ue!`<*b!_Sw+_xbQ=S1db`Pjo(ZjMjY)U$Wi8iO$CtZlsoZ+AEr|75
z$s*UkGcUrwX1mir^1YVwK(a`XzpNG+0W(Fc(wcj*8Z0>^2CbDX6g{zJQs6ww#I=>Y
zVXa8=u^F<y_>qNYi_D!6W^xn|%Y)Dg>7w91Q!WUbMavAq+G9x|(vYm-2)5=j7KT`+
zutXui!DhB2pXj15+oMB**=WfW4j65R<UAkk*`lNc776(D)+ZN30<h9-(M@}Z44m2a
zIK5EcPofMG1?;;4au?Z??2ri{hv4oEwtzndPvGY)pfuV@u4gD}J2{@A=o(luxrkeM
zIKd)AEGJx$H!v2c*8Jo~c04B{kVxQ!i9{kNBGE}w4JAXW;KxSc$5eidLbWtbL?h9J
z6EWdIHBGVuf9yA71YW5;d|kkYi^2JOKw<0&TXKJ_)Bwm0193PH!s$2+46hW&{cyA!
z0m>N(+87OvKL!H8SjfiXAOn{{q#X~_;}vusoc${>?ccFecwsC{_QmkRT*Siocx61O
z#_kyn-Ms}y*Bpb&$5CI1l#Wh!LR{&D73&Njuy{pWq$s!~{vxjsJ;Wst1&m!?4C;zZ
z=H1=c3RHXv;4aUHm8jUD?qG=Rz}Y~97~TIsQ&ImLnkrE=#nwe;lsz#nzV~j(AOm^n
z;tk6IB=6O2&E%<B!W9|zj<c^OP~1+G#aj&UzJ6k{<s=HkjTg5YilN@^@0t#s2fC7M
z+fPrFliLZW%sZmT1wJVkj~Q0WyJm_V78jLW7KlPCbI}4(Wx0j|-z*YSt)%R;KyN0`
zEfX$z!xCXBxq;P7M89D2otwlK%jIP|SaLIX5crThH5DDJHkDmOq|!SdgP6ox+z2=5
zXQY!xxj{jroiv8;W1Y-J6R6CE!tn-OVK5IhX`+)ksL34Eq{&WZq9!xZ2ryAp2M<kP
z9y(pIPkxx`X4@oR=D3*~@?}03bgy*dbb{Vn#Y%4#E4_<Z-xaLyM$i%?sO`33j(EKr
zgoUgdVBLZKS6RMbp&UL{B+2Jjie2#6XUN5?z@Q^<6T9SdjkV}FR0Q`1sq|1|zuw}}
z(YK50tXx`df*3o4pFlN)=8QxMbJv@hQ&)ir-$J*-$3{7_{6VnGyqEOmvh5ur)dnsm
zhu$GZw_QoAnD^c$$m!)^0&h33Nw?F@Cgk!vOvu<sfdw0M7c<~>^7Lq1+<;tKXVBdy
zyHiZaUmFaP{BW-cA#9Tgnd^QyQMmB~P^-CQtI0x`K@Xb{#2#VTkD9a{W5`=AI$|(8
z*xVjivs%1nOKGi$ob^VF<uaGHm~LB(OXq>QEiT;@l9d<Rr;bbKE9FvP{}!NGTa^4a
zF3k`uF5O(UwoKa&>~ofRxQvxe=F&i}1xG7`ga^tO-GW?t6G6)ZkG9yg@<Mx<KkrdS
zGq>W~+50I123ixg$w*J58fA`kKVL)9o)|npJc|Oag=TjU3ZR?$E%<EQ5J=AP`zfIe
z=E}sO(eCGrB0bt!XtxzQ;_T6$hkRh3dK|m;+;My;FvlHnvt8Wm5Zf=E=OciNZ25d;
z?J}i?QaNqUhq{+mOHKHJPY|ecUQV+(&*|J+O6O-K?K4#Mz@_|<gfpet?jbofK5D+7
zT7;*SQ8yC|xaC3ApjQ8<3gDt_DgP5YqcQ!V?s$eVI+LH(-3ERJ$xQh&+kqqJqoe?b
zcqfoVXCF9yS8(xe5T1*%1xv8wdVu(Qg3fwC^6w3hL!jUd6-2`Tu2iJaaFLEPf(c~;
z1n+BLb}WJzYKP&r8DMq6f=>q3vI?XD;u^@Z9l`diaH^C5rVeOc#a1=I@^P{^DuNb5
z6oZsSF@4ZQ2QoYYEWafg2K6<lA6(7qIzV7Zzf<}poN(a&T*Rltz{v&;!ZFb4|Bhyh
zaTfAS{z<fh713&#J+uFzIIQ?ydf72BXubH-6_#xCE%rR;0kKq5qOCJm|EXOrlyv;E
z<<q~36#3m25gBOup!nJ&o(@DjD-OiFeg_&M`_egi`j|))FUs$a39r~9qmPU1l(Z!0
zUKSh=(cB_elB<U1Z6e3b2FUYcIqtX^CyM0L$3<J;En*$56L-^Qc$Xg(_rMh>;Q-tS
zjzVC&Zmd%TU7#P(RXM!N&%=TRX&S5iBaG_~$N+2UC!}!Dg1NwXhxKet&p^FDfEA0w
z$<HlpJV@FA4UzxO(XaFyHnbBm!8ghDEzt8KvKf^Y5SGONL@qfY+6!-B=LxY#MDo!%
z*nez|-%jM0^G}NQUAlAR7Gf77DTgn)2DLG$tsAYgogBnqr*?eFcX9}WokJLODlk;6
zm;CUgNQ)5z1Ub6SoHuh}F;0P20|&?+hipS+$tS`A@1{deeqGGbZLL}lkTKImbNTJ-
zqE%KQ0=`DhU$kIqMc<ioD)7*A&itvf+srCoTyA2obP9$$AVfjteJ(Qn$6rpAH=Gi=
zPT03hFtRSP`jlwy?8>1Mu~Zy(rOLlgi5K!%$7ZG&i_Ip0KZ5jRQ=2s^i{rGK?jbEJ
zZ4cR=MiBy9<b~5B!IyvsuflSaipRnu3~e0t1zN|Q<0aTl!SN0j)V65MWh(q&ClVYS
z6XM6oI8H;{+#UqTp#q`C1sD)FjsS1c@at;3rTlUjP3pmj2*XH27=kr`Td*KZ7G34x
zH$=zMRlzCQ)G$11#u=cwmS94yz#Fm@IBU0c4bQPFtOh(sF92bFEIC@yVR15Ax5c-w
zn^IoA;nYp3xe69zEU!V2Je+*F_KfiQJeZX&2fwXk=-WxRvGQJjY+x^76l}hX!+x*B
ze|~su`QUFY)nwDaRFi;UFJPFY8_tUy1=Fg8k2aRH5p&UyJIoE_y(yMhoS^KL<HsIu
zO2of*7AH9OzKE6odK;#GNKIF8%))ruXK=)V&3ZV%InVRuAYTsg<uG4faO3hKr;c#y
zCB7U*ln=ceL9fuOFs3mKL@=D7*Er{-lV0bXQ%*X~_cxq$h6~?B5Q3XheB9kcdfT9P
z+*sJXe0i5I@40X+ydOay&{=~%bOXyMYr$nLtg?@|-N$aQg7Xpd34O|!&$x%rxzQI9
zgg}A_`Zs;Wm#+=_h6njA*ZPk8{~jR&2L0qi;05h<($9SVkCT4k`>#&=jqks^`CMbe
z%0-6xmqC9!>5`iRHVq-%upaJ3zR)6s4M7V+kDvj<j&6hl!xDyDI0Xy~eJ}&zb_x@i
z7LkA}8o6<x0YntU+)+HQQ6iczF?{jxCDss)4G{<FZ|0P8LnH`lEPptrH<qj471xTy
zfc-tOT-TEXINad@S~I!U)LO~FSrH|#KPSp$myg6`Hvr)OWJ9C`9{ETd5_YIDLJ!P3
zC%!=%%FZ|PiciFQ5mh49gavAbA=2deC-h`_>LXhm0<C6Rks5PN*l%!wt-!>oJK1Cr
znMF%TmE|<Wgr)|ma2M@Xb^${)!Cj=vou7(CUsK^VM7jw#M{`rO5E+8v`}XNEYV?Rc
zlLq%0+HX|<Nh3$~p47X~7*n*wu>+Z@PmiIKLRH4}7%`y7&|ZB^k;#RTq5NLMMh_ju
zKwAllVW!!3(ZZQ?+V(H6nlYrj(iE*}GSu?<3$F5a92O-HUDTQ;1g#fK={i$niEKk8
znJ{JLTGDiMnby466m4Ma5N&C)A@WSoPUHi3rf3g)hv>kU0#Rs+j-rzxI-8<MbOByX
z(N%OaMX~74k+sb8Jsu@To1#SYFhx(%%MiU)z=`}yXbJv5h9$bdYQ|Q0tIDsD5qVm2
zBUAJdi2p+93=;jMmZvpOFhzgS!w>^ZF%V#=N@SQ~5Z%Cs!B|_B7%Z=TR!er8hz=fV
zh+*>3Zjl&QDzId1oe{&u2t$ly{EYIKFYGI$6SZ_X@e5IuH(CJEeFWT9_0TqIK}Cf^
z+c;5Xitzv=t`HMUF;R4vr@s)r#FcV>Bdu#-_?O}{O`aPC9q*3ju*;nD>goQ6M_uVZ
z@J*I187b23dezLO6|$U3Xw+AtN_SpWu~44=R7;b6zZQA&-mk^&6UT$w2y8?c?3kEv
zC8i?eO+`wBxIj#coXrHN?i~NYe;`A3w+xBIbckn;nSs8B>mup<1}n2z?)ye$W?X|Z
zi`lnEXu}fKvbE}ynI<SfwQzhZTKE>D&~h+!$B|N^Y!%$xIuB#R&0|%Ud~C%+UYUX6
z3bu+dAmruX&SjcrnQ0lSrJVGF1Qa?9wIDS3{zEN0{U4h4{;y48fa6Z(`ESK~%y;p3
zB6~D<VFd-jC>NaXO|a6<Ma>ip_9kH2hT~)++v;#s<w5*rPq!Vyu{AiW)@`Vz@%F}b
zk^JR5u@^n;{$BL+aStP~a*1G#tD#KlkaQDp?A-+zlOO`GRe39MpsrJS1OrZJBs;>W
z+o9G#1DvgD<!#~qt(@uT|8DdYLE1$cW|03+Sb~^1k~G-k1*@q%cS!A@qF)E@-Q`^u
z#2z0U^G9h#_Fl>sgtHf-D-sXUd@J2S5Q=cEp@NMxem@mL^XUkMu9K%TA|gU%MV>C!
z6WZ2LS5G%Qg#jR~io?)RqlSv{2w#V<dw@y~a;TP*1uGj&2MF9$!u1+l*J0Z0kteYv
zn}C@65J<2EhtXEinYM{y_@zP%xC123OJoNcnxX!^EXj!OAygu@5tejN8d96EyqZE+
zD<3(^4Z3eOewTnen-u|xC=g`8vD68%kbhA6p8%~`Hm@!~?12!%^8eYKz0?El;)f`5
zKlLp0^a@b#gZrru5Y)HGj>3L|_ELXAMUEuLUdrPDryZPX$Lhg=A|uIX>_r3?e9}!e
zQbCgSaES8v(jY-AJ3Eu?iO!a@(^e)r=hxEU*|jtzSf_@Dj@?e<gHMO4#YSqGWWOB1
z<@FIVH7vY<rC69y|LC%YhS$&tEKGn#9^8Iul(I<ziFFGC1syK-G#D03;8-65MPm|_
zjAc+WR%151=mmNYF7*(-PeRRQP)(euy+OI43@q#}wKSkVH~=3(DSMcZd=%RZDkW0e
zvFTVIJgW#^2atnU_s4=8j(Lj(?r|)E)p9yoTD6`~s}GtdN8`UJ&fxHY2QIQJX=mF%
zsO2B1Xz|OZkiyxEtB-lm^GHCsq5S<c3QIS7XfBKsV{)FM){jt3Zf*^Yt)X#1(ROOY
zPi59qk)EW(Y#T(8B{`qObYF)?^ahMCXTr1z$RRpJYKLOnoCj9!L)T8=1;GrMT3h^X
zReXlIi^Yez!EGR1t5!8?@IE(rpEon;*?|au2+pboumUJUU<+1;J8;lM5l>f^B+)e7
zD{!|V!gdz6$^b}a4?A3xRtO#QM`R463nJq3;GDqWpT);EW&2!S_<?;Sw$Dhx4x{Gs
zP%F)ldB=1|;G^Gg>M7;<_qmhqI2ldEKPb7s39qbtA12bX7e$o6v?wZY_a9<!L=r@C
z5s4VYMu?M&g6$)kydnlMQXV<Yp~<*pa*Gs%Po*MsDveS^6G|0LWwfEyre&b0C5kc$
zVhm-7)(G9pq5_c(@i0ezZfK8+e7V}G4RIC1Yk(I#L?`))QyV0@$~G=-i0CPoxwPI8
z7!JF%e)ho-CWpvKw{}O`aEcHkDO!x8bTJwL%czqW3(Mj-?8EVLk6RlpNJd0xV-zm7
zo!r;@Pq3Fib^p3ai7ZBD2A+!0ezS>WxjjnD!|mNDtr)k2Xl<@51q6`_6);)e8Lj20
z7qBiyKe{k*I9mJG4d>df6f}V$I7RCU;VLavTOE@RUeq3o+yOdR0d$o@xi3|lA-c%s
zY1-sCw%&I~jS}dWJ#d)x1pW7t_oiucL|<ud0x$z*&nDV6k!(X6hLdF|RH9OOw25{i
zvN`IvfW<un5Sqy?O|=ecN;$(aZ+jMRY&@IH%iG!4#EqlrUMB|(Z3@MOyXgUr2)EN#
z8yTm-0Cjp0Q8sR*w({j+H@p+u`0|LGqkRn85dlr@F*iNVap7)y0z1+L$$qCnPw~q`
zfWM3HyZQ388(W|n6z!xMw+{#l7*uPpobRQ5E&$!nN&$4OJ_I#uAgft}IY_(@9pWwz
z^W_DDUUagGHIP-TEIP_5OT{u+#mZw9t53c7aE>KE<p!FeWwoc%Jlr=p)fGd9!qtV|
z<O@`;E*xy0M{n~5I#(XO%WB$toO*&&?;G?1Pk_}f|8)b?!F&+R?HVv|TE(@amQ+?4
z^l?ZVscEMDN5t<i=p2xzcoe329%0_N1|nK$xi&j}iWv1kw@j_65ZBA$?~6>{L(%e~
zR$Bi^XXX5=nG0vmhd&bQ+`Y9{DqozaCCbg0MB^AlMnnB+bJbE6Hje0Ue~&;D*&<s@
zj|ql}bND&}#Y^cc3E?zOu05fpLel@5<HWzE@Av{O?}7=-(~oQ;@kW|3H2sX>nlLgA
z=K%J2dFKnZbgAWNDKQ)^Zo;bcyFq`LusvS%Pg>SZ&d$*~*h=XyS@M$#cTdUX9{)D!
z68VB!#r3e>gP-_JA=uHmjBYoDhCv9M&=H89DO=}i1&Mb09W8~!gcjyBgv%6e5dj0Y
zJfEp$D8}OEa+87HB@aESb&&tc)rulZ1%_QJpeRHEZ;;5_z(r6hVoc%TK4OJ#BG4($
z6!8LzLn3Q7$s$E5B-qX#yD3tk*bUFqPWhu6HxFFbPHSiLKm2Q~Qdi`6zlj#|WC!g(
z@>qe^NB&f(O_MM9^{z2}uxlV6fYvShW2AiGJ1t3G=%^XlBT-@*?i0WO36U=$>DJCp
zZ#=o+xf_SpBLdAjX<5R@EtW#7Cy1Y#3bUAmonH>S(iB)Wrh#^^g3Wdo%(1gU=W_sY
z9%{}<$znWR1M&VkT$kX#rI4bQ;pnY|0CF|<<^milRUp?zkf#>oFu#V5pzh14`6}vu
zhV++oy-y9X5{#xd_Fx%I7CO2~hAuK5OkfoD$^>-hK%*24ZW2m2g40b_sU4UqJoCtV
zHCW2J%fB&j>jeH1gRH<*b^4rrOqiYn5lS<lz1T1z95WR8w~sR6LN{oRL31Mujh+V0
zn}pv`gXRx1sPYo<<%A<XTnEh%EsG1+Ol7B|ep#_AsPl|K)o$pCMGjY1_A}J1mYVZf
zuA$?Q{&8DNHGk_Ab^%o^3GpIXLEj8(2``rXzDP?^Yh~#z>s2#L3v!2>u|3y{6?X%}
z<I$4~SY`$6AA`yGFN4|gKa9n=E;yLAT^bDLiT~@tEc@?+X<18|mt&^pe;SOX-i?s$
zx@xKY>M(k^-e4lan288uCIY0z0+9=wd3g{sPk}^OHtQwVchx%h_ETONzqi^hNw1}R
z9+*unS-h%r0tE0Hj<(m~DLKVlU3F;bcfG<LvSMj1MXz9o?91jwv7uceD02lmWh#Ve
zlWn0^@HMZK?B7irt=7631Tun)T%&g&Z@!BS@&V4(vjFe`&V;kDetleTwjC9@qXCq_
zn?XqI-z#AEB<|c%6S7@GQ=6jYl?IeM0Sdp!^u6E@)V>XM*!0^NqgmDhdZtA-mB?g&
zl6OCKVTW1Qp}AQQP@#Sm%O1sAtQx7MqkoN--#|zIR*fplY8FO`H;fVv+FGsFAb~xf
zK!KZCYp_FBfw{xh2pI3ak>YVH>5+|V5AR;;snhmL|HeZv&MDbTy?r(a*`Xy;!SOgs
z40tQ}C`rK4k&Kh037kUA3wg(wU^eixqJQ)OLD!Evr`9XgoiW7$`boh3J&1g;kkAZw
z{&UAtpIYi$O>wMA3MPxu;3{00h(;T5*T4b?;NA>i@*oKG2cUtt4}xSo822H#r$TPn
zbGaalHsQZ?8M9AoQSZ%q6+7_5FzxqX@YHwXv_n>uRpduImVSaD!cQ8>lBcxx^1@Qh
z9{72>_G2T<@ATCot%ui2w%wq;X;;3Mg2U~moQ#1KEu-((#$0)|3xu)2penY~st0)4
zZTwo=VmELJN4t%#g`1sl<X^`QcQC|d{Cs@`OeH?PEa!?V3=S>#(9Le9JFd7Awd9fe
zwMsdBv$j!gc|l9|XW!mfo?D@%$kK<kMuE%+v~`++0K9F0DWl&KMS<V9YU70yu3{4y
zY$t!{2*-NmLt2`%4I!WjFMIK;^%BOxglH2Q$fNA&hO7hE_iTCkA?>bMtOpl7gO>qK
zoIZ%%&X<)BYt4q@MgJE#FdN|`M>saSi;1@ddU0%a4Zl`5*u<HN%PVw%L$wj~aEzn1
z;mkQfkD1u^`xxv=zPyfC_V{(Ql7M%cHp6Bc(0Yul9Ij=`XSQp-lQ2-VmKZ+nYzSk=
zw~{G3YVp|x$h3~>380cIv61VJ>5lM|F=^#P9^?k4JG7SVxq+qqO-6%qK%9boI#o5a
z4iENJa6^vk=3a&TexEHXZTf$OZM`qoL0;IQCHv|IGYh0TyB<te7$&}o04)H*pyi_l
z+pA|)6Bh4gIM!B}SFcko2%|SjHIP3;=!+B}Akfl5Zh6~d+F4N`r$4R@^D!cpV(muY
zkmnu|<paYQzq02*@MkVI7Ke4N0Sta^kGbQS@EFhV0s=ngVHMFw49o??AQRevkB6|A
zM*w#nVA~mqQaet@ao`E1fV=`FqXF6QDG`YgYBe2;rdG9as8$BhIRpn!69Z8$f3xy`
z1z6~WKPcmG%H}+%!WY?7Iq?sQ<mgdr(pO?oldR<jgsy<jgKUIQUD&7Wh!{Yy7PAwe
zs%|Yd0R#8d!(*f1JcSZ@tWeWGF+4uErpm9L&{{)tYy6~^ABkY`?U!C)fN+S)QBP`_
zzA4a-7`1##6@d3l0cv%4(|Rb>+89I_jZ=}Hh7m>sO^iJ2jUbNEvvS5FhcVHJw7)1_
z%^DaefuzM9A|4Vywi8=RY{zoBn46E6e?O@;5#40kPOW81fLaGAtH{X+;D~ugaKtu@
zc+yTS+cyLjpfRu`l)-SqBWLSo3+xGAVjE_Vqn4OCY)i;<voNnh@`8AwZ=;Syu1p@|
z!}A&sag{Bui6EYNiadB0cw_V;UZMK4!2~qQosTjNqHJf}`Kub)Ae&C8?ZGqt8oKlX
zg->b6A{1lY_oQiCg*90fh}@^Os$&a*Ne4jXnZrQay!aXFs}Yzy5a8HPZVo6h@MDDF
zJ#txoP-`YK<jn`Qu~r<x#|O2}N{L?8<Ur(5FG4rG^88IAk)0@sve7G=OHMkZMfryF
zS00Y=1<GkVI_jjC`Qa6VbxQ{w<A>vXIl&kB0Z-EFd^yDzD5!6+O8O>iqi@pNocE5w
zo;?q%p&okQg$mF?J#^MhAJRu|2q5S9a-OSx5<$LC>9Yt3*`IUf7j8)S|KiKP`SKO~
zTC6L7%LBtDo!x!u^gUlL@Z|@`!3&IB=8fs}BVT^v%g?qhn~ovxYx)mgesR*T2D<^%
z!*0Nf^cO?^+dYjgv0{`i;Na8vZgUG=*f|ByUphR0xUi1r6mGo5Eh6~YRD(Cz1K0}~
zV<%uQ_5k)`-(NcW{yr2QL&TyB<@=MBX|^VTLx;5^n)pVYSn}Wz?RyXOJaYHBc4|c>
zYtB`04$4C>X&dxJcIc@XgX@oK`Et)u?OO3}pv}wLQcbj%;v22I{MW0RHx9eaWbMpk
zJqT2yUi6kLmTQT{WpurX{W!`57el2HRH__h$#f4#2hTD^ib#c*(1amtjv<<w!fQGA
z;NfcyH8S)@_h=KIzLuuQ<R+~cT$yNXh%8fNiyQ;5g~Qd>))aZ@&GNIw!_NjMr05_D
zOi?I08Y0mYooJdVIx~nO(b53hnS2X${4gOx`CiZCn$6+O8&M7NHZItuCSr#+2i=m7
z(v$3@!(h6>O-B=l&iBrlKkurF1>U(83$B_u?<()2O7E1J^Qye_=Xo=#yw}WJIK!JU
z%{#rKeBq)66;<ByDlQ&7bDEW3?44VFt*Q_^e}Q-Y)TxUqXI4z}&YU-G=G2NR*0N(U
zFt9d*{b(iZ#p}uUUZ&_R`WT|GDf)^2{6g7gc&)tXGNUlmYo}?!kxcipBh8ypIISo&
z5G$vmYGE6%cV?A0qfP$woKRVnw-WQ2!XYFmC@4b7-1$`tz0)e@RRKzy0%TN8oxi|J
zn=q`_D&Jy>7-+%{22?%9uc+a}1~TetEaMHwwRmS42X&Xx6#wb&sj~LC*4{Hn42H)M
zivk`z-4sLNc;pyDTbU@8KfDDta^^n}ptd|K;^p-xv^V^(-{g?PUel7r07NR<5(78B
zrVYhTD4z;MRkg^^%afHoL=$=FcNk%Odk`8E+uW8ZC+-!K<klNCue|>&ku7_56P@Mq
z8}$g;_;oF15CjhJR0lXEzemr!lg|#mgJ9JiG^_5QV$>b2(qmB0<_I>YBM2YdTF@18
z>FZj0dwzG`l6W@YSh)k6dL3*M_re$u5|yGMIF;jxqiEPod4)Xry4F)nkTIvU?6zEw
zrF{fegzL3agJb^NE_k%DbCVCdEBvQB#AkA!mISk{UO)D5d!SeKv*VPOR=|}l8EyyU
z+{aY|OB6H14V3pDO@un5c!+<Tg6dq?&(3=NxP#3E?;Ie8UvcHn<nVa;oN(?&LW4ad
zG{~pxH4Y8Z!YiM1vvJBPAmTQhjtK0fry0mzij?b5Yu6RrQr^)S?~Hfgt0nF@_a-u)
zp}5`rHe5Vj5DMVcw+Q_Jo@_}GBWo#v`?k>p+4~I~&bQJr$Y95zIh}yAa}oikudAVO
zMA<gTE%R~wM*!E;aDbMg^T{9rb_qM6i)O>5u>gnp8U%YUL@6Ropf)W+%7&aZ&|6Ef
z-WY5ciBCgsELB)|8Y(pel6)v0>h0g?zhKc9mslP~0I=XcU1xMzSYSW$GANdeckdak
ziNd?JWbXhW@77zg$Z&6w&}Fzc_5#o1rT|xe72K8!xo#Cc_%Ev<Xt_npXxteS<1s~K
z$pdG!sE&1X^D!oWu3qbiFaaRWIJkDnKp#7yu93DkwGIPpg}Swf$?*5#?SsAWU4X~7
z;8ZI6`#kIg#9IYD_}aKo&&IohY1=Qoo9&I$Zz)Hb<w9Oa-<w(&xYvJC{j;%u9JU+F
zZ4Sss*&N`Ep3!d@G1n+l0>}OKf^T{O#@7)-6EC5z)+3U%zoDK(AYSzc1hkMffz1e(
zwhv5-K>+Up*rgoJ+eCJLOB;~SjxcXWN4z5)q{sr0lc_L0&IuyiOA+7@@s6H|zTJN5
zOla`cZ)xp)6NHN@1){+P%!2~a=78HD$Qli8|4v116QKX|0G-eq@<3Zmi>zOS_{LJ$
z9M)odabTS5P^v*Q>7XWOQ&Y+%{SpiVu$Vyay&E(EpZ6s|P#GDVqag=sOcP!F0V=6K
zXB>e{J@WRqwV8_W6f+Z1@CQZ12^0ellSd8NVp8M6gBpkJAl0CHf!&p3P)zN?(6T-l
zN(mDu&Q%7F5;kP;hRL>dg+-*rUIJ+hfm>J+MgkJ4!pDR$GzC_h#*@2lzw|{4(=dnt
zS8G)GP!e2WP2mb_4GDq~$;KOw{oVwMV3SFjq6m%q367LMDRdu@CW7OGHUI-UFFt=2
z4hMa3e4R+g$BxjjQ543Pv2r%9EmzLpg`9fKOf*y7TGDZ&U>AZ9!8$N?P+BS|ng4{s
zK@_3$Ap}Ju1A$NsMlN|*D-<i`?sqjW&WY3SYE6AypUJ;1P;noe#Rg1!scPB|oHqsG
zjli!UmZL6z6yhP&W3zHkyb${!(8CK9Px@aJ1<F|U7YC|tv#A#d`87dE6u~*PM_tB*
zD+@S4#Ao&{FqsBeXkTyqt}qth=L$mTf?oMdGqN&mwUl$&i6A<v=GG}>ga-^CnHsR0
z#fc!G-Iyp3MO4V|-_w$OgLzf-ZPXk4Pux;VZPq_PoocDAC(k%Q?aFK@&L3H(bFErx
zKeCJuB%x9cRIa5CI3x;cs8D~>R!3aQ8YjAd+`E!TbfY$+n94;7O%*-s&9efpdiof(
zj7EI3B^ufop9_!>t-*={>|kbsk7O0;KwHPob{<)$Xr~%lv^UK@qn0{b!^0@~OB0#a
z*utYUke5|UMI+0cNOe)EXh&!rYJFT1Z6pG4QE(=S_Ds2%+A}PD3uU8m*NxCqDV+yV
zOWg+DY!hkGO>o}rr{Xe?iZR)KX`1D)w>DQBoPs3!kxjsA0p2%T45JmI6z`6Vpp{}Y
ztrlbGE>T8Xgl{}OB-qno>F6%-fSp(~W>=2@Jr?0T5?n{ldkQGB;lGCvV^9vFVBFyg
zAr}>Ix=71ye^4^NG7Z@=h{WkYA;WuYKc%bB*)VSGGXm6OaCTPlv;10xD;s%ke!W65
zlAd8kvWe1plKZI_LKk{_`qWTgR6#gL-x}(N;2Qbx2U>xz|8`2n`xu@wFq&9qHc2{j
z9I){k>c9Qcu52b(U9p`am=8!y233@UUrm9leH!J63V6k*!}~vzCWu*hHFq}MAm-9d
zVjkTgu0|}*0(>`NVHio%v2UMI?CAy=0jkM21HduAAinTP3Yh-NG5Z|I03&gpX|TCk
z%)!d3K@Q`sDLMY4k&4N$y2$>ZZ6)|!<S*WB`<)UQjgZVOG~$?Kw1aV`JZB%0lUz#!
z_R~PNiw_!5w;Po?TwDtm+!8GOQZ;0Y&wCK7vk%Rg^X^yNlizE84skA8v=g=twLj(4
zi)``rq{Q3n?G`&?RX`4?d<YRe|CCR}<?_jYX^eTlfDv$H{$$+U0Fw^IdQt<6bR<IZ
zCzY%#icLXKU5Y&Xq1N3u&;_slBYb((4RL?F8x#rG`(S!J0$6`C0z~^1Uv?R^n`^<}
z?xJVh`~?lJwTCY?{1}J;LJ>Iw&%28~@9L9m`{{r|&%1FP18_C_-QzgCxeXnL)z%F=
zzHhseKhc)V297pt;7C>m4!*y_7nnGXvG@HLop7VVYxq7J1A3i52nPelY3>oO_iDCr
zxadtD^IHb{+uP7PcwfO_BS#y0pS}7Ya1StYw4o23VLQhJm-3-I=~Hh1nHzq^FF5t3
z8y_8jjl)G>xgqa;&23@oI7Z((`K@D^Iq)GguJZ$5e#F~yPPS~|g$lm>VkmottQ@bU
z%I?qLwZZE?)^g=r@ghb(_OUildal*_%SjtVW2IvUrk>N<2{+=h*&S}spMeL?Yd;F{
zd|=(DTBR^9s;|%Omd?+$*8f!M4o}pQ<$%w%*tY5ebLzt+;5*8({uevXC-D2h@Q1^W
z0@&<0Uk0DKuwAC)w=mhRVnX!#+YpAy@ziK5-0G!d6Yg^p_7v7Wn|n-w5FCTu;<2K!
zDdK3kEFY*PM_GPxG)gofEJFk9o;oh6$Fy=-PZn2r=i@ea=F)ODrFiF0_nIP!qkdBm
z`ivB0-Dt}suy6*90W;>$nWkze3kXIOwt(>Ho6|(LqQD2=g5TzwATkWmQkH+A#YJX{
zR(Lm0v{tdl{<S?UGe%QJ!y-Wm!vGc*gbElIs=SM<+IZ(xT!W;tCSWQ#K$yrCZP*|)
zlHc)LB@cd~jfynrBI1>U!OCIdiyZl-=Dni5=wJkmDX^r7&IZR97m6-0hH$V#H@+0(
zb1|Yjj1y#QmzQS*1D3g3Z;lei%ZNt1y!;M%Tn4*Eq3DOWWZI%$*bZFyQfse4wMXDZ
zd-+whX3D8wX}9|0Zs=@AsJT>3<9A!+u7EAymfEVf|G`<A^0mA5B5$LaRlO^wEV`=C
zyz(h?DyHGXQ<V#5&ReMBO9srFK3{(Mm9`_&tXf#UuxjL@xpT`GERk32MWj>nJ)*yS
z=WCdtl{xF(Mv?V88Z}ZL^6OpX)Ni$W*pd(<H>^Ru?C-QzzBb?oIAm}xxxovEgBK++
zDsfMSm0&c=#vsi$f@Cn>GNk$S29|oGP{!va-qFVsyH;45u|qltzC6RhX_l_n568j)
z#po>Q(hmMP1*L+9O@R8(K@Wt`1k03O$e;4n@y%j{ojL>!&D{%u9n!WJ38w(hb>Z~K
zA<0Ku1YgXt+b?}+8QUkxcHe6WzR3W`hW6$dBnKeqxVKaR2z+i727Bieha&K>3n9$U
z#z5vEZH<x3Sc#v995@y)0fdf_1o&%Jw7~+x>kk>wq!~F2@dLRACvuoy@#b^BJ-e_Y
zA^-nBPXmoD%gYD=8cAL(MLrf|GV*y|-tfH4dh==@nwJl&u~->ocwKmk^(V$T92gs(
z7%MhBF>7uOCUy-F0Ct2cuModD$odDq1%TyYjB7ZtLW?{O1GByr=Cj0Kj$z?dW@8D*
zrZ9ovdkH~c_u<Z~Q3Qf%w2ADtCwWovp5<N$t27vkuU0)33AO4mLvpb6wdBE<18h&?
zB~aaxa(Mt&SinS(=47Zu<vjl>@D)r`2;)<~H5Qel75FWzW{MDa(TjouWK2Yo?z%`O
z9Pv#!fK-QCusk?YHmq@Vm}g=kO<AWqcWW7{W~yE+qA?UiA5*qvKwz}73ya&U_fTJE
zVKojiT>N&5;gmJ&1iS`o8F=ad&#ed8TW_caX)!|#s6`g>-3;IyVqmQizhsYBt|8^c
zlFJf@>L4zt2a$c4)*b}Yv>L-q<$#<|wEr1O+DJ9{Xw`m7##mE4{LLhc6yHf|>S23E
zruCV|8fuP85LNLFA&8^!sql9ieY}pD_)O!_;N<o~sU;4i4K6CeI)ba3Rm5tHJIP){
z*}Su1IsxdMA|scBoO8i<wqH8FpW2jV<p!v2k&fOxdG`AH5OaWhC=B=(Vsl9TS(vjA
z%g2+pMBJOP{cb_H`>hCYUx|Rv+YqjOI~0LCV6s>ZW#CS_U96!y#ag;o+)XuNJ?#}6
z6r$MUwFG|1+rUEDMd?CBMg`8~*^pQ706}v=S0_bHI=pxb5KzH!P4|*79oCO27z8jG
z#4@MWU?=kjJ1q`lHJFPAOn%tyo&KU!#a$2z(h;9TMBSAtB7<otKS)FTB{6*RvEZ_m
z75CL!S$xtdw6q*#q3A;3D;R2NPiA`xZNOg*bsSrNb=Y2^Hry^G)P~Das|O`QEsSm}
zv@@zZDQ;#3x~uxjIM7`+=v-uEdy1eE`dD4fg)+#|J5cnxLLua~-D;_LWSN7L-Bqp;
z2W=0f&N!5#Ds)IZMLYQGS|wImeQB#?rD{HZ4d?O5l`U%d_De%8a}+OjOH7c&BZ%;Q
z)Q8pHj+Nd)UBqK>2tQ6oAd?>xPtj{)S9oP7g5`7Q3BRCxlUhXwR`YInPWZs+0@UAw
z^~Jlc@H_Hq+i(o*QT3Pyo(wPS$)L4}ml1Dn!k6g-7r6uh)KEP2UOXAJ5r!PtX24x9
z!LkS=1-LDj@G+txkN}?(uvQ9O)>3aA{4#2f%ebtlM#S^8Y7YNovYWww1=JEB0(gVK
zQURR*uyM4VxU(BP#JCDkb*nN<Fom(ni2rztF?BVl3DS8eeAJH!!fX@EW`~~D*pj_G
zy=zri4wTVqn$yA4-!njE46LC+)uidwl*BW!fLccz`*OQL+9&~c?g@|&qg;dWdKm8P
z?`08wD6acOwD4U1A*9~l7aggNB&v>-F>P$h2isJ>`<sXgocl$)6)v2>Rex)JG~u{s
zr|mh2&a;FqOV|SU2-_v=`$_zDkPcQ%&MDn?V~q0be(}JT;_uP@_hfx@9{+SiV0paF
zWqn&daA$(8BAEOo(H0qJH8<MWnpz*E4`k)r+6OC?x3_H!e&FNpZnhpa6*+(3+s%F{
zv%y<}Upw_hC2z4UA+W56ZFcapAz$~l?Xv!bjlcr~ZPC{EC^4e&UxI3eXkPo}p-EpZ
zwtX6GdUBcV&U9;ZlOC|0v#ZWWW^IYM1>Bu~*5{l~U1M_x=5Mn-5R7Pw*=dUl&E#p@
zb;0PR#}3-cf*(C`ylnFaC$;&sEh@t5V)t3wlwim4pV(dtBH+tUZ6CF=D!gjy*9Yqk
zOw@n0KF<;;X{DcxR%p+=A${CYEauTj{@_Z6-131QEzL;X6&T-He>u_ulF?81MO(=o
zWA$x8e3f3IF9?qQ=v4h_Yi$G9P1i35hjHBu{byax-*;cM{Q!0Y|1!f^l;<jRx9t0g
zEh<nwSASkpAq34Xwdk@M+hMhA`iZTHJoS^!6?mjlzu$t%A9N|OMsmXf{nsGImRzGR
zOt5l)SfghL2j!LeiQrH#Y|>+b6R<y_Um1jT)suQBHN1OoJ^$IykQ({dRSj?JQ+kWQ
zzn|0{4xRs17x~jOddKj;>mu{3^&-`stgP1CSgD7qb&ICu`_+0^D=+z3eT<b__^e)G
zr9OOCAFD85_fNLSo_q8GR_R@P^kmK1+<*10jpe~Ty7h0t$n%_A(voxir*aZy>l)qq
za*pg?gOV>YRY|FC$i?B3)vBabu*8&0_JrDe#3dhRTP4=N?BMZogx%xIPxg2VE5yHy
zFx3-nTX9t$*S@<jRu-SpO;vfQIbrS_sKmdP4J!NEb7i?Vpj&}=^1}c+yr+XKuhm=n
zON!!U+nOMd)a-*ld=^8*zpMs8zUsx*J6TxSTnhm4V*oZqzF8ZZ$5)&iJzkbSr?-$f
z{2J@2Ne;Q{y&&x77dOB4Zx9myx;KFR>;%=uKXFQ$eEy|i^{0lHuHOdUz&|UJ%xbf3
z#UEE%)wj%W`Y&AXmht;SBgol@5&SYGRNPoD-=_z^>9&@Oe#Tp;Jh)T)^h|3ee{%M1
zGp!bbm&MAV`*jQXlS<x>JqyJ1k0abef7@2X&sEj$UhMMUwJ_fQ(E^8@dptPmYo6bb
z`8ZneuQ=2M_iZc2Rt8(8%C9jhwPvr+o^m1sreyxLYEXM(71u6QwHo|`8Vzc_vJ^{@
z=f8e=qP+B65N6%KnLr4rs{7lv;{D~Q-K%b~8+?bjt|G?zj#O|z-W;uJTd4Z#08q8$
zag?-`oz8?%KInPm4tXBAjqiRtBByka&>Gv^>Yra?5<WBa$Lm8gY~7%bRlznf@}1{H
zz`kW*KfmNZeLOj<AsqgYadU73{^d1*)9NUyC*q65jpdSq!L@7f(c%WRHy#hxcFN<Z
zZO!cbL3G>wLsjzp8MkbDC<tfAlDpdA9U6X*1|FHZzilhTIXV20-c7Af-A4y6A6w$r
zTr1oE`j3V@cPK=P7Y?Dj9lvo=OT{1*6p!}Kjay87X$!xcx~{Mq#X??vL~qQWcb0#<
z@ovZJ!@=>@s|JBA^cPLv_Q-6c+E`yPeD`pWHX3}zFjO7)Z7U|$)mL9NjF7Ef4vn|R
z3mEUN#4y0PdR(o|X_Vd4ip$cnFI)R(s@6ot#=~5TU&9$4Pz_RLNqc;2vek>Bbtrxj
zz>>Q&uvB@)aJ(J2DqQvem#rUem93qKcFcGb=IXvtcKIqbE3@k3gJtQe>i0t$y><o8
zeEzj>fPhBjT(w72Cfa%h5CubjjMja!@`#=wqZi{9zY9l#2-@(Ho-BT@S2D2*U-zng
zDMUn-6(4!P#Q4`S1Pu3WD`wB-KI0i&bH(^9ay$um+8dQ}-BCU59~;Vk>+zbuAl>t?
z;YNtKnp@<^Nk@YnkC>M8`(W@P{&jB9@tEtlYCnY%Tlhn31R=BTZ`+FBuUA8m{M9wt
z?Bx*6eX;x0W^~TKh6C7jyH$Vk?fAUJ@^Jk+keAWuru97i#_AI|@<f>2&c2N4p9+@2
zd>AX6zM`k8u{QWrQ9ZcZRt%LEtjbsP3{^G!`5tB(4XVC!q^?{3cSYozuY?Hjt5?wV
z{m1IcrT0}mS;1@YZIlM^l3xqfZ-v*M5>JO<ZAMGY{5A|LLVowNaLA8d4Q;FMxWe}r
z!WHlhpJPEBH+ZYGL1(xBXw{$GT5E>)OC5FGyzULn%a7OHHu=MGu$l|UbnD;3<5w|d
z)8l%)=5(o|4Ia%cuR0#=z3#=x%Y}eui58@TsA|z_yAzm1llGjQEGuUq?z8elXoAbR
zY+_wmjC}b-sNIKLHXffD54MYu7ZPC`&U-C5hPu}X2#RRk-?kM^28J6Y%MGY+4dlt!
z0B}rMxGYZo8ZL`GiL(2a)GI@%f3%!&GT2%8^#e#f4Ufz!U&BU?IIPtn+k_YKTL!&#
zUBg;-+3ocp=x_io$dChvLx0;=%-bAp<dVxLXi*9~iq=Av_}(=9!2D<6!&akn9fk1^
z)=(FVd-;A}8EoeLMoX!if$FQyr*`>v&N&k9%Ol?j;i^vAyqufe`EkAa$9~ZwWs}n(
zQc%alJ>PPL#`t%7T$dje7jCI>b_VwO*YKcyE!ZgdFBe*DyiPa0oI*!lvijia&{m_B
zd~8uj_8VBL+exl?M~%TF=e`kKq!VA19Nym;W-0s{65CHARNX7rydSI{H~ZWTC&A45
z*N~y(Xi(4(4UCj6&*&|zj)$H>$C)XrNJ7bNEGOsN47o8Rxwp?by<`w14F2Iggz(?C
z6<4NOjSh{lrOGYsZPs?Tphmn2sDHK$L-oolPKD4to6AbF!et2(YPC}?`!H1D7+3gL
zZn#36+}aP&<K7Bwr|h>-;lB34?g!eE<ejxaAtF3JBoe|8Or5(Xas4E@_pM;n+mB9+
zxDk}kzlJjOXThp#SK6{<tG7el7Qc;dk5^b=P?jvCuE)3zyb}a?%Yk>--vMC!>k{sp
z&(aAi!;PY3@q<|TU*FbaEL55AFgorGSFlSc`cZO8APDt8=X<|>7>mijhN?!R4cw@|
z1?Dvdlevj=noAS^)`r?ZNjLs$HEfk^ue|J_d31-><Eeu-^B<el?yQ*A2Mf->hKp|7
zEq{0ynt#brEkYg)b@NT-kGY3XmwyEfX1-}3*YBkUb7Xku*W!U2{^d8QwezLmVDQZ*
zdE<M*!T)?~YW1Da$N1M!X^cG{uJ4j>qrQcMZ{7nAK09HRHTulvk!M5tUAI?X{5TI0
zCj4tCTOIw6RsVtsxh}kx|DM*-^LP|gCHxu+9{q$|cUDiA^IPdI`4zUE>U+Zn=(<}>
zxG1h}0qcw=m$Q$kW>$TeQBtiL9Aq%f@~rHO;q272$bO`RvoqzSvqAjy9lK$9OUR1+
zYlzPwy}4?M!g{j#@raoaviR3<CNB<_Kb#F&AKHD0F0LKMIhk_zbiMwDT&>FRs`**0
z{Gkfn+KUQ$16xglHLV=O^_$C9ABC2u_#<?&pp1u>q)at(S-4Dzl|8SJ<sa*5^7uy~
zR`3BgxlU!blx-L49@+Ne;OuX^@$J$)Y;pc&HRyTqWCjzm27C<p_d7ZHV=P(IyYzo5
zfyuYOELUM~@vkAAG&tWs*1a<CoStYc&){<y&bK#N*_Ybrc6sNy;BYRu``qLKq4KZc
zVrc8FS>-(e4jeW()EhBkFhhULsHWXxVQ9^hx~ow`MN<wtj{$70t6<1AJN3lWHdE)6
YS5=h`vO5^ijreaCmioK<*s<}y0ERHh(EtDd

diff --git a/settings/repository/org.broad/tribble-110.xml b/settings/repository/org.broad/tribble-119.xml
similarity index 79%
rename from settings/repository/org.broad/tribble-110.xml
rename to settings/repository/org.broad/tribble-119.xml
index 84a550b27..08037b20e 100644
--- a/settings/repository/org.broad/tribble-110.xml
+++ b/settings/repository/org.broad/tribble-119.xml
@@ -1,3 +1,3 @@
 <ivy-module version="1.0">
-    <info organisation="org.broad" module="tribble" revision="110" status="integration" />
+    <info organisation="org.broad" module="tribble" revision="119" status="integration" />
 </ivy-module>

From 73157ae3d38e1010d13c5374d0fea3e249b432da Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Fri, 2 Nov 2012 12:02:59 -0400
Subject: [PATCH 048/236] Allow each pipeline test the max of 10 hours to run

The runtime of these tests is extremely variable -- sometimes they will complete almost instantly,
other times they will wait in an LSF queue for 5-10+ hours. Minimize timeout errors by setting the
timeout for these tests to the maximum of 10 hours.
---
 .../pipeline/DataProcessingPipelineTest.scala |  4 ++--
 .../PacbioProcessingPipelineTest.scala        |  2 +-
 .../examples/DevNullOutputPipelineTest.scala  |  2 +-
 .../ExampleCountLociPipelineTest.scala        |  2 +-
 .../ExampleCountReadsPipelineTest.scala       |  2 +-
 .../ExampleReadFilterPipelineTest.scala       |  2 +-
 .../ExampleRetryMemoryLimitPipelineTest.scala |  2 +-
 .../ExampleUnifiedGenotyperPipelineTest.scala |  8 ++++----
 .../examples/HelloWorldPipelineTest.scala     | 20 +++++++++----------
 9 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
index 944ef7977..60c9d9a59 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/DataProcessingPipelineTest.scala
@@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.BaseTest
 
 class DataProcessingPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testSimpleBAM {
     val projectName = "test1"
     val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
@@ -45,7 +45,7 @@ class DataProcessingPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testBWAPEBAM {
     val projectName = "test2"
     val testOut = projectName + ".exampleBAM.bam.clean.dedup.recal.bam"
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala
index 3e9af3e68..dd07cbfdc 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/PacbioProcessingPipelineTest.scala
@@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.BaseTest
 
 class PacbioProcessingPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testPacbioProcessingPipeline {
     val testOut = "exampleBAM.recal.bam"
     val spec = new PipelineTestSpec
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala
index 92c40acb1..6bc6b56db 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/DevNullOutputPipelineTest.scala
@@ -53,7 +53,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class DevNullOutputPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testDevNullOutput() {
     val spec = new PipelineTestSpec
     spec.name = "devnulloutput"
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala
index 9d885dda2..f52632a7f 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountLociPipelineTest.scala
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class ExampleCountLociPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testCountLoci() {
     val testOut = "count.out"
     val spec = new PipelineTestSpec
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala
index 1b965d8d2..c23c12719 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleCountReadsPipelineTest.scala
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class ExampleCountReadsPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testCountReads() {
     val spec = new PipelineTestSpec
     spec.name = "countreads"
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala
index c6e4c3507..4ffaf7b5c 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleReadFilterPipelineTest.scala
@@ -77,7 +77,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class ExampleReadFilterPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testExampleReadFilter() {
     val spec = new PipelineTestSpec
     spec.name = "examplereadfilter"
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
index a9a5928fc..0215a389c 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleRetryMemoryLimitPipelineTest.scala
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class ExampleRetryMemoryLimitPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testRetryMemoryLimit() {
     val spec = new PipelineTestSpec
     spec.name = "RetryMemoryLimit"
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
index f6fcd7c12..67ac68c28 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/ExampleUnifiedGenotyperPipelineTest.scala
@@ -29,7 +29,7 @@ import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 import org.broadinstitute.sting.BaseTest
 
 class ExampleUnifiedGenotyperPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testUnifiedGenotyper() {
     val spec = new PipelineTestSpec
     spec.name = "unifiedgenotyper"
@@ -51,7 +51,7 @@ class ExampleUnifiedGenotyperPipelineTest {
       Array("vcf_intervals", BaseTest.validationDataLocation + "intervalTest.1.vcf")
     ).asInstanceOf[Array[Array[Object]]]
 
-  @Test(dataProvider = "ugIntervals")
+  @Test(dataProvider = "ugIntervals", timeOut=36000000)
   def testUnifiedGenotyperWithIntervals(intervalsName: String, intervalsPath: String) {
     val spec = new PipelineTestSpec
     spec.name = "unifiedgenotyper_with_" + intervalsName
@@ -64,7 +64,7 @@ class ExampleUnifiedGenotyperPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testUnifiedGenotyperNoGCOpt() {
     val spec = new PipelineTestSpec
     spec.name = "unifiedgenotyper_no_gc_opt"
@@ -80,7 +80,7 @@ class ExampleUnifiedGenotyperPipelineTest {
   @DataProvider(name="resMemReqParams")
   def getResMemReqParam = Array(Array("mem_free"), Array("virtual_free")).asInstanceOf[Array[Array[Object]]]
 
-  @Test(dataProvider = "resMemReqParams")
+  @Test(dataProvider = "resMemReqParams", timeOut=36000000)
   def testUnifiedGenotyperResMemReqParam(reqParam: String) {
     val spec = new PipelineTestSpec
     spec.name = "unifiedgenotyper_" + reqParam
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
index a43727ba6..50fc529dd 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
@@ -28,7 +28,7 @@ import org.testng.annotations.Test
 import org.broadinstitute.sting.queue.pipeline.{PipelineTest, PipelineTestSpec}
 
 class HelloWorldPipelineTest {
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorld() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorld"
@@ -37,7 +37,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithRunName() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithRunName"
@@ -47,7 +47,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithMemoryLimit() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldMemoryLimit"
@@ -57,7 +57,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithPriority() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithPriority"
@@ -67,7 +67,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithLsfResource() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithLsfResource"
@@ -77,7 +77,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithLsfResourceAndMemoryLimit() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithLsfResourceAndMemoryLimit"
@@ -87,7 +87,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithLsfEnvironment() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithLsfEnvironment"
@@ -97,7 +97,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithGridEngineResource() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithGridEngineResource"
@@ -107,7 +107,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithGridEngineResourceAndMemoryLimit() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithGridEngineResourceAndMemoryLimit"
@@ -117,7 +117,7 @@ class HelloWorldPipelineTest {
     PipelineTest.executeTest(spec)
   }
 
-  @Test
+  @Test(timeOut=36000000)
   def testHelloWorldWithGridEngineEnvironment() {
     val spec = new PipelineTestSpec
     spec.name = "HelloWorldWithGridEngineEnvironment"

From 0ab4022f2301ffa5b68346835de64a5992356018 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 2 Nov 2012 14:30:20 -0400
Subject: [PATCH 049/236] Final r119 tribble jar

---
 settings/repository/org.broad/tribble-119.jar | Bin 319935 -> 319935 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/settings/repository/org.broad/tribble-119.jar b/settings/repository/org.broad/tribble-119.jar
index ab456938aba23540eb8aedbac454841f220c6bba..c74bea398312b68b2f27a60b2061ef2ebf076ff6 100644
GIT binary patch
delta 2458
zcmYjRXHZpF5WeT$1*u9^bPS?05ersCQ9#<DU_=cXjl_V01&9r77&MxwnGsZ&r`|Cj
zg0ajfaWD_K3L=l9ARry-AS5c5k<lpDklZ~>X7U5Rv*ql5`<=ZnzsNGb$kJrVLJ5on
z!9Wmn(TOJG#k}amSxKULuD8SgeV?oUHMVTB3fce`uZP&C&p37(H=ClJGrV9h;$8=5
z2{CKpd8VQ!$gv;IpviB=!Mf<Ro>x{?Z}I>^@Bq1MlxU+_GSou0Q$SyvFwGw@vu);3
zUf}g?D=eTjniDt#G1zZ{1!7##J03q2d`?7r{8}H5Tf#nYf8@qb+zyY_aJzqV9LKn*
zc^oTt0LR+h;e*VPQJB@UNS&TKyZYy=>DoO~6LQc>8@qp*p80WMFiQ}uS)wqEoe;*c
zlbWtW`>^Sn!>+>+&z`x!N%>~fTB^2YM6nCHmlsD2LbITJI+}0Fw{Wf2*92mgHup~*
zr?~gAwJ{gvOxd=-y-Wqc%T&WEMru5sSZb?Y1)B00>-+U>fci8623#=Nt^jPZLfA?J
zwUUx!z#f~Z2iYlw*~{+3nZA6_ni*lJ*_s6!<6-h>R*wqGr$Dlot0S19@&Ddiah=A3
zhGO37`D`~1qgbwD@kn1uTf?66!_jbd8T-jDQEWCwqNCUjR8e`lgRR4ZyflW*LdTI9
z=EwUt{KV35N@e$9R)s5~_?=)+3^7+F>x7Po6c)=<dQ#aQNA!eMvJ#X^ajYK*UAfOJ
zY|)ednsrHBk^RoCdVx$30vReYR@OO)gXF1Yps#eiW2<p#6t@q|-kc|!z0BV8QQfzX
zph*n2krPbBamrO=(U^D1H4|-68l_>Bm_CN*&wL^DcbN!6mx){*3jBhFa>Gk}!)4A`
zEV^NS?_%*Ie}LyJ#X>$@#(L42m+ILedgEuKIGz&k^DcVHq9Yg9>8e=4pG3?Nn=vG}
z=ZkYOcZ5zn{TUvz+Qmj2JRIv2_3?wkS93;3B}fNpq(>=d-FpQ==oRP_()k-G`-e(~
zeDb(q(guEbZ6gh_#*2EBrP26hDGt-6nLHFcLrUUr^vO-~=FjNqDP81;CqB|OPCD6N
zvY3SZGj>Wd@b<%<(nB7ph?S0yM$hULshfK|&r20tjUm5D!>H9YX)m%@F6B!>e1)Jy
zI>~z`)kx-CgsW{*7{6}SEmd(|5C4!JIN+@k6ZnDWEU<ysTvc}ms5ZlpnG2Zn%46rl
z0q(iD0*HG+5Nzi(ibFxm_bO&R^z+Iw8=()0UD~z@BvbtA$NXbIH(p($czIN}WGh$y
zB^fFgwn8$d<1*XJsZ+pE-Wmm*b!-&8=acQ)1v~N6S9*_uBkyJ_LnYVl#V=sV8A(?l
zgx6V@4s$R!Egb^%vGB{^!A1Rfl_zPz1MSUPn2MOH1$<<=Nek|XHkq&zF)9-_Ahu@0
z*NDzpupIGd7A!&R%z|$a-LhdJ;*o6fw`IduXgeve6!B*TzC>(QU_GMub&4llr+You
zArk$|Zx9yVpu7n=uonHtav&V>eGc(in+wrsH|9bFqW4W$X7JySseJY(e1~4sJlKR7
zn@3qac@T`Y-!1S%JaLPrd3FoD(4LY{(;Uqom@c1K&(J{-#uIcDf1xAxz6GRGY619Q
z{80g{K%7@dYE7zvk#cq+Oh9LMAtk#O!MA897m?jpM8yM(iCkteEWmhwF^#(6HiV*`
z(4iLVxeXK1u)ae~qVK>uv|H}LDn#cJ@~ezS${8hq+bp+~kcjT3L{Bb-#pv%TrSV+v
z!fLdW?vnrcT^i4)j2<Dqj2>lpIfP)|_vI9?E2pMYDqs=%_f^m&trf)F=N^Qi|MESW
z%(xP^pdDLD&+@F2;!CTDUse?@gIG;Ng;&!sCDo*WDyJ{r-(Ev~YHEnb_*%jPwN&qM
zE#-evN6&Dnj(8i^lk(y9G;e7=@fzPiy8PGx-dMk+ftJhlKGjLPPxBZxQeJ2y=~CVZ
zftYXCM7nKjqCRy^um=6k&D8HyGwJ-anbys-h5DXrA^rMW=w3)G&6PKRBig86bQ{&F
zYa7tJo#r~yPWg}9iHB<k^-(uvKK5_!fQ^XWoup4vC-r|dVEaEHJM970XI<cr`HQ+B
F>R&Ah+~5EJ

delta 2458
zcmX|BXHZmE6n^);LtQ|M$m$S8#fSyFC<=r&AQ(}z8jZw&fCY^W1jV4yM9qvM!mRO)
z!2-rIqsGB3@D$6^6cwcRz9b5k(a|8*klb@G$q)EWyQh5Tyww-k>5J?v7S5BvTo6nJ
zAt5NuVyqY+lr}S6bn#28b;kdFpVI$YCo++OE|er2A+{P5h8?OVOVsm*7FZ$fbaj^y
zbH<+`6t!V){iudbd?Q-9_(l3&Twb}s2L!<fWW#V#tz9_8R(4jvSeH5_1TeC7#t@dE
zch+D`pfjJX8H5-fJkA#JVAxyc-xq#bM15>z0K-ku@0or5+K)_+S+8Y!|Hgw155|7R
zuq+-JR&S3nv560g^lf+<(wT202>2JMHEeg(M(iD|OWM8I$R<UwB8LTgk}5b5tuTfh
z(Hi#cqV(ui?)P*+Jayu%En;9+wH<9nJFM6Pds<Sw&O{vTr!6R*it6jqO^mD6RT_-a
zUH>y*q;B2LZhMv0XiCb7`&<6@vlIkBOD!pL)cQQO(^b6ESTY~!`}IvIT}`MdBTRNK
z0Ci3TS!PNzDS4)(+ky)6^pnIz_8v-%<p=h}99>QJM597s;s{cU49Z7Aa*-?75i`~Q
z`=)NsDI{zN#vKbH+mT_#ZaGOdK_PP`dBTK4G2{Z)lRaX|EcC?1l6YiMc@j@np&&0x
zBs0-)Adv*K`gK2&Ok9MreLtzd6H$VXkjG|pZftfs8rGd8Ni3u%gY0laOLRFYL8=tD
z+5y+adSdH@7JV-<NbH1mySEr=h#+VPGBQfecNI<K$@jrnX@5&r;L#{v?}>{I3$}il
zyW<1Bw-2CA3s=jj7UCG?vPx93D%Y(=HBzJ04HqwsWOUDXF7z8L1i@e-S4IJQV5?m7
z6JJ|k=UEFxFN(^G1>y%&6rL>;3oTK|S}nQ{LZK&K^mj(V?WkC9go4p=(ajcx>6gVa
z6$)aW*n}>*O)t(t!?1ku*e59Dw22LB6jJ&`V=D}yTXV-2%R$=9M|yPfl=llk5MBuU
z6Uq-URdx@N%-G}y9i%l(=v7OD>@kzkL}>(mS&HjaX$EtJPm|Kwjy`%x{%noDzS21+
zJPweqFlvs6NVXHOepZ4s4PQS@kRCEmS(0>k1SVc_QtDzB-!oDfV`I>7k^`X2^@_9;
zyI0QZr7(6vP$K=zYNl67Hta4hw@T6M`QR?8f?)8&Kcr4q6iO`Mdq&h;HS{u8y<MTw
z8eP^NV8b$x3W7b>*x0$HKzpaI&@kA_+9-|!9e!X+;%ew;nG@GSA2xP=%Lb4v@sk_*
zkIS57dPK?c@Gi+ouKY_fQ_gON;~0*|>>_7O0yBAYESO`aq*!>zCfl|R5**O-A_d%7
zH76O$S^ej~fF0{mx(E?0&#Fr>8)GvsL8!5q5G23+9X#mog*-tAKD2jvqYfq`X6OKy
zEH~=F8&RDN%MfF;VGUwSHhhKXo&!q|59Po@#Eu;J8qq5k<{=))<@VNG_!9N$3M@kW
zNr5jATNGH0=zo>_)35S(Jy&5p+Lv78Sa^-cjmv{bw5Q}j4C1>y-cRIph(o>MI;=zV
zzX6L)|NAkOPu+lTaZ1aZumLgYCXJ%la}&ZbI`|d@BObX$r=fH|y#;=#Ptx;g4(SJ`
z)6=f0e_B3-A*SY2KgH+yy!*fc&QeAJ1mOEe1+WzHvqH|+geq{9a|>Y{8oLU4uxAl`
zgZl9zuJ;wu<W$!b^X9UPVJ_<Z#eCE?w;>Ak)OMPz=QfN-#r_WOH0}<pLcRG8EJt)N
z;T_VB9ObMMaAnRCPNMf+-ky9H7NEW7E?<i0Jy?Ny`aN!cc8`x2aGx*X(tW;^p`{Rk
zao?3vKc%6jloq8psSM^L?keLnwUp7$sUGkEq7g4Vpp#KlmBS{)q;k3zicibAe^CYP
zkLo!U^cX0Lm2@bIF_nCnl1j<|)hV5USbu93XRNA<_c^wj<DP1sue+Kv_IV9o!}%KC
zw^=P`KBkt>d$*P|KDLhXLi7Jn2mYA9q>dk#XFac*S<g3N-oWFc8YnL`zO(@}h|Z0i
zw=IplPE8}M#60dzyx!3!enwB4___Hub3RTtbN>39={uSyqJ{H!6Oq0j*2?R}wbDG)
zU(-6kcN?9H>Id3*e0Lk~!?T^&p_dYb{-$<Vi|F6M_m$qk>%SV%Lpr&h*~#-01B77w
Id;`S(3mt0_!vFvP


From 2ec3852acd4aa250a8d024d861f713630c71fd5b Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Sun, 4 Nov 2012 23:42:02 -0500
Subject: [PATCH 050/236] Scala classes were only returning direct subclasses
 (confirmed when inspected in debugger) so changed PluginManager to allow
 specifying the explicit subclass. Removed some generics from PluginManager
 for now until able to figure out syntax for requesting explicit subclass.
 QStatusMessenger uses a slightly more primitive Map[String, Seq[RemoteFile]]
 instead of Map[ArgumentSource, Seq[RemoteFile]]. Added an
 QCommandPlugin.initScript utility method for handling specialized script
 types.

---
 .../org/broadinstitute/sting/gatk/WalkerManager.java  |  4 ++--
 .../sting/utils/classloader/PluginManager.java        | 11 ++++++-----
 .../org/broadinstitute/sting/queue/QCommandLine.scala | 11 ++++++++---
 .../broadinstitute/sting/queue/QCommandPlugin.scala   |  2 ++
 .../src/org/broadinstitute/sting/queue/QScript.scala  |  8 ++++++--
 .../sting/queue/engine/QStatusMessenger.scala         |  3 +--
 6 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
index fbacbddc4..28b5f918d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@@ -350,11 +350,11 @@ public class WalkerManager extends PluginManager<Walker> {
      * @return A name for this type of walker.
      */
     @Override
-    public String getName(Class<? extends Walker> walkerType) {
+    public String getName(Class walkerType) {
         String walkerName = "";
 
         if (walkerType.getAnnotation(WalkerName.class) != null)
-            walkerName = walkerType.getAnnotation(WalkerName.class).value().trim();
+            walkerName = ((WalkerName)walkerType.getAnnotation(WalkerName.class)).value().trim();
         else
             walkerName = super.getName(walkerType);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index 43cc800d8..b39aae8ab 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -101,7 +101,7 @@ public class PluginManager<PluginType> {
      * Create a new plugin manager.
      * @param pluginType Core type for a plugin.
      */
-    public PluginManager(Class<PluginType> pluginType) {
+    public PluginManager(Class pluginType) {
         this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), null);
     }
 
@@ -110,7 +110,7 @@ public class PluginManager<PluginType> {
      * @param pluginType Core type for a plugin.
      * @param classpath Custom class path to search for classes.
      */
-    public PluginManager(Class<PluginType> pluginType, List<URL> classpath) {
+    public PluginManager(Class pluginType, List<URL> classpath) {
         this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), classpath);
     }
 
@@ -120,7 +120,7 @@ public class PluginManager<PluginType> {
      * @param pluginCategory Provides a category name to the plugin.  Must not be null.
      * @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name.  Can be null.
      */
-    public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix) {
+    public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix) {
         this(pluginType, pluginCategory, pluginSuffix, null);
     }
 
@@ -131,7 +131,7 @@ public class PluginManager<PluginType> {
      * @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name.  Can be null.
      * @param classpath Custom class path to search for classes.
      */
-    public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
+    public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
         this.pluginCategory = pluginCategory;
         this.pluginSuffix = pluginSuffix;
 
@@ -149,6 +149,7 @@ public class PluginManager<PluginType> {
         }
 
         // Load all classes types filtering them by concrete.
+        @SuppressWarnings("unchecked")
         Set<Class<? extends PluginType>> allTypes = reflections.getSubTypesOf(pluginType);
         for( Class<? extends PluginType> type: allTypes ) {
             // The plugin manager does not support anonymous classes; to be a plugin, a class must have a name.
@@ -325,7 +326,7 @@ public class PluginManager<PluginType> {
      * @param pluginType The type of plugin.
      * @return A name for this type of plugin.
      */
-    public String getName(Class<? extends PluginType> pluginType) {
+    public String getName(Class pluginType) {
         String pluginName = "";
 
         if (pluginName.length() == 0) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 65abaf7be..637174557 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -92,13 +92,19 @@ class QCommandLine extends CommandLineProgram with Logging {
   private lazy val qScriptPluginManager = {
     qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
     qScriptManager.loadScripts(scripts, qScriptClasses)
-    new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
+    new PluginManager[QScript](qPluginType, Seq(qScriptClasses.toURI.toURL))
   }
 
   private lazy val qCommandPlugin = {
     new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
   }
 
+  private lazy val allCommandPlugins = qCommandPlugin.createAllTypes()
+
+  private lazy val qPluginType: Class[_ <: QScript] = {
+    allCommandPlugins.map(_.qScriptClass).headOption.getOrElse(classOf[QScript])
+  }
+
   /**
    * Takes the QScripts passed in, runs their script() methods, retrieves their generated
    * functions, and then builds and runs a QGraph based on the dependencies.
@@ -106,8 +112,6 @@ class QCommandLine extends CommandLineProgram with Logging {
   def execute = {
     ClassFieldCache.parsingEngine = this.parser
 
-    val allCommandPlugins = qCommandPlugin.createAllTypes()
-
     if (settings.qSettings.runName == null)
       settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
     if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory))
@@ -138,6 +142,7 @@ class QCommandLine extends CommandLineProgram with Logging {
     for (script <- allQScripts) {
       logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
       loadArgumentsIntoObject(script)
+      allCommandPlugins.foreach(_.initScript(script))
       // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now.
       //if (settings.run)
       script.pullInputs()
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
index 499c31554..eae6a6a92 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
@@ -6,4 +6,6 @@ import util.RemoteFileConverter
 trait QCommandPlugin {
   def statusMessenger: QStatusMessenger = null
   def remoteFileConverter: RemoteFileConverter = null
+  def qScriptClass: Class[_ <: QScript] = classOf[QScript]
+  def initScript(script: QScript) {}
 }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 8c834696c..eb8be183a 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -149,13 +149,17 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
    * List out the remote outputs
    * @return the RemoteFile outputs by argument source
    */
-  def remoteInputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(inputFields)
+  def remoteInputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(inputFields))
 
   /**
    * List out the remote outputs
    * @return the RemoteFile outputs by argument source
    */
-  def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(outputFields)
+  def remoteOutputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(outputFields))
+
+  private def tagMap(remoteFieldMap: Map[ArgumentSource, Seq[RemoteFile]]): Map[String, Seq[RemoteFile]] = {
+    remoteFieldMap.collect{ case (k, v) => ClassFieldCache.fullName(k) -> v }.toMap
+  }
 
   private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = {
     fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
index c4151dafc..a1133b944 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
@@ -1,6 +1,5 @@
 package org.broadinstitute.sting.queue.engine
 
-import org.broadinstitute.sting.commandline.ArgumentSource
 import org.broadinstitute.sting.queue.util.RemoteFile
 
 /**
@@ -8,7 +7,7 @@ import org.broadinstitute.sting.queue.util.RemoteFile
  */
 trait QStatusMessenger {
   def started()
-  def done(inputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]], outputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]])
+  def done(inputs: Seq[Map[String, Seq[RemoteFile]]], outputs: Seq[Map[String, Seq[RemoteFile]]])
   def exit(message: String)
 
   def started(job: String)

From 2c0bf89961e653caa8c21316db22a0edc306f3bd Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 24 Oct 2012 16:57:08 -0400
Subject: [PATCH 054/236] Co-Reduction implementation in ReduceReads

ReduceReads now co-reduces bams if they're passed in toghether with multiple -I. Co-reduction forces every variant region in one sample to be a variant region in all samples.
Also:
  * Added integrationtest for co-reduction
  * Fixed bug with new no-recalculation implementation of the marksites object where the last object wasn't being removed after finalizing a variant region (updated MD5's accordingly)

DEV-200 #resolve #time 8m
---
 .../reducereads/CompressionStash.java         | 38 ++++++++
 .../reducereads/MultiSampleCompressor.java    | 49 ++++++----
 .../compression/reducereads/ReduceReads.java  |  2 +-
 .../reducereads/SingleSampleCompressor.java   | 38 ++++----
 .../reducereads/SlidingWindow.java            | 89 ++++++++++---------
 .../ReduceReadsIntegrationTest.java           | 10 +--
 .../reducereads/SimpleGenomeLoc.java          | 43 +++++++++
 7 files changed, 185 insertions(+), 84 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
index 714a4df18..a6e5b6c5b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
 import org.broadinstitute.sting.utils.GenomeLocComparator;
 
+import java.util.Collection;
 import java.util.TreeSet;
 
 /**
@@ -18,4 +19,41 @@ public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
     public CompressionStash() {
         super(new GenomeLocComparator());
     }
+
+    /**
+     * Adds a SimpleGenomeLoc to the stash and merges it with any overlapping (and contiguous) existing loc
+     * in the stash.
+     *
+     * @param insertLoc the new loc to be inserted
+     * @return true if the loc, or it's merged version, wasn't present in the list before.
+     */
+    @Override
+    public boolean add(SimpleGenomeLoc insertLoc) {
+        TreeSet<SimpleGenomeLoc> removedLocs = new TreeSet<SimpleGenomeLoc>();
+        for (SimpleGenomeLoc existingLoc : this) {
+            if (existingLoc.isPast(insertLoc)) {
+                break;                                          // if we're past the loc we're done looking for overlaps.
+            }
+            if (existingLoc.equals(insertLoc)) {
+                return false;                                   // if this loc was already present in the stash, we don't need to insert it.
+            }
+            if (existingLoc.contiguousP(insertLoc)) {
+                removedLocs.add(existingLoc);                   // list the original loc for merging
+            }
+        }
+        for (SimpleGenomeLoc loc : removedLocs) {
+            this.remove(loc);                                   // remove all locs that will be merged
+        }
+        removedLocs.add(insertLoc);                             // add the new loc to the list of locs that will be merged
+        return super.add(SimpleGenomeLoc.merge(removedLocs));   // merge them all into one loc and add to the stash
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends SimpleGenomeLoc> locs) {
+        boolean result = false;
+        for (SimpleGenomeLoc loc : locs) {
+            result |= this.add(loc);
+        }
+        return result;
+    }
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
index 2c3439010..f348225ca 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
@@ -3,13 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 import net.sf.samtools.SAMFileHeader;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.SampleUtils;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.HashMap;
 import java.util.Map;
-import java.util.SortedSet;
+import java.util.Set;
 import java.util.TreeSet;
 
 /*
@@ -41,7 +42,7 @@ import java.util.TreeSet;
  *
  * @author depristo
  */
-public class MultiSampleCompressor implements Compressor {
+public class MultiSampleCompressor {
     protected static final Logger logger = Logger.getLogger(MultiSampleCompressor.class);
 
     protected Map<String, SingleSampleCompressor> compressorsPerSample = new HashMap<String, SingleSampleCompressor>();
@@ -55,30 +56,44 @@ public class MultiSampleCompressor implements Compressor {
                                  final int minBaseQual,
                                  final ReduceReads.DownsampleStrategy downsampleStrategy,
                                  final int nContigs,
-                                 final boolean allowPolyploidReduction,
-                                 final CompressionStash compressionStash) {
+                                 final boolean allowPolyploidReduction) {
         for ( String name : SampleUtils.getSAMFileSamples(header) ) {
             compressorsPerSample.put(name,
                     new SingleSampleCompressor(contextSize, downsampleCoverage,
-                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction, compressionStash));
+                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
         }
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
-        String sample = read.getReadGroup().getSample();
-        SingleSampleCompressor compressor = compressorsPerSample.get(sample);
+    public Set<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
+        String sampleName = read.getReadGroup().getSample();
+        SingleSampleCompressor compressor = compressorsPerSample.get(sampleName);
         if ( compressor == null )
-            throw new ReviewedStingException("No compressor for sample " + sample);
-        return compressor.addAlignment(read);
+            throw new ReviewedStingException("No compressor for sample " + sampleName);
+        Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = compressor.addAlignment(read);
+        Set<GATKSAMRecord> reads = readsAndStash.getFirst();
+        CompressionStash regions = readsAndStash.getSecond();
+
+        reads.addAll(closeVariantRegionsInAllSamples(regions));
+
+        return reads;
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> close() {
-        SortedSet<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
-        for ( SingleSampleCompressor comp : compressorsPerSample.values() )
-            for ( GATKSAMRecord read : comp.close() )
-                reads.add(read);
+    public Set<GATKSAMRecord> close() {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        for ( SingleSampleCompressor sample : compressorsPerSample.values() ) {
+            Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = sample.close();
+            reads = readsAndStash.getFirst();
+        }
+        return reads;
+    }
+
+    private Set<GATKSAMRecord> closeVariantRegionsInAllSamples(CompressionStash regions) {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        if (!regions.isEmpty()) {
+            for (SingleSampleCompressor sample : compressorsPerSample.values()) {
+                reads.addAll(sample.closeVariantRegions(regions));
+            }
+        }
         return reads;
     }
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index b6761f4a6..a05992cb4 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -330,7 +330,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
      */
     @Override
     public ReduceReadsStash reduceInit() {
-        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION, compressionStash));
+        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
     }
 
     /**
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
index 82a433300..ac3388795 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
@@ -1,8 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
+import java.util.Set;
 import java.util.TreeSet;
 
 /**
@@ -10,7 +12,7 @@ import java.util.TreeSet;
  * @author carneiro, depristo
  * @version 3.0
  */
-public class SingleSampleCompressor implements Compressor {
+public class SingleSampleCompressor {
     final private int contextSize;
     final private int downsampleCoverage;
     final private int minMappingQuality;
@@ -20,11 +22,11 @@ public class SingleSampleCompressor implements Compressor {
     final private ReduceReads.DownsampleStrategy downsampleStrategy;
     final private int nContigs;
     final private boolean allowPolyploidReduction;
-    final CompressionStash compressionStash;
 
     private SlidingWindow slidingWindow;
     private int slidingWindowCounter;
 
+    public static Pair<Set<GATKSAMRecord>, CompressionStash> emptyPair = new Pair<Set<GATKSAMRecord>,CompressionStash>(new TreeSet<GATKSAMRecord>(), new CompressionStash());
 
     public SingleSampleCompressor(final int contextSize,
                                   final int downsampleCoverage,
@@ -34,8 +36,7 @@ public class SingleSampleCompressor implements Compressor {
                                   final int minBaseQual,
                                   final ReduceReads.DownsampleStrategy downsampleStrategy,
                                   final int nContigs,
-                                  final boolean allowPolyploidReduction,
-                                  final CompressionStash compressionStash) {
+                                  final boolean allowPolyploidReduction) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
         this.minMappingQuality = minMappingQuality;
@@ -46,15 +47,11 @@ public class SingleSampleCompressor implements Compressor {
         this.downsampleStrategy = downsampleStrategy;
         this.nContigs = nContigs;
         this.allowPolyploidReduction = allowPolyploidReduction;
-        this.compressionStash = compressionStash;
     }
 
-    /**
-     * @{inheritDoc}
-     */
-    @Override
-    public Iterable<GATKSAMRecord> addAlignment( GATKSAMRecord read ) {
-        TreeSet<GATKSAMRecord> result = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+    public Pair<Set<GATKSAMRecord>, CompressionStash> addAlignment( GATKSAMRecord read ) {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        CompressionStash stash = new CompressionStash();
         int readOriginalStart = read.getUnclippedStart();
 
         // create a new window if:
@@ -63,22 +60,27 @@ public class SingleSampleCompressor implements Compressor {
               (readOriginalStart - contextSize > slidingWindow.getStopLocation()))) {  // this read is too far away from the end of the current sliding window
 
             // close the current sliding window
-            result.addAll(slidingWindow.close());
+            Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = slidingWindow.close();
+            reads = readsAndStash.getFirst();
+            stash = readsAndStash.getSecond();
             slidingWindow = null;                                                      // so we create a new one on the next if
         }
 
         if ( slidingWindow == null) {                                                  // this is the first read
-            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction, compressionStash);
+            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
             slidingWindowCounter++;
         }
 
-        result.addAll(slidingWindow.addRead(read));
-        return result;
+        stash.addAll(slidingWindow.addRead(read));
+        return new Pair<Set<GATKSAMRecord>, CompressionStash>(reads, stash);
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> close() {
-        return (slidingWindow != null) ? slidingWindow.close() : new TreeSet<GATKSAMRecord>();
+    public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
+        return (slidingWindow != null) ? slidingWindow.close() : emptyPair;
+    }
+
+    public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
+        return slidingWindow.closeVariantRegions(regions);
     }
 
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
index 24cacd997..24a3ba3cb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@@ -6,8 +6,10 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileHeader;
 import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -55,7 +57,8 @@ public class SlidingWindow {
     private final int nContigs;
 
     private boolean allowPolyploidReductionInGeneral;
-    private CompressionStash compressionStash;
+
+    private static CompressionStash emptyRegions = new CompressionStash();
 
     /**
      * The types of synthetic reads to use in the finalizeAndAdd method
@@ -87,7 +90,7 @@ public class SlidingWindow {
     }
 
 
-    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction, CompressionStash compressionStash) {
+    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
 
@@ -124,7 +127,6 @@ public class SlidingWindow {
         this.nContigs = nContigs;
 
         this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
-        this.compressionStash = compressionStash;
     }
 
     /**
@@ -138,7 +140,7 @@ public class SlidingWindow {
      * @param read the read
      * @return a list of reads that have been finished by sliding the window.
      */
-    public List<GATKSAMRecord> addRead(GATKSAMRecord read) {
+    public CompressionStash addRead(GATKSAMRecord read) {
         addToHeader(windowHeader, read);                                                                                // update the window header counts
         readsInWindow.add(read);                                                                                        // add read to sliding reads
         return slideWindow(read.getUnclippedStart());
@@ -152,8 +154,9 @@ public class SlidingWindow {
      * @param variantSite  boolean array with true marking variant regions
      * @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
      */
-    private SimpleGenomeLoc getNextVariantRegion(int from, int to, boolean[] variantSite) {
+    private SimpleGenomeLoc findNextVariantRegion(int from, int to, boolean[] variantSite, boolean forceClose) {
         boolean foundStart = false;
+        final int windowHeaderStart = getStartLocation(windowHeader);
         int variantRegionStartIndex = 0;
         for (int i=from; i<to; i++) {
             if (variantSite[i] && !foundStart) {
@@ -161,10 +164,12 @@ public class SlidingWindow {
                 foundStart = true;
             }
             else if(!variantSite[i] && foundStart) {
-                return(new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, i-1, true));
+                return(new SimpleGenomeLoc(contig, contigIndex, windowHeaderStart + variantRegionStartIndex, windowHeaderStart + i - 1, true));
             }
         }
-        return (foundStart) ? new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, to-1, false) : null;
+        final int refStart = windowHeaderStart + variantRegionStartIndex;
+        final int refStop  = windowHeaderStart + to - 1;
+        return (foundStart && forceClose) ? new SimpleGenomeLoc(contig, contigIndex, refStart, refStop, true) : null;
     }
 
     /**
@@ -173,19 +178,20 @@ public class SlidingWindow {
      * @param from         beginning window header index of the search window (inclusive)
      * @param to           end window header index of the search window (exclusive)
      * @param variantSite  boolean array with true marking variant regions
-     * @return a list with start/stops of variant regions following getNextVariantRegion description
+     * @return a list with start/stops of variant regions following findNextVariantRegion description
      */
-    private CompressionStash getVariantRegionsFromThisSample(int from, int to, boolean[] variantSite) {
+    private CompressionStash findVariantRegions(int from, int to, boolean[] variantSite, boolean forceClose) {
         CompressionStash regions = new CompressionStash();
         int index = from;
         while(index < to) {
-            SimpleGenomeLoc result = getNextVariantRegion(index, to, variantSite);
+            SimpleGenomeLoc result = findNextVariantRegion(index, to, variantSite, forceClose);
             if (result == null)
                 break;
 
             regions.add(result);
-            if (result.getStop() < 0)
+            if (!result.isFinished())
                 break;
+
             index = result.getStop() + 1;
         }
         return regions;
@@ -201,25 +207,25 @@ public class SlidingWindow {
      * @param incomingReadUnclippedStart the incoming read's start position. Must be the unclipped start!
      * @return all reads that have fallen to the left of the sliding window after the slide
      */
-    protected List<GATKSAMRecord> slideWindow(final int incomingReadUnclippedStart) {
-        List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
-
+    protected CompressionStash slideWindow(final int incomingReadUnclippedStart) {
         final int windowHeaderStartLocation = getStartLocation(windowHeader);
+        CompressionStash regions = emptyRegions;
+        boolean forceClose = true;
 
         if (incomingReadUnclippedStart - contextSize > windowHeaderStartLocation) {
             markSites(incomingReadUnclippedStart);
             int readStartHeaderIndex = incomingReadUnclippedStart - windowHeaderStartLocation;
             int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0);                                       // this is the limit of what we can close/send to consensus (non-inclusive)
 
-            CompressionStash regions = getVariantRegionsFromThisSample(0, breakpoint, markedSites.getVariantSiteBitSet());
-            finalizedReads = closeVariantRegions(regions, false);
-
-            while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
-                readsInWindow.pollFirst();
-            }
+            regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
         }
 
-        return finalizedReads;
+        // todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
+        while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
+                readsInWindow.pollFirst();
+        }
+
+        return regions;
     }
 
 
@@ -623,31 +629,27 @@ public class SlidingWindow {
         result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
         result.addAll(finalizeAndAdd(ConsensusType.BOTH));
 
-        return result;                                      // finalized reads will be downsampled if necessary
+        return result; // finalized reads will be downsampled if necessary
     }
 
-
-    private List<GATKSAMRecord> closeVariantRegions(CompressionStash regions, boolean forceClose) {
-        List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
+    public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
+        TreeSet<GATKSAMRecord> allReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
         if (!regions.isEmpty()) {
             int lastStop = -1;
+            int windowHeaderStart = getStartLocation(windowHeader);
+
             for (SimpleGenomeLoc region : regions) {
-                int start = region.getStart();
-                int stop = region.getStop();
+                if (region.isFinished() && region.getContig() == contig && region.getStart() >= windowHeaderStart && region.getStop() <= windowHeaderStart + windowHeader.size()) {
+                    int start = region.getStart() - windowHeaderStart;
+                    int stop = region.getStop() - windowHeaderStart;
 
-                if (!region.isFinished()) {
-                    if(forceClose)                          // region is unfinished but we're forcing the close of this window
-                        stop = windowHeader.size() - 1;
-                    else
-                        continue;                           // region is unfinished and we're not forcing the close of this window
+                    allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1)); // todo -- add condition here dependent on dbSNP track
+                    lastStop = stop;
                 }
-
-                allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
-                lastStop = stop;
             }
 
-            for (int i = 0; i < lastStop; i++)              // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
-                windowHeader.remove();                      // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
+            for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
+                windowHeader.remove();
         }
         return allReads;
     }
@@ -681,23 +683,24 @@ public class SlidingWindow {
      *
      * @return All reads generated
      */
-    public List<GATKSAMRecord> close() {
+    public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
         // mark variant regions
-        List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
+        Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        CompressionStash regions = new CompressionStash();
+        boolean forceCloseUnfinishedRegions = true;
 
         if (!windowHeader.isEmpty()) {
             markSites(getStopLocation(windowHeader) + 1);
-            CompressionStash regions = getVariantRegionsFromThisSample(0, windowHeader.size(), markedSites.getVariantSiteBitSet());
-            finalizedReads = closeVariantRegions(regions, true);
+            regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), forceCloseUnfinishedRegions);
+            finalizedReads = closeVariantRegions(regions);
 
             if (!windowHeader.isEmpty()) {
                 finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), false));
                 finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH));                                              // if it ended in running consensus, finish it up
             }
-
         }
 
-        return finalizedReads;
+        return new Pair<Set<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
     }
 
     /**
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 50500536f..1e539dc9d 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -26,23 +26,23 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
 
     @Test(enabled = true)
     public void testDefaultCompression() {
-        RRTest("testDefaultCompression ", L, "46ea88e32bae3072f5cd68a0db4b55f1");
+        RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da");
     }
 
     @Test(enabled = true)
     public void testMultipleIntervals() {
         String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
-        RRTest("testMultipleIntervals ", intervals, "c3784a0b42f5456b705f9b152a4b697a");
+        RRTest("testMultipleIntervals ", intervals, "c5dcdf4edf368b5b897d66f76034d9f0");
     }
 
     @Test(enabled = true)
     public void testHighCompression() {
-        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e385eb0ae5768f8507671d5303a212d5");
+        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "27cb99e87eda5e46187e56f50dd37f26");
     }
 
     @Test(enabled = true)
     public void testLowCompression() {
-        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "6b5546be9363e493b9838542f5dc8cae");
+        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "4e7f111688d49973c35669855b7a2eaf");
     }
 
     @Test(enabled = true)
@@ -83,7 +83,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     @Test(enabled = true)
     public void testCoReduction() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
-        executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("")));
+        executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
     }
 
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
index 45e105751..51d8aad63 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
@@ -1,6 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.SortedSet;
 
 /**
  * GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
@@ -27,4 +31,43 @@ public class SimpleGenomeLoc extends GenomeLoc {
     public boolean isFinished() {
         return finished;
     }
+
+    @Requires("a != null && b != null")
+    public static SimpleGenomeLoc merge(SimpleGenomeLoc a, SimpleGenomeLoc b) throws ReviewedStingException {
+        if(GenomeLoc.isUnmapped(a) || GenomeLoc.isUnmapped(b)) {
+            throw new ReviewedStingException("Tried to merge unmapped genome locs");
+        }
+
+        if (!(a.contiguousP(b))) {
+            throw new ReviewedStingException("The two genome locs need to be contiguous");
+        }
+
+
+        return new SimpleGenomeLoc(a.getContig(), a.contigIndex,
+                Math.min(a.getStart(), b.getStart()),
+                Math.max(a.getStop(), b.getStop()),
+                a.isFinished());
+    }
+
+    /**
+     * Merges a list of *sorted* *contiguous* locs into one
+     *
+     * @param sortedLocs a sorted list of contiguous locs
+     * @return one merged loc
+     */
+    public static SimpleGenomeLoc merge(SortedSet<SimpleGenomeLoc> sortedLocs) {
+        SimpleGenomeLoc previousLoc = null;
+        for (SimpleGenomeLoc loc : sortedLocs) {
+            if (loc.isUnmapped()) {
+                throw new ReviewedStingException("Tried to merge unmapped genome locs");
+            }
+            if (previousLoc != null && !previousLoc.contiguousP(loc)) {
+                throw new ReviewedStingException("The genome locs need to be contiguous");
+            }
+            previousLoc = loc;
+        }
+        SimpleGenomeLoc firstLoc = sortedLocs.first();
+        SimpleGenomeLoc lastLoc = sortedLocs.last();
+        return merge(firstLoc, lastLoc);
+    }
 }

From b07106b3a74505d827a1041870d51616d4a6a24f Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 6 Nov 2012 14:39:58 -0800
Subject: [PATCH 057/236] Reimplement the allele biased downsampling to be
 smarter.  Now we don't blindly pull n% of reads off of each allele.  Instead,
 we try all possible genotype conformations for the contaminating sample and
 choose the one that provides the best genotype for the target sample (based
 heuristically on allele balance).  This method allows us to save some of the
 reads that belong to the target sample, which should make Daniel M happy. 
 Added unit tests to test the biased downsampling functionality.

---
 .../AlleleBiasedDownsamplingUtils.java        | 114 ++++++++++++++++--
 ...AlleleBiasedDownsamplingUtilsUnitTest.java | 108 +++++++++++++++++
 2 files changed, 209 insertions(+), 13 deletions(-)
 create mode 100755 protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
index 59357e1c4..1a7b4da51 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
@@ -67,7 +67,6 @@ public class AlleleBiasedDownsamplingUtils {
                 alleleStratifiedElements[baseIndex].add(pe);
         }
 
-        // Down-sample *each* allele by the contamination fraction applied to the entire pileup.
         // Unfortunately, we need to maintain the original pileup ordering of reads or FragmentUtils will complain later.
         int numReadsToRemove = (int)(pileup.getNumberOfElements() * downsamplingFraction); // floor
         final TreeSet<PileupElement> elementsToKeep = new TreeSet<PileupElement>(new Comparator<PileupElement>() {
@@ -78,12 +77,21 @@ public class AlleleBiasedDownsamplingUtils {
             }
         });
 
+        // make a listing of allele counts
+        final int[] alleleCounts = new int[4];
+        for ( int i = 0; i < 4; i++ )
+            alleleCounts[i] = alleleStratifiedElements[i].size();
+
+        // do smart down-sampling
+        final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
+
         for ( int i = 0; i < 4; i++ ) {
             final ArrayList<PileupElement> alleleList = alleleStratifiedElements[i];
-            if ( alleleList.size() <= numReadsToRemove )
-                logAllElements(alleleList, log);
+            // if we don't need to remove any reads, keep them all
+            if ( alleleList.size() <= targetAlleleCounts[i] )
+                elementsToKeep.addAll(alleleList);
             else
-                elementsToKeep.addAll(downsampleElements(alleleList, numReadsToRemove, log));
+                elementsToKeep.addAll(downsampleElements(alleleList, alleleList.size() - targetAlleleCounts[i], log));
         }
 
         // clean up pointers so memory can be garbage collected if needed
@@ -93,6 +101,59 @@ public class AlleleBiasedDownsamplingUtils {
         return new ReadBackedPileupImpl(pileup.getLocation(), new ArrayList<PileupElement>(elementsToKeep));
     }
 
+    private static int scoreAlleleCounts(final int[] alleleCounts) {
+        final int maxIndex = MathUtils.maxElementIndex(alleleCounts);
+        final int maxCount = alleleCounts[maxIndex];
+
+        int nonMaxCount = 0;
+        for ( int i = 0; i < 4; i++ ) {
+            if ( i != maxIndex )
+                nonMaxCount += alleleCounts[i];
+        }
+
+        // try to get the best score: in the het case the counts should be equal and in the hom case the non-max should be zero
+        return Math.min(Math.abs(maxCount - nonMaxCount), Math.abs(nonMaxCount));
+    }
+
+    /**
+     * Computes an allele biased version of the given pileup
+     *
+     * @param alleleCounts              the original pileup
+     * @param numReadsToRemove          fraction of total reads to remove per allele
+     * @return allele biased pileup
+     */
+    protected static int[] runSmartDownsampling(final int[] alleleCounts, final int numReadsToRemove) {
+        final int numAlleles = alleleCounts.length;
+
+        int maxScore = scoreAlleleCounts(alleleCounts);
+        int[] alleleCountsOfMax = alleleCounts;
+
+        final int numReadsToRemovePerAllele = numReadsToRemove / 2;
+
+        for ( int i = 0; i < numAlleles; i++ ) {
+            for ( int j = i; j < numAlleles; j++ ) {
+                final int[] newCounts = alleleCounts.clone();
+
+                // split these cases so we don't lose on the floor (since we divided by 2)
+                if ( i == j ) {
+                    newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemove);
+                } else {
+                    newCounts[i] = Math.max(0, newCounts[i] - numReadsToRemovePerAllele);
+                    newCounts[j] = Math.max(0, newCounts[j] - numReadsToRemovePerAllele);
+                }
+
+                final int score = scoreAlleleCounts(newCounts);
+
+                if ( score < maxScore ) {
+                    maxScore = score;
+                    alleleCountsOfMax = newCounts;
+                }
+            }
+        }
+
+        return alleleCountsOfMax;
+    }
+
     /**
      * Performs allele biased down-sampling on a pileup and computes the list of elements to keep
      *
@@ -102,7 +163,15 @@ public class AlleleBiasedDownsamplingUtils {
      * @return the list of pileup elements TO KEEP
      */
     private static List<PileupElement> downsampleElements(final ArrayList<PileupElement> elements, final int numElementsToRemove, final PrintStream log) {
+        if ( numElementsToRemove == 0 )
+            return elements;
+
         final int pileupSize = elements.size();
+        if ( numElementsToRemove == pileupSize ) {
+            logAllElements(elements, log);
+            return new ArrayList<PileupElement>(0);
+        }
+
         final BitSet itemsToRemove = new BitSet(pileupSize);
         for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
             itemsToRemove.set(selectedIndex);
@@ -132,15 +201,25 @@ public class AlleleBiasedDownsamplingUtils {
         for ( final List<GATKSAMRecord> reads : alleleReadMap.values() )
             totalReads += reads.size();
 
-        // Down-sample *each* allele by the contamination fraction applied to the entire pileup.
         int numReadsToRemove = (int)(totalReads * downsamplingFraction);
-        final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove * alleleReadMap.size());
-        for ( final List<GATKSAMRecord> reads : alleleReadMap.values() ) {
-            if ( reads.size() <= numReadsToRemove ) {
-                readsToRemove.addAll(reads);
-                logAllReads(reads, log);
-            } else {
-                readsToRemove.addAll(downsampleReads(reads, numReadsToRemove, log));
+
+        // make a listing of allele counts
+        final List<Allele> alleles = new ArrayList<Allele>(alleleReadMap.keySet());
+        alleles.remove(Allele.NO_CALL);    // ignore the no-call bin
+        final int numAlleles = alleles.size();
+        final int[] alleleCounts = new int[numAlleles];
+        for ( int i = 0; i < numAlleles; i++ )
+            alleleCounts[i] = alleleReadMap.get(alleles.get(i)).size();
+
+        // do smart down-sampling
+        final int[] targetAlleleCounts = runSmartDownsampling(alleleCounts, numReadsToRemove);
+
+        final List<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numReadsToRemove);
+        for ( int i = 0; i < numAlleles; i++ ) {
+            final List<GATKSAMRecord> alleleBin = alleleReadMap.get(alleles.get(i));
+
+            if ( alleleBin.size() > targetAlleleCounts[i] ) {
+                readsToRemove.addAll(downsampleReads(alleleBin, alleleBin.size() - targetAlleleCounts[i], log));
             }
         }
 
@@ -156,13 +235,22 @@ public class AlleleBiasedDownsamplingUtils {
      * @return the list of pileup elements TO REMOVE
      */
     private static List<GATKSAMRecord> downsampleReads(final List<GATKSAMRecord> reads, final int numElementsToRemove, final PrintStream log) {
+        final ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(numElementsToRemove);
+
+        if ( numElementsToRemove == 0 )
+            return readsToRemove;
+
         final int pileupSize = reads.size();
+        if ( numElementsToRemove == pileupSize ) {
+            logAllReads(reads, log);
+            return reads;
+        }
+
         final BitSet itemsToRemove = new BitSet(pileupSize);
         for ( Integer selectedIndex : MathUtils.sampleIndicesWithoutReplacement(pileupSize, numElementsToRemove) ) {
             itemsToRemove.set(selectedIndex);
         }
 
-        ArrayList<GATKSAMRecord> readsToRemove = new ArrayList<GATKSAMRecord>(pileupSize - numElementsToRemove);
         for ( int i = 0; i < pileupSize; i++ ) {
             if ( itemsToRemove.get(i) ) {
                 final GATKSAMRecord read = reads.get(i);
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java
new file mode 100755
index 000000000..7c2f5619a
--- /dev/null
+++ b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import org.broadinstitute.sting.BaseTest;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+/**
+ * Basic unit test for AlleleBiasedDownsamplingUtils
+ */
+public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
+
+
+    @Test
+    public void testSmartDownsampling() {
+
+        final int[] idealHetAlleleCounts = new int[]{0, 50, 0, 50};
+        final int[] idealHomAlleleCounts = new int[]{0, 100, 0, 0};
+
+        // no contamination, no removal
+        testOneCase(0, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+
+        // hom sample, het contaminant, different alleles
+        testOneCase(5, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+        testOneCase(0, 0, 5, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+        testOneCase(0, 0, 0, 5, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+
+        // hom sample, hom contaminant, different alleles
+        testOneCase(10, 0, 0, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+        testOneCase(0, 0, 10, 0, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+        testOneCase(0, 0, 0, 10, 0.1, 100, idealHomAlleleCounts, idealHomAlleleCounts);
+
+        // het sample, het contaminant, different alleles
+        testOneCase(5, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 0, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+
+        // het sample, hom contaminant, different alleles
+        testOneCase(10, 0, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 0, 10, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+
+        // hom sample, het contaminant, overlapping alleles
+        final int[] enhancedHomAlleleCounts = new int[]{0, 105, 0, 0};
+        testOneCase(5, 5, 0, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
+        testOneCase(0, 5, 5, 0, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
+        testOneCase(0, 5, 0, 5, 0.1, 100, idealHomAlleleCounts, enhancedHomAlleleCounts);
+
+        // hom sample, hom contaminant, overlapping alleles
+        testOneCase(0, 10, 0, 0, 0.1, 100, idealHomAlleleCounts, new int[]{0, 110, 0, 0});
+
+        // het sample, het contaminant, overlapping alleles
+        testOneCase(5, 5, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 5, 5, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 5, 0, 5, 0.1, 100, idealHetAlleleCounts, new int[]{0, 55, 0, 55});
+        testOneCase(5, 0, 0, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 0, 5, 5, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+
+        // het sample, hom contaminant, overlapping alleles
+        testOneCase(0, 10, 0, 0, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+        testOneCase(0, 0, 0, 10, 0.1, 100, idealHetAlleleCounts, idealHetAlleleCounts);
+    }
+
+    private static void testOneCase(final int addA, final int addC, final int addG, final int addT, final double contaminationFraction,
+                                    final int pileupSize, final int[] initialCounts, final int[] targetCounts) {
+
+        final int[] actualCounts = initialCounts.clone();
+        actualCounts[0] += addA;
+        actualCounts[1] += addC;
+        actualCounts[2] += addG;
+        actualCounts[3] += addT;
+
+        final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction));
+        Assert.assertTrue(countsAreEqual(actualCounts, targetCounts));
+    }
+
+    private static boolean countsAreEqual(final int[] counts1, final int[] counts2) {
+        for ( int i = 0; i < 4; i++ ) {
+            if ( counts1[i] != counts2[i] )
+                return false;
+        }
+        return true;
+    }
+}

From 0a2dded09395f7f09f157f86791ad92433403d47 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 6 Nov 2012 16:07:40 -0800
Subject: [PATCH 058/236] Fixes for bugs uncovered by unit tests

---
 .../AlleleBiasedDownsamplingUtils.java        | 25 ++++++++++++-------
 ...AlleleBiasedDownsamplingUtilsUnitTest.java |  2 +-
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
index 1a7b4da51..a61614481 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
@@ -102,17 +102,24 @@ public class AlleleBiasedDownsamplingUtils {
     }
 
     private static int scoreAlleleCounts(final int[] alleleCounts) {
-        final int maxIndex = MathUtils.maxElementIndex(alleleCounts);
-        final int maxCount = alleleCounts[maxIndex];
+        if ( alleleCounts.length < 2 )
+            return 0;
 
-        int nonMaxCount = 0;
-        for ( int i = 0; i < 4; i++ ) {
-            if ( i != maxIndex )
-                nonMaxCount += alleleCounts[i];
-        }
+        // sort the counts (in ascending order)
+        final int[] alleleCountsCopy = alleleCounts.clone();
+        Arrays.sort(alleleCountsCopy);
 
-        // try to get the best score: in the het case the counts should be equal and in the hom case the non-max should be zero
-        return Math.min(Math.abs(maxCount - nonMaxCount), Math.abs(nonMaxCount));
+        final int maxCount = alleleCountsCopy[alleleCounts.length - 1];
+        final int nextBestCount = alleleCountsCopy[alleleCounts.length - 2];
+
+        int remainderCount = 0;
+        for ( int i = 0; i < alleleCounts.length - 2; i++ )
+            remainderCount += alleleCountsCopy[i];
+
+        // try to get the best score:
+        //    - in the het case the counts should be equal with nothing else
+        //    - in the hom case the non-max should be zero
+        return Math.min(maxCount - nextBestCount + remainderCount, Math.abs(nextBestCount + remainderCount));
     }
 
     /**
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java
index 7c2f5619a..be19d3ef4 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtilsUnitTest.java
@@ -95,7 +95,7 @@ public class AlleleBiasedDownsamplingUtilsUnitTest extends BaseTest {
         actualCounts[3] += addT;
 
         final int[] results = AlleleBiasedDownsamplingUtils.runSmartDownsampling(actualCounts, (int)(pileupSize * contaminationFraction));
-        Assert.assertTrue(countsAreEqual(actualCounts, targetCounts));
+        Assert.assertTrue(countsAreEqual(results, targetCounts));
     }
 
     private static boolean countsAreEqual(final int[] counts1, final int[] counts2) {

From 2da76db9452c50277e1269c3fa4d8248e97dcd8a Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 6 Nov 2012 22:23:05 -0800
Subject: [PATCH 059/236] Updating integration tests

---
 .../UnifiedGenotyperIntegrationTest.java      | 68 +++++++++----------
 .../HaplotypeCallerIntegrationTest.java       | 18 ++---
 2 files changed, 43 insertions(+), 43 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 9212d0e53..d3e77e002 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -30,7 +30,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultiSamplePilot1() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
-                Arrays.asList("cdec335abc9ad8e59335e39a73e0e95a"));
+                Arrays.asList("847605f4efafef89529fe0e496315edd"));
         executeTest("test MultiSample Pilot1", spec);
     }
 
@@ -38,7 +38,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testWithAllelesPassedIn1() {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("efddb5e258f97fd4f6661cff9eaa57de"));
+                Arrays.asList("5b31b811072a4df04524e13604015f9b"));
         executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
     }
 
@@ -46,7 +46,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testWithAllelesPassedIn2() {
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
-                Arrays.asList("24532eb381724cd74e99370da28d49ed"));
+                Arrays.asList("d9992e55381afb43742cc9b30fcd7538"));
         executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
     }
 
@@ -54,7 +54,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testSingleSamplePilot2() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
-                Arrays.asList("062a946160eec1d0fc135d58ca654ff4"));
+                Arrays.asList("fea530fdc8677e10be4cc11625fa5376"));
         executeTest("test SingleSample Pilot2", spec);
     }
 
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
-                Arrays.asList("94dc17d76d841f1d3a36160767ffa034"));
+                Arrays.asList("704888987baacff8c7b273b8ab9938d0"));
         executeTest("test Multiple SNP alleles", spec);
     }
 
@@ -78,7 +78,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testReverseTrim() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
-                Arrays.asList("9106d01ca0d0a8fedd068e72d509f380"));
+                Arrays.asList("e14c9b1f9f34d6c16de445bfa385be89"));
         executeTest("test reverse trim", spec);
     }
 
@@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMismatchedPLs() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
-                Arrays.asList("d847acf841ba8ba653f996ce4869f439"));
+                Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
         executeTest("test mismatched PLs", spec);
     }
 
@@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     //
     // --------------------------------------------------------------------------------------------------------------
 
-    private final static String COMPRESSED_OUTPUT_MD5 = "6792419c482e767a3deb28913ed2b1ad";
+    private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d";
 
     @Test
     public void testCompressedOutput() {
@@ -149,7 +149,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMinBaseQualityScore() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
-                Arrays.asList("56157d930da6ccd224bce1ca93f11e41"));
+                Arrays.asList("6ee6537e9ebc1bfc7c6cf8f04b1582ff"));
         executeTest("test min_base_quality_score 26", spec);
     }
 
@@ -157,7 +157,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testSLOD() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b36KGReference + " --computeSLOD --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("6ccb9bd88934e4272d0ce362dd35e603"));
+                Arrays.asList("55760482335497086458b09e415ecf54"));
         executeTest("test SLOD", spec);
     }
 
@@ -165,7 +165,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testNDA() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("480437dd6e2760f4ab3194431519f331"));
+                Arrays.asList("938e888a40182878be4c3cc4859adb69"));
         executeTest("test NDA", spec);
     }
 
@@ -173,7 +173,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testCompTrack() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
-                Arrays.asList("22c039412fd387dde6125b07c9a74a25"));
+                Arrays.asList("7dc186d420487e4e156a24ec8dea0951"));
         executeTest("test using comp track", spec);
     }
 
@@ -187,17 +187,17 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
     @Test
     public void testOutputParameterSitesOnly() {
-        testOutputParameters("-sites_only", "40aeb4c9e31fe7046b72afc58e7599cb");
+        testOutputParameters("-sites_only", "f99c7471127a6fb6f72e136bc873b2c9");
     }
 
     @Test
     public void testOutputParameterAllConfident() {
-        testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "c706ca93b25ff83613cb4e95dcac567c");
+        testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "9dbc9389db39cf9697e93e0bf529314f");
     }
 
     @Test
     public void testOutputParameterAllSites() {
-        testOutputParameters("--output_mode EMIT_ALL_SITES", "8a263fd0a94463ce1de9990f2b8ec841");
+        testOutputParameters("--output_mode EMIT_ALL_SITES", "81fff490c0f59890f1e75dc290833434");
     }
 
     private void testOutputParameters(final String args, final String md5) {
@@ -211,7 +211,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testConfidence() {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
-                Arrays.asList("df524e98903d96ab9353bee7c16a69de"));
+                Arrays.asList("4af83a883ecc03a23b0aa6dd4b8f1ceb"));
         executeTest("test confidence 1", spec1);
     }
 
@@ -222,12 +222,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     // --------------------------------------------------------------------------------------------------------------
     @Test
     public void testHeterozyosity1() {
-        testHeterozosity( 0.01, "8e61498ca03a8d805372a64c466b3b42" );
+        testHeterozosity( 0.01, "8dd37249e0a80afa86594c3f1e720760" );
     }
 
     @Test
     public void testHeterozyosity2() {
-        testHeterozosity( 1.0 / 1850, "668d06b5173cf3b97d052726988e1d7b" );
+        testHeterozosity( 1.0 / 1850, "040d169e20fda56f8de009a6015eb384" );
     }
 
     private void testHeterozosity(final double arg, final String md5) {
@@ -251,7 +251,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -o %s" +
                         " -L 1:10,000,000-10,100,000",
                 1,
-                Arrays.asList("908eb5e21fa39e7fb377cf4a9c4c7835"));
+                Arrays.asList("0e4713e4aa44f4f8fcfea7138295a627"));
 
         executeTest(String.format("test multiple technologies"), spec);
     }
@@ -270,7 +270,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -L 1:10,000,000-10,100,000" +
                         " -baq CALCULATE_AS_NECESSARY",
                 1,
-                Arrays.asList("c814558bb0ed2e19b12e1a2bf4465d52"));
+                Arrays.asList("46ea5d1ceb8eed1d0db63c3577915d6c"));
 
         executeTest(String.format("test calling with BAQ"), spec);
     }
@@ -289,7 +289,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -o %s" +
                         " -L 1:10,000,000-10,500,000",
                 1,
-                Arrays.asList("3593495aab5f6204c65de0b073a6ff65"));
+                Arrays.asList("50329e15e5139be9e3b643f0b3ba8a53"));
 
         executeTest(String.format("test indel caller in SLX"), spec);
     }
@@ -304,7 +304,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                         " -minIndelCnt 1" +
                         " -L 1:10,000,000-10,100,000",
                 1,
-                Arrays.asList("8b486a098029d5a106b0a37eff541c15"));
+                Arrays.asList("2b85e3bd6bf981afaf7324666740d74b"));
 
         executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
     }
@@ -317,7 +317,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                          " -o %s" +
                          " -L 1:10,000,000-10,500,000",
                  1,
-                 Arrays.asList("18efedc50cae2aacaba372265e38310b"));
+                 Arrays.asList("a6fd46eff78827060451a62cffd698a7"));
 
          executeTest(String.format("test indel calling, multiple technologies"), spec);
      }
@@ -327,7 +327,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("3ff8c7c80a518aa3eb8671a21479de5f"));
+                Arrays.asList("b8129bf754490cc3c76191d8cc4ec93f"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
     }
 
@@ -337,7 +337,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
                 baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
                         + privateTestDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
                         "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
-                Arrays.asList("578c0540f4f2052a634a829bcb9cc27d"));
+                Arrays.asList("591332fa0b5b22778cf820ee257049d2"));
         executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
     }
 
@@ -345,13 +345,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultiSampleIndels1() {
         WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
-                Arrays.asList("f7d0d0aee603df25c1f0525bb8df189e"));
+                Arrays.asList("a4761d7f25e7a62f34494801c98a0da7"));
         List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
 
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                         "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
-                Arrays.asList("fc91d457a16b4ca994959c2b5f3f0352"));
+                Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
         executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
     }
 
@@ -407,7 +407,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMinIndelFraction0() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 assessMinIndelFraction + " -minIndelFrac 0.0", 1,
-                Arrays.asList("857b8e5df444463ac27f665c4f67fbe2"));
+                Arrays.asList("90adefd39ed67865b0cb275ad0f07383"));
         executeTest("test minIndelFraction 0.0", spec);
     }
 
@@ -415,7 +415,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMinIndelFraction25() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 assessMinIndelFraction + " -minIndelFrac 0.25", 1,
-                Arrays.asList("81d4c7d9010fd6733b2997bc378e7471"));
+                Arrays.asList("2fded43949e258f8e9f68893c61c1bdd"));
         executeTest("test minIndelFraction 0.25", spec);
     }
 
@@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testNsInCigar() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
-                Arrays.asList("bd7984a374f0ae5d277bd5fc5065f64f"));
+                Arrays.asList("d6d40bacd540a41f305420dfea35e04a"));
         executeTest("test calling on reads with Ns in CIGAR", spec);
     }
 
@@ -451,18 +451,18 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testReducedBam() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("9a7cd58b9e3d5b72608c0d529321deba"));
+                Arrays.asList("c1077662411164182c5f75478344f83d"));
         executeTest("test calling on a ReducedRead BAM", spec);
     }
 
     @Test
     public void testReducedBamSNPs() {
-        testReducedCalling("SNP", "e7fc11baf208a1bca7b462d3148c936e");
+        testReducedCalling("SNP", "f5ccbc96d0d66832dd9b3c5cb6507db4");
     }
 
     @Test
     public void testReducedBamINDELs() {
-        testReducedCalling("INDEL", "132a4e0ccf9230b5bb4b56c649e2bdd5");
+        testReducedCalling("INDEL", "3c02ee5187933bed44dc416a2e28511f");
     }
 
 
@@ -483,7 +483,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testContaminationDownsampling() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --contamination_fraction_to_filter 0.20", 1,
-                Arrays.asList("27dd04159e06d9524fb8a4eef41f96ae"));
+                Arrays.asList("1f9071466fc40f4c6a0f58ac8e9135fb"));
         executeTest("test contamination_percentage_to_filter 0.20", spec);
     }
 
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index d00f5b61d..6828dbcb5 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -21,17 +21,17 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSample() {
-        HCTest(CEUTRIO_BAM, "", "aa1df35d6e64d7ca93feb4d2dd15dd0e");
+        HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
     }
 
     @Test
     public void testHaplotypeCallerSingleSample() {
-        HCTest(NA12878_BAM, "", "186c7f322978283c01249c6de2829215");
+        HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
     }
 
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
-        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "de9e78a52207fe62144dba5337965469");
+        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
     }
 
     private void HCTestComplexVariants(String bam, String args, String md5) {
@@ -42,7 +42,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSampleComplex() {
-        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "000dbb1b48f94d017cfec127c6cabe8f");
+        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
     }
 
     private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -53,7 +53,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleSymbolic() {
-        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "d86fae2d1b504b422b7b0cfbbdecc2c4");
+        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
     }
 
     private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -64,20 +64,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleIndelQualityScores() {
-        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "b369c2a6cb5c99a424551b33bae16f3b");
+        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
     }
 
     @Test
     public void HCTestProblematicReadsModifiedInActiveRegions() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("f6326adfdf5bc147626b30a89ce06d56"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
         executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
     }
 
     @Test
     public void HCTestStructuralIndels() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("b6c67ee8e99cc8f53a6587bb26028047"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
         executeTest("HCTestStructuralIndels: ", spec);
     }
 
@@ -91,7 +91,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     public void HCTestReducedBam() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("4beb9f87ab3f316a9384c3d0dca6ebe9"));
+                Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
         executeTest("HC calling on a ReducedRead BAM", spec);
     }
 }

From 17ab3a39d55ce389c45f24c81349263e8cd4dad7 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 8 Nov 2012 14:35:23 -0500
Subject: [PATCH 061/236] Make the --intermediate_csv_file argument un-hidden.

---
 .../gatk/walkers/bqsr/RecalibrationArgumentCollection.java     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index fc7d8a8a4..e5704a1e2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -75,8 +75,9 @@ public class RecalibrationArgumentCollection {
 
     /**
      * If not provided, then a temporary file is created and then deleted upon completion.
+     * For advanced users only.
      */
-    @Hidden
+    @Advanced
     @Argument(fullName = "intermediate_csv_file", shortName = "intermediate", doc = "The intermediate csv file to create", required = false)
     public File RECAL_CSV_FILE = null;
 

From e9183d9fe0ada286ed91bced4feb1e004e10ad56 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 8 Nov 2012 15:07:47 -0500
Subject: [PATCH 062/236] Fix bugs as reported on the forum: BED needs to be
 explicitly set as the default output format and the output didn't actually
 adhere to the BED spec.

---
 .../sting/gatk/walkers/coverage/CallableLoci.java         | 4 ++--
 .../coverage/CallableLociWalkerIntegrationTest.java       | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
index 58ddd0879..48019efea 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CallableLoci.java
@@ -191,7 +191,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
      */
     @Advanced
     @Argument(fullName = "format", shortName = "format", doc = "Output format", required = false)
-    OutputFormat outputFormat;
+    OutputFormat outputFormat = OutputFormat.BED;
 
     public enum OutputFormat {
         /**
@@ -297,7 +297,7 @@ public class CallableLoci extends LocusWalker<CallableLoci.CallableBaseState, Ca
         }
 
         public String toString() {
-            return String.format("%s %d %d %s", loc.getContig(), loc.getStart(), loc.getStop(), state);
+            return String.format("%s\t%d\t%d\t%s", loc.getContig(), loc.getStart()-1, loc.getStop(), state);
         }
     }
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
index c9e91e664..ec69a893c 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/coverage/CallableLociWalkerIntegrationTest.java
@@ -38,7 +38,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
     public void testCallableLociWalkerBed() {
         String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -summary %s";
         WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
-                Arrays.asList("9e4ec9c23f21a8162d27a39ab057398c", SUMMARY_MD5));
+                Arrays.asList("42e86c06c167246a28bffdacaca75ffb", SUMMARY_MD5));
         executeTest("formatBed", spec);
     }
 
@@ -46,7 +46,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
     public void testCallableLociWalkerPerBase() {
         String gatk_args = commonArgs + " -format STATE_PER_BASE -L 1:10,000,000-11,000,000 -summary %s";
         WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
-                Arrays.asList("e6044b4495ef24f542403e6a94437068", SUMMARY_MD5));
+                Arrays.asList("d66c525d9c70f62df8156261d3e535ad", SUMMARY_MD5));
         executeTest("format_state_per_base", spec);
     }
     
@@ -54,7 +54,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
     public void testCallableLociWalker2() {
         String gatk_args = commonArgs + " -format BED -L 1:10,000,000-10,000,100 -L 1:10,000,110-10,000,120 -summary %s";
         WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
-                Arrays.asList("c671f65712d9575b8b3e1f1dbedc146e", "d287510eac04acf5a56f5cde2cba0e4a"));
+                Arrays.asList("330f476085533db92a9dbdb3a127c041", "d287510eac04acf5a56f5cde2cba0e4a"));
         executeTest("formatBed by interval", spec);
     }
 
@@ -62,7 +62,7 @@ public class CallableLociWalkerIntegrationTest extends WalkerTest {
     public void testCallableLociWalker3() {
         String gatk_args = commonArgs + " -format BED -L 1:10,000,000-11,000,000 -minDepth 10 -maxDepth 100 --minBaseQuality 10 --minMappingQuality 20 -summary %s";
         WalkerTestSpec spec = new WalkerTestSpec(gatk_args, 2,
-                Arrays.asList("b7d26a470ef906590249f2fa45fd6bdd", "da431d393f7c2b2b3e27556b86c1dbc7"));
+                Arrays.asList("46a53379aaaf9803276a0a34b234f6ab", "da431d393f7c2b2b3e27556b86c1dbc7"));
         executeTest("formatBed lots of arguments", spec);
     }
 }

From e93d46191004f554bdd75b42d07b098241b9715e Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Fri, 9 Nov 2012 09:11:04 -0500
Subject: [PATCH 063/236] Adding integration test to BQSR for the csv file

---
 .../gatk/walkers/bqsr/BQSRIntegrationTest.java    | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
index b839382dc..f6ec47760 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
@@ -90,6 +90,21 @@ public class BQSRIntegrationTest extends WalkerTest {
         executeTest("testBQSRFailWithoutDBSNP", spec);
     }
 
+    @Test
+    public void testBQSRCSV() {
+        WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
+                " -T BaseRecalibrator" +
+                        " -R " + b36KGReference +
+                        " -I " + validationDataLocation + "NA12892.SLX.SRP000031.2009_06.selected.bam" +
+                        " -knownSites " + b36dbSNP129 +
+                        " -L 1:10,000,000-10,200,000" +
+                        " -o /dev/null" +
+                        " --plot_pdf_file /dev/null" +
+                        " --intermediate_csv_file %s",
+                Arrays.asList("d1c38a3418979400630e2bca1140689c"));
+        executeTest("testBQSR-CSVfile", spec);
+    }
+
     @Test
     public void testBQSRFailWithSolidNoCall() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(

From 95a4ba57bf1d13ee48402231495c19a0eed35546 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 13 Nov 2012 01:18:37 -0500
Subject: [PATCH 066/236] Implementation of BySampleSAMFileWriter ReduceReads
 now works with the n-way-out capability, splitting by sample. DEV-27 #resolve
 #time 3m

---
 .../compression/reducereads/ReduceReads.java  |  37 +-
 .../utils/sam/BySampleSAMFileWriter.java      |  70 ++++
 .../sting/utils/sam/NWaySAMFileWriter.java    | 374 +++++++++---------
 3 files changed, 290 insertions(+), 191 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index a05992cb4..3712e4524 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -25,6 +25,9 @@
 
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMProgramRecord;
 import net.sf.samtools.util.SequenceUtil;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Hidden;
@@ -45,6 +48,7 @@ import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.sam.BySampleSAMFileWriter;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 
@@ -86,7 +90,8 @@ import java.util.*;
 public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
 
     @Output
-    private StingSAMFileWriter out;
+    private StingSAMFileWriter out = null;
+    private SAMFileWriter writerToUse = null;
 
     /**
      * The number of bases to keep around mismatches (potential variation)
@@ -196,6 +201,10 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
     @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false)
     private int nContigs = 2;
 
+    @Hidden
+    @Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
+    private boolean nwayout = false;
+
     @Hidden
     @Argument(fullName = "", shortName = "dl", doc = "", required = false)
     private int debugLevel = 0;
@@ -227,6 +236,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
     SortedSet<GenomeLoc> intervalList;
     
     private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads";   // The name that will go in the @PG tag
+    private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam";
 
     /**
      * Basic generic initialization of the readNameHash and the intervalList. Output initialization
@@ -242,10 +252,24 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         if (toolkit.getIntervals() != null)
             intervalList.addAll(toolkit.getIntervals());
 
-        if (!NO_PG_TAG)
-            Utils.setupWriter(out, toolkit, false, true, this, PROGRAM_RECORD_NAME);
-        else
+
+        // todo -- rework the whole NO_PG_TAG thing
+        final boolean preSorted = true;
+        final boolean indexOnTheFly = true;
+        final boolean generateMD5 = true;
+        final boolean keep_records = true;
+        final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
+        if (nwayout) {
+            SAMProgramRecord programRecord = NO_PG_TAG ? null : Utils.createProgramRecord(toolkit, this, PROGRAM_RECORD_NAME);
+            writerToUse = new BySampleSAMFileWriter(toolkit, PROGRAM_FILENAME_EXTENSION, sortOrder, preSorted, indexOnTheFly, NO_PG_TAG, programRecord, true);
+        }
+        else {
+            writerToUse = out;
             out.setPresorted(false);
+            if (!NO_PG_TAG) {
+                Utils.setupWriter(out, toolkit, !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
+            }
+        }
     }
 
     /**
@@ -386,6 +410,9 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         // output any remaining reads in the compressor
         for (GATKSAMRecord read : stash.close())
             outputRead(read);
+
+        if (nwayout)
+            writerToUse.close();
     }
 
     /**
@@ -554,7 +581,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         if (!DONT_COMPRESS_READ_NAMES)
             compressReadName(read);
 
-        out.addAlignment(read);
+        writerToUse.addAlignment(read);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java
new file mode 100644
index 000000000..6bad58d9f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: carneiro
+ * Date: Nov 13
+ */
+public class BySampleSAMFileWriter extends NWaySAMFileWriter{
+
+    private final Map<String, SAMReaderID> sampleToWriterMap;
+
+    public BySampleSAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        super(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, pRecord, keep_records);
+
+        sampleToWriterMap = new HashMap<String, SAMReaderID>(toolkit.getSAMFileHeader().getReadGroups().size() * 2);
+
+        for (SAMReaderID readerID : toolkit.getReadsDataSource().getReaderIDs()) {
+            for (SAMReadGroupRecord rg : toolkit.getReadsDataSource().getHeader(readerID).getReadGroups()) {
+                String sample = rg.getSample();
+                if (sampleToWriterMap.containsKey(sample) && sampleToWriterMap.get(sample) != readerID) {
+                    throw new ReviewedStingException("The same sample appears in multiple files, this input cannot be multiplexed using the BySampleSAMFileWriter, try NWaySAMFileWriter instead.");
+                }
+                else {
+                    sampleToWriterMap.put(sample, readerID);
+                }
+            }
+        }
+    }
+
+    @Override
+    public void addAlignment(SAMRecord samRecord) {
+        super.addAlignment(samRecord, sampleToWriterMap.get(samRecord.getReadGroup().getSample()));
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
index fa07523f3..83d1c99bf 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
@@ -1,186 +1,188 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.sam;
-
-import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
-import org.broadinstitute.sting.utils.exceptions.StingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.text.TextFormattingUtils;
-
-import java.io.File;
-import java.util.*;
-
-/**
- * Created by IntelliJ IDEA.
- * User: asivache
- * Date: May 31, 2011
- * Time: 3:52:49 PM
- * To change this template use File | Settings | File Templates.
- */
-public class NWaySAMFileWriter implements SAMFileWriter {
-
-    private Map<SAMReaderID,SAMFileWriter> writerMap = null;
-    private boolean presorted ;
-    GenomeAnalysisEngine toolkit;
-    boolean KEEP_ALL_PG_RECORDS = false;
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                             boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
-        this.presorted = presorted;
-        this.toolkit = toolkit;
-        this.KEEP_ALL_PG_RECORDS = keep_records;
-        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
-        setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
-        this.presorted = presorted;
-        this.toolkit = toolkit;
-        this.KEEP_ALL_PG_RECORDS = keep_records;
-        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
-        setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                             boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
-        this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
-        this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
-    }
-
-    /**
-     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
-     * from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
-     * onto a unique output file name.
-     * @param toolkit
-     * @param in2out
-     */
-    public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
-        if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
-        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
-
-            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
-
-            String outName;
-            if ( ! in2out.containsKey(fName) )
-                    throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
-            outName = in2out.get(fName);
-
-            if ( writerMap.containsKey( rid ) )
-                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
-                        "map file likely contains multiple entries for this input file");
-
-            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
-        }
-
-    }
-
-    /**
-     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
-     * from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
-     * input file name and adding ext instead (e.g. ".cleaned.bam").
-     * onto a unique output file name.
-     * @param toolkit
-     * @param ext
-     */
-    public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
-        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
-
-            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
-
-            String outName;
-            int pos ;
-            if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
-            else {
-                if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
-                else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
-            }
-            String prefix = fName.substring(0,pos);
-            outName = prefix+ext;
-
-            if ( writerMap.containsKey( rid ) )
-                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
-            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
-        }
-
-    }
-
-    private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
-                           boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
-        File f = new File(outName);
-        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
-        header.setSortOrder(order);
-
-        if ( programRecord != null )  {
-            // --->> add program record
-            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
-            for ( SAMProgramRecord record : oldRecords ) {
-                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
-                    newRecords.add(record);
-            }
-            newRecords.add(programRecord);
-            header.setProgramRecords(newRecords);
-            // <-- add program record ends here
-        }
-        SAMFileWriterFactory factory = new SAMFileWriterFactory();
-        factory.setCreateIndex(indexOnTheFly);
-        factory.setCreateMd5File(generateMD5);
-        SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
-        writerMap.put(id,sw);
-    }
-
-    public Collection<SAMFileWriter> getWriters() {
-        return writerMap.values();
-    }
-
-    public void addAlignment(SAMRecord samRecord) {
-        final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
-        String rg = samRecord.getStringAttribute("RG");
-        if ( rg != null ) {
-            String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
-            samRecord.setAttribute("RG",rg_orig);
-        }
-        writerMap.get(id).addAlignment(samRecord);
-    }
-
-    public SAMFileHeader getFileHeader() {
-        return toolkit.getSAMFileHeader();
-    }
-
-    public void close() {
-        for ( SAMFileWriter w : writerMap.values() ) w.close();
-    }
-}
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.*;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.exceptions.StingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: asivache
+ * Date: May 31, 2011
+ * Time: 3:52:49 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public class NWaySAMFileWriter implements SAMFileWriter {
+
+    private Map<SAMReaderID,SAMFileWriter> writerMap = null;
+    private boolean presorted ;
+    GenomeAnalysisEngine toolkit;
+    boolean KEEP_ALL_PG_RECORDS = false;
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                             boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        this.presorted = presorted;
+        this.toolkit = toolkit;
+        this.KEEP_ALL_PG_RECORDS = keep_records;
+        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
+        setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        this.presorted = presorted;
+        this.toolkit = toolkit;
+        this.KEEP_ALL_PG_RECORDS = keep_records;
+        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
+        setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                             boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
+        this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
+        this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
+    }
+
+    /**
+     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
+     * from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
+     * onto a unique output file name.
+     * @param toolkit
+     * @param in2out
+     */
+    public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
+        if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
+        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
+
+            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
+
+            String outName;
+            if ( ! in2out.containsKey(fName) )
+                    throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
+            outName = in2out.get(fName);
+
+            if ( writerMap.containsKey( rid ) )
+                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
+                        "map file likely contains multiple entries for this input file");
+
+            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
+        }
+
+    }
+
+    /**
+     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
+     * from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
+     * input file name and adding ext instead (e.g. ".cleaned.bam").
+     * onto a unique output file name.
+     * @param toolkit
+     * @param ext
+     */
+    public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
+        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
+
+            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
+
+            String outName;
+            int pos ;
+            if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
+            else {
+                if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
+                else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
+            }
+            String prefix = fName.substring(0,pos);
+            outName = prefix+ext;
+
+            if ( writerMap.containsKey( rid ) )
+                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
+            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
+        }
+
+    }
+
+    private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
+                           boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
+        File f = new File(outName);
+        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
+        header.setSortOrder(order);
+
+        if ( programRecord != null )  {
+            // --->> add program record
+            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
+            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
+            for ( SAMProgramRecord record : oldRecords ) {
+                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
+                    newRecords.add(record);
+            }
+            newRecords.add(programRecord);
+            header.setProgramRecords(newRecords);
+            // <-- add program record ends here
+        }
+        SAMFileWriterFactory factory = new SAMFileWriterFactory();
+        factory.setCreateIndex(indexOnTheFly);
+        factory.setCreateMd5File(generateMD5);
+        SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
+        writerMap.put(id,sw);
+    }
+
+    public Collection<SAMFileWriter> getWriters() {
+        return writerMap.values();
+    }
+
+    public void addAlignment(SAMRecord samRecord) {
+        final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
+        String rg = samRecord.getStringAttribute("RG");
+        if ( rg != null ) {
+            String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
+            samRecord.setAttribute("RG",rg_orig);
+        }
+        addAlignment(samRecord, id);
+    }
+
+    public void addAlignment(SAMRecord samRecord, SAMReaderID readerID) {
+        writerMap.get(readerID).addAlignment(samRecord);
+    }
+
+    public SAMFileHeader getFileHeader() {
+        return toolkit.getSAMFileHeader();
+    }
+
+    public void close() {
+        for ( SAMFileWriter w : writerMap.values() ) w.close();
+    }
+}

From cab8ba7c7528b212012fe8955df2fbaab75b7c8b Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 13 Nov 2012 15:21:57 -0500
Subject: [PATCH 067/236] Breaking the utility to write @PG tags for
 SAMFileWriters and StingSAMFileWriters

---
 .../compression/reducereads/ReduceReads.java  |  3 +-
 .../org/broadinstitute/sting/utils/Utils.java | 59 +++++++++++++++++--
 .../sting/utils/sam/NWaySAMFileWriter.java    | 21 ++-----
 3 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 3712e4524..3cdf3d75e 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -256,7 +256,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         // todo -- rework the whole NO_PG_TAG thing
         final boolean preSorted = true;
         final boolean indexOnTheFly = true;
-        final boolean generateMD5 = true;
         final boolean keep_records = true;
         final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
         if (nwayout) {
@@ -267,7 +266,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
             writerToUse = out;
             out.setPresorted(false);
             if (!NO_PG_TAG) {
-                Utils.setupWriter(out, toolkit, !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
+                Utils.setupWriter(out, toolkit, toolkit.getSAMFileHeader(), !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index f4a200af0..b780d0966 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -687,23 +687,69 @@ public class Utils {
             array[i] = value;
     }
 
-    public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
-        final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
-
-        SAMFileHeader header = toolkit.getSAMFileHeader();
+    /**
+     * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
+     * up the writer with the header and presorted status.
+     *
+     * @param toolkit             the engine
+     * @param originalHeader      original header
+     * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+     * @param programRecord       the program record for this program
+     */
+    public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, SAMProgramRecord programRecord) {
+        SAMFileHeader header = originalHeader.clone();
         List<SAMProgramRecord> oldRecords = header.getProgramRecords();
         List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
         for ( SAMProgramRecord record : oldRecords )
-            if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS )
+            if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
                 newRecords.add(record);
 
         newRecords.add(programRecord);
         header.setProgramRecords(newRecords);
+        return header;
+    }
 
+    /**
+    * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and returns
+    * the new header to be added to the BAM writer.
+    *
+    * @param toolkit             the engine
+    * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+    * @param walker              the walker object (so we can extract the command line)
+    * @param PROGRAM_RECORD_NAME the name for the PG tag
+    * @return a pre-filled header for the bam writer
+    */
+    public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
+        final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
+        return setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, programRecord);
+    }
+
+    /**
+     * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
+     * up the writer with the header and presorted status.
+     *
+     * @param writer              BAM file writer
+     * @param toolkit             the engine
+     * @param preSorted           whether or not the writer can assume reads are going to be added are already sorted
+     * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+     * @param walker              the walker object (so we can extract the command line)
+     * @param PROGRAM_RECORD_NAME the name for the PG tag
+     */
+    public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
+        SAMFileHeader header = setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, walker, PROGRAM_RECORD_NAME);
         writer.writeHeader(header);
         writer.setPresorted(preSorted);
     }
-    
+
+
+    /**
+     * Creates a program record (@PG) tag
+     *
+     * @param toolkit             the engine
+     * @param walker              the walker object (so we can extract the command line)
+     * @param PROGRAM_RECORD_NAME the name for the PG tag
+     * @return a program record for the tool
+     */
     public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) {
         final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
         final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
@@ -858,4 +904,5 @@ public class Utils {
         }
         return subLists;
     }
+
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
index 83d1c99bf..cdf70884c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
@@ -28,11 +28,14 @@ package org.broadinstitute.sting.utils.sam;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 
 import java.io.File;
-import java.util.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Created by IntelliJ IDEA.
@@ -138,21 +141,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
     private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
                            boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
         File f = new File(outName);
-        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
-        header.setSortOrder(order);
-
-        if ( programRecord != null )  {
-            // --->> add program record
-            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
-            for ( SAMProgramRecord record : oldRecords ) {
-                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
-                    newRecords.add(record);
-            }
-            newRecords.add(programRecord);
-            header.setProgramRecords(newRecords);
-            // <-- add program record ends here
-        }
+        SAMFileHeader header = Utils.setupWriter(toolkit, toolkit.getSAMFileHeader(id), KEEP_ALL_PG_RECORDS, programRecord);
         SAMFileWriterFactory factory = new SAMFileWriterFactory();
         factory.setCreateIndex(indexOnTheFly);
         factory.setCreateMd5File(generateMD5);

From 6d59dd34558fb6cc564eabab9b6c5a278a341a26 Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Sun, 4 Nov 2012 23:42:02 -0500
Subject: [PATCH 069/236] Scala classes were only returning direct subclasses
 (confirmed when inspected in debugger) so changed PluginManager to allow
 specifying the explicit subclass. Removed some generics from PluginManager
 for now until able to figure out syntax for requesting explicit subclass.
 QStatusMessenger uses a slightly more primitive Map[String, Seq[RemoteFile]]
 instead of Map[ArgumentSource, Seq[RemoteFile]]. Added an
 QCommandPlugin.initScript utility method for handling specialized script
 types.

---
 .../org/broadinstitute/sting/gatk/WalkerManager.java  |  4 ++--
 .../sting/utils/classloader/PluginManager.java        | 11 ++++++-----
 .../org/broadinstitute/sting/queue/QCommandLine.scala | 11 ++++++++---
 .../broadinstitute/sting/queue/QCommandPlugin.scala   |  2 ++
 .../src/org/broadinstitute/sting/queue/QScript.scala  |  8 ++++++--
 .../sting/queue/engine/QStatusMessenger.scala         |  3 +--
 6 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
index fbacbddc4..28b5f918d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@@ -350,11 +350,11 @@ public class WalkerManager extends PluginManager<Walker> {
      * @return A name for this type of walker.
      */
     @Override
-    public String getName(Class<? extends Walker> walkerType) {
+    public String getName(Class walkerType) {
         String walkerName = "";
 
         if (walkerType.getAnnotation(WalkerName.class) != null)
-            walkerName = walkerType.getAnnotation(WalkerName.class).value().trim();
+            walkerName = ((WalkerName)walkerType.getAnnotation(WalkerName.class)).value().trim();
         else
             walkerName = super.getName(walkerType);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
index 43cc800d8..b39aae8ab 100644
--- a/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
+++ b/public/java/src/org/broadinstitute/sting/utils/classloader/PluginManager.java
@@ -101,7 +101,7 @@ public class PluginManager<PluginType> {
      * Create a new plugin manager.
      * @param pluginType Core type for a plugin.
      */
-    public PluginManager(Class<PluginType> pluginType) {
+    public PluginManager(Class pluginType) {
         this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), null);
     }
 
@@ -110,7 +110,7 @@ public class PluginManager<PluginType> {
      * @param pluginType Core type for a plugin.
      * @param classpath Custom class path to search for classes.
      */
-    public PluginManager(Class<PluginType> pluginType, List<URL> classpath) {
+    public PluginManager(Class pluginType, List<URL> classpath) {
         this(pluginType, pluginType.getSimpleName().toLowerCase(), pluginType.getSimpleName(), classpath);
     }
 
@@ -120,7 +120,7 @@ public class PluginManager<PluginType> {
      * @param pluginCategory Provides a category name to the plugin.  Must not be null.
      * @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name.  Can be null.
      */
-    public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix) {
+    public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix) {
         this(pluginType, pluginCategory, pluginSuffix, null);
     }
 
@@ -131,7 +131,7 @@ public class PluginManager<PluginType> {
      * @param pluginSuffix Provides a suffix that will be trimmed off when converting to a plugin name.  Can be null.
      * @param classpath Custom class path to search for classes.
      */
-    public PluginManager(Class<PluginType> pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
+    public PluginManager(Class pluginType, String pluginCategory, String pluginSuffix, List<URL> classpath) {
         this.pluginCategory = pluginCategory;
         this.pluginSuffix = pluginSuffix;
 
@@ -149,6 +149,7 @@ public class PluginManager<PluginType> {
         }
 
         // Load all classes types filtering them by concrete.
+        @SuppressWarnings("unchecked")
         Set<Class<? extends PluginType>> allTypes = reflections.getSubTypesOf(pluginType);
         for( Class<? extends PluginType> type: allTypes ) {
             // The plugin manager does not support anonymous classes; to be a plugin, a class must have a name.
@@ -325,7 +326,7 @@ public class PluginManager<PluginType> {
      * @param pluginType The type of plugin.
      * @return A name for this type of plugin.
      */
-    public String getName(Class<? extends PluginType> pluginType) {
+    public String getName(Class pluginType) {
         String pluginName = "";
 
         if (pluginName.length() == 0) {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 65abaf7be..637174557 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -92,13 +92,19 @@ class QCommandLine extends CommandLineProgram with Logging {
   private lazy val qScriptPluginManager = {
     qScriptClasses = IOUtils.tempDir("Q-Classes-", "", settings.qSettings.tempDirectory)
     qScriptManager.loadScripts(scripts, qScriptClasses)
-    new PluginManager[QScript](classOf[QScript], Seq(qScriptClasses.toURI.toURL))
+    new PluginManager[QScript](qPluginType, Seq(qScriptClasses.toURI.toURL))
   }
 
   private lazy val qCommandPlugin = {
     new PluginManager[QCommandPlugin](classOf[QCommandPlugin])
   }
 
+  private lazy val allCommandPlugins = qCommandPlugin.createAllTypes()
+
+  private lazy val qPluginType: Class[_ <: QScript] = {
+    allCommandPlugins.map(_.qScriptClass).headOption.getOrElse(classOf[QScript])
+  }
+
   /**
    * Takes the QScripts passed in, runs their script() methods, retrieves their generated
    * functions, and then builds and runs a QGraph based on the dependencies.
@@ -106,8 +112,6 @@ class QCommandLine extends CommandLineProgram with Logging {
   def execute = {
     ClassFieldCache.parsingEngine = this.parser
 
-    val allCommandPlugins = qCommandPlugin.createAllTypes()
-
     if (settings.qSettings.runName == null)
       settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
     if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory))
@@ -138,6 +142,7 @@ class QCommandLine extends CommandLineProgram with Logging {
     for (script <- allQScripts) {
       logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
       loadArgumentsIntoObject(script)
+      allCommandPlugins.foreach(_.initScript(script))
       // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now.
       //if (settings.run)
       script.pullInputs()
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
index 499c31554..eae6a6a92 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandPlugin.scala
@@ -6,4 +6,6 @@ import util.RemoteFileConverter
 trait QCommandPlugin {
   def statusMessenger: QStatusMessenger = null
   def remoteFileConverter: RemoteFileConverter = null
+  def qScriptClass: Class[_ <: QScript] = classOf[QScript]
+  def initScript(script: QScript) {}
 }
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 8c834696c..eb8be183a 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -149,13 +149,17 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
    * List out the remote outputs
    * @return the RemoteFile outputs by argument source
    */
-  def remoteInputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(inputFields)
+  def remoteInputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(inputFields))
 
   /**
    * List out the remote outputs
    * @return the RemoteFile outputs by argument source
    */
-  def remoteOutputs: Map[ArgumentSource, Seq[RemoteFile]] = remoteFieldMap(outputFields)
+  def remoteOutputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(outputFields))
+
+  private def tagMap(remoteFieldMap: Map[ArgumentSource, Seq[RemoteFile]]): Map[String, Seq[RemoteFile]] = {
+    remoteFieldMap.collect{ case (k, v) => ClassFieldCache.fullName(k) -> v }.toMap
+  }
 
   private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = {
     fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
index c4151dafc..a1133b944 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
@@ -1,6 +1,5 @@
 package org.broadinstitute.sting.queue.engine
 
-import org.broadinstitute.sting.commandline.ArgumentSource
 import org.broadinstitute.sting.queue.util.RemoteFile
 
 /**
@@ -8,7 +7,7 @@ import org.broadinstitute.sting.queue.util.RemoteFile
  */
 trait QStatusMessenger {
   def started()
-  def done(inputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]], outputs: Seq[Map[ArgumentSource, Seq[RemoteFile]]])
+  def done(inputs: Seq[Map[String, Seq[RemoteFile]]], outputs: Seq[Map[String, Seq[RemoteFile]]])
   def exit(message: String)
 
   def started(job: String)

From a17cd54b6883427454c6ae0bb379fee9ef7f460b Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 24 Oct 2012 16:57:08 -0400
Subject: [PATCH 073/236] Co-Reduction implementation in ReduceReads

ReduceReads now co-reduces bams if they're passed in toghether with multiple -I. Co-reduction forces every variant region in one sample to be a variant region in all samples.
Also:
  * Added integrationtest for co-reduction
  * Fixed bug with new no-recalculation implementation of the marksites object where the last object wasn't being removed after finalizing a variant region (updated MD5's accordingly)

DEV-200 #resolve #time 8m
---
 .../reducereads/CompressionStash.java         | 38 ++++++++
 .../reducereads/MultiSampleCompressor.java    | 49 ++++++----
 .../compression/reducereads/ReduceReads.java  |  2 +-
 .../reducereads/SingleSampleCompressor.java   | 38 ++++----
 .../reducereads/SlidingWindow.java            | 89 ++++++++++---------
 .../ReduceReadsIntegrationTest.java           | 10 +--
 .../reducereads/SimpleGenomeLoc.java          | 43 +++++++++
 7 files changed, 185 insertions(+), 84 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
index 714a4df18..a6e5b6c5b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/CompressionStash.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
 import org.broadinstitute.sting.utils.GenomeLocComparator;
 
+import java.util.Collection;
 import java.util.TreeSet;
 
 /**
@@ -18,4 +19,41 @@ public class CompressionStash extends TreeSet<SimpleGenomeLoc> {
     public CompressionStash() {
         super(new GenomeLocComparator());
     }
+
+    /**
+     * Adds a SimpleGenomeLoc to the stash and merges it with any overlapping (and contiguous) existing loc
+     * in the stash.
+     *
+     * @param insertLoc the new loc to be inserted
+     * @return true if the loc, or it's merged version, wasn't present in the list before.
+     */
+    @Override
+    public boolean add(SimpleGenomeLoc insertLoc) {
+        TreeSet<SimpleGenomeLoc> removedLocs = new TreeSet<SimpleGenomeLoc>();
+        for (SimpleGenomeLoc existingLoc : this) {
+            if (existingLoc.isPast(insertLoc)) {
+                break;                                          // if we're past the loc we're done looking for overlaps.
+            }
+            if (existingLoc.equals(insertLoc)) {
+                return false;                                   // if this loc was already present in the stash, we don't need to insert it.
+            }
+            if (existingLoc.contiguousP(insertLoc)) {
+                removedLocs.add(existingLoc);                   // list the original loc for merging
+            }
+        }
+        for (SimpleGenomeLoc loc : removedLocs) {
+            this.remove(loc);                                   // remove all locs that will be merged
+        }
+        removedLocs.add(insertLoc);                             // add the new loc to the list of locs that will be merged
+        return super.add(SimpleGenomeLoc.merge(removedLocs));   // merge them all into one loc and add to the stash
+    }
+
+    @Override
+    public boolean addAll(Collection<? extends SimpleGenomeLoc> locs) {
+        boolean result = false;
+        for (SimpleGenomeLoc loc : locs) {
+            result |= this.add(loc);
+        }
+        return result;
+    }
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
index 2c3439010..f348225ca 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/MultiSampleCompressor.java
@@ -3,13 +3,14 @@ package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 import net.sf.samtools.SAMFileHeader;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.SampleUtils;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
 import java.util.HashMap;
 import java.util.Map;
-import java.util.SortedSet;
+import java.util.Set;
 import java.util.TreeSet;
 
 /*
@@ -41,7 +42,7 @@ import java.util.TreeSet;
  *
  * @author depristo
  */
-public class MultiSampleCompressor implements Compressor {
+public class MultiSampleCompressor {
     protected static final Logger logger = Logger.getLogger(MultiSampleCompressor.class);
 
     protected Map<String, SingleSampleCompressor> compressorsPerSample = new HashMap<String, SingleSampleCompressor>();
@@ -55,30 +56,44 @@ public class MultiSampleCompressor implements Compressor {
                                  final int minBaseQual,
                                  final ReduceReads.DownsampleStrategy downsampleStrategy,
                                  final int nContigs,
-                                 final boolean allowPolyploidReduction,
-                                 final CompressionStash compressionStash) {
+                                 final boolean allowPolyploidReduction) {
         for ( String name : SampleUtils.getSAMFileSamples(header) ) {
             compressorsPerSample.put(name,
                     new SingleSampleCompressor(contextSize, downsampleCoverage,
-                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction, compressionStash));
+                                    minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, allowPolyploidReduction));
         }
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
-        String sample = read.getReadGroup().getSample();
-        SingleSampleCompressor compressor = compressorsPerSample.get(sample);
+    public Set<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
+        String sampleName = read.getReadGroup().getSample();
+        SingleSampleCompressor compressor = compressorsPerSample.get(sampleName);
         if ( compressor == null )
-            throw new ReviewedStingException("No compressor for sample " + sample);
-        return compressor.addAlignment(read);
+            throw new ReviewedStingException("No compressor for sample " + sampleName);
+        Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = compressor.addAlignment(read);
+        Set<GATKSAMRecord> reads = readsAndStash.getFirst();
+        CompressionStash regions = readsAndStash.getSecond();
+
+        reads.addAll(closeVariantRegionsInAllSamples(regions));
+
+        return reads;
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> close() {
-        SortedSet<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
-        for ( SingleSampleCompressor comp : compressorsPerSample.values() )
-            for ( GATKSAMRecord read : comp.close() )
-                reads.add(read);
+    public Set<GATKSAMRecord> close() {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        for ( SingleSampleCompressor sample : compressorsPerSample.values() ) {
+            Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = sample.close();
+            reads = readsAndStash.getFirst();
+        }
+        return reads;
+    }
+
+    private Set<GATKSAMRecord> closeVariantRegionsInAllSamples(CompressionStash regions) {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        if (!regions.isEmpty()) {
+            for (SingleSampleCompressor sample : compressorsPerSample.values()) {
+                reads.addAll(sample.closeVariantRegions(regions));
+            }
+        }
         return reads;
     }
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index b6761f4a6..a05992cb4 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -330,7 +330,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
      */
     @Override
     public ReduceReadsStash reduceInit() {
-        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION, compressionStash));
+        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy, nContigs, USE_POLYPLOID_REDUCTION));
     }
 
     /**
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
index 82a433300..ac3388795 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SingleSampleCompressor.java
@@ -1,8 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 
+import java.util.Set;
 import java.util.TreeSet;
 
 /**
@@ -10,7 +12,7 @@ import java.util.TreeSet;
  * @author carneiro, depristo
  * @version 3.0
  */
-public class SingleSampleCompressor implements Compressor {
+public class SingleSampleCompressor {
     final private int contextSize;
     final private int downsampleCoverage;
     final private int minMappingQuality;
@@ -20,11 +22,11 @@ public class SingleSampleCompressor implements Compressor {
     final private ReduceReads.DownsampleStrategy downsampleStrategy;
     final private int nContigs;
     final private boolean allowPolyploidReduction;
-    final CompressionStash compressionStash;
 
     private SlidingWindow slidingWindow;
     private int slidingWindowCounter;
 
+    public static Pair<Set<GATKSAMRecord>, CompressionStash> emptyPair = new Pair<Set<GATKSAMRecord>,CompressionStash>(new TreeSet<GATKSAMRecord>(), new CompressionStash());
 
     public SingleSampleCompressor(final int contextSize,
                                   final int downsampleCoverage,
@@ -34,8 +36,7 @@ public class SingleSampleCompressor implements Compressor {
                                   final int minBaseQual,
                                   final ReduceReads.DownsampleStrategy downsampleStrategy,
                                   final int nContigs,
-                                  final boolean allowPolyploidReduction,
-                                  final CompressionStash compressionStash) {
+                                  final boolean allowPolyploidReduction) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
         this.minMappingQuality = minMappingQuality;
@@ -46,15 +47,11 @@ public class SingleSampleCompressor implements Compressor {
         this.downsampleStrategy = downsampleStrategy;
         this.nContigs = nContigs;
         this.allowPolyploidReduction = allowPolyploidReduction;
-        this.compressionStash = compressionStash;
     }
 
-    /**
-     * @{inheritDoc}
-     */
-    @Override
-    public Iterable<GATKSAMRecord> addAlignment( GATKSAMRecord read ) {
-        TreeSet<GATKSAMRecord> result = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+    public Pair<Set<GATKSAMRecord>, CompressionStash> addAlignment( GATKSAMRecord read ) {
+        Set<GATKSAMRecord> reads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        CompressionStash stash = new CompressionStash();
         int readOriginalStart = read.getUnclippedStart();
 
         // create a new window if:
@@ -63,22 +60,27 @@ public class SingleSampleCompressor implements Compressor {
               (readOriginalStart - contextSize > slidingWindow.getStopLocation()))) {  // this read is too far away from the end of the current sliding window
 
             // close the current sliding window
-            result.addAll(slidingWindow.close());
+            Pair<Set<GATKSAMRecord>, CompressionStash> readsAndStash = slidingWindow.close();
+            reads = readsAndStash.getFirst();
+            stash = readsAndStash.getSecond();
             slidingWindow = null;                                                      // so we create a new one on the next if
         }
 
         if ( slidingWindow == null) {                                                  // this is the first read
-            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction, compressionStash);
+            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities(), nContigs, allowPolyploidReduction);
             slidingWindowCounter++;
         }
 
-        result.addAll(slidingWindow.addRead(read));
-        return result;
+        stash.addAll(slidingWindow.addRead(read));
+        return new Pair<Set<GATKSAMRecord>, CompressionStash>(reads, stash);
     }
 
-    @Override
-    public Iterable<GATKSAMRecord> close() {
-        return (slidingWindow != null) ? slidingWindow.close() : new TreeSet<GATKSAMRecord>();
+    public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
+        return (slidingWindow != null) ? slidingWindow.close() : emptyPair;
+    }
+
+    public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
+        return slidingWindow.closeVariantRegions(regions);
     }
 
 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
index 24cacd997..24a3ba3cb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@@ -6,8 +6,10 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMFileHeader;
 import org.broadinstitute.sting.gatk.downsampling.ReservoirDownsampler;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.recalibration.EventType;
+import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
@@ -55,7 +57,8 @@ public class SlidingWindow {
     private final int nContigs;
 
     private boolean allowPolyploidReductionInGeneral;
-    private CompressionStash compressionStash;
+
+    private static CompressionStash emptyRegions = new CompressionStash();
 
     /**
      * The types of synthetic reads to use in the finalizeAndAdd method
@@ -87,7 +90,7 @@ public class SlidingWindow {
     }
 
 
-    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction, CompressionStash compressionStash) {
+    public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader samHeader, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReads.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities, int nContigs, boolean allowPolyploidReduction) {
         this.contextSize = contextSize;
         this.downsampleCoverage = downsampleCoverage;
 
@@ -124,7 +127,6 @@ public class SlidingWindow {
         this.nContigs = nContigs;
 
         this.allowPolyploidReductionInGeneral = allowPolyploidReduction;
-        this.compressionStash = compressionStash;
     }
 
     /**
@@ -138,7 +140,7 @@ public class SlidingWindow {
      * @param read the read
      * @return a list of reads that have been finished by sliding the window.
      */
-    public List<GATKSAMRecord> addRead(GATKSAMRecord read) {
+    public CompressionStash addRead(GATKSAMRecord read) {
         addToHeader(windowHeader, read);                                                                                // update the window header counts
         readsInWindow.add(read);                                                                                        // add read to sliding reads
         return slideWindow(read.getUnclippedStart());
@@ -152,8 +154,9 @@ public class SlidingWindow {
      * @param variantSite  boolean array with true marking variant regions
      * @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
      */
-    private SimpleGenomeLoc getNextVariantRegion(int from, int to, boolean[] variantSite) {
+    private SimpleGenomeLoc findNextVariantRegion(int from, int to, boolean[] variantSite, boolean forceClose) {
         boolean foundStart = false;
+        final int windowHeaderStart = getStartLocation(windowHeader);
         int variantRegionStartIndex = 0;
         for (int i=from; i<to; i++) {
             if (variantSite[i] && !foundStart) {
@@ -161,10 +164,12 @@ public class SlidingWindow {
                 foundStart = true;
             }
             else if(!variantSite[i] && foundStart) {
-                return(new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, i-1, true));
+                return(new SimpleGenomeLoc(contig, contigIndex, windowHeaderStart + variantRegionStartIndex, windowHeaderStart + i - 1, true));
             }
         }
-        return (foundStart) ? new SimpleGenomeLoc(contig, contigIndex, variantRegionStartIndex, to-1, false) : null;
+        final int refStart = windowHeaderStart + variantRegionStartIndex;
+        final int refStop  = windowHeaderStart + to - 1;
+        return (foundStart && forceClose) ? new SimpleGenomeLoc(contig, contigIndex, refStart, refStop, true) : null;
     }
 
     /**
@@ -173,19 +178,20 @@ public class SlidingWindow {
      * @param from         beginning window header index of the search window (inclusive)
      * @param to           end window header index of the search window (exclusive)
      * @param variantSite  boolean array with true marking variant regions
-     * @return a list with start/stops of variant regions following getNextVariantRegion description
+     * @return a list with start/stops of variant regions following findNextVariantRegion description
      */
-    private CompressionStash getVariantRegionsFromThisSample(int from, int to, boolean[] variantSite) {
+    private CompressionStash findVariantRegions(int from, int to, boolean[] variantSite, boolean forceClose) {
         CompressionStash regions = new CompressionStash();
         int index = from;
         while(index < to) {
-            SimpleGenomeLoc result = getNextVariantRegion(index, to, variantSite);
+            SimpleGenomeLoc result = findNextVariantRegion(index, to, variantSite, forceClose);
             if (result == null)
                 break;
 
             regions.add(result);
-            if (result.getStop() < 0)
+            if (!result.isFinished())
                 break;
+
             index = result.getStop() + 1;
         }
         return regions;
@@ -201,25 +207,25 @@ public class SlidingWindow {
      * @param incomingReadUnclippedStart the incoming read's start position. Must be the unclipped start!
      * @return all reads that have fallen to the left of the sliding window after the slide
      */
-    protected List<GATKSAMRecord> slideWindow(final int incomingReadUnclippedStart) {
-        List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
-
+    protected CompressionStash slideWindow(final int incomingReadUnclippedStart) {
         final int windowHeaderStartLocation = getStartLocation(windowHeader);
+        CompressionStash regions = emptyRegions;
+        boolean forceClose = true;
 
         if (incomingReadUnclippedStart - contextSize > windowHeaderStartLocation) {
             markSites(incomingReadUnclippedStart);
             int readStartHeaderIndex = incomingReadUnclippedStart - windowHeaderStartLocation;
             int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0);                                       // this is the limit of what we can close/send to consensus (non-inclusive)
 
-            CompressionStash regions = getVariantRegionsFromThisSample(0, breakpoint, markedSites.getVariantSiteBitSet());
-            finalizedReads = closeVariantRegions(regions, false);
-
-            while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
-                readsInWindow.pollFirst();
-            }
+            regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
         }
 
-        return finalizedReads;
+        // todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
+        while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
+                readsInWindow.pollFirst();
+        }
+
+        return regions;
     }
 
 
@@ -623,31 +629,27 @@ public class SlidingWindow {
         result.addAll(addToSyntheticReads(windowHeader, 0, stop, false));
         result.addAll(finalizeAndAdd(ConsensusType.BOTH));
 
-        return result;                                      // finalized reads will be downsampled if necessary
+        return result; // finalized reads will be downsampled if necessary
     }
 
-
-    private List<GATKSAMRecord> closeVariantRegions(CompressionStash regions, boolean forceClose) {
-        List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
+    public Set<GATKSAMRecord> closeVariantRegions(CompressionStash regions) {
+        TreeSet<GATKSAMRecord> allReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
         if (!regions.isEmpty()) {
             int lastStop = -1;
+            int windowHeaderStart = getStartLocation(windowHeader);
+
             for (SimpleGenomeLoc region : regions) {
-                int start = region.getStart();
-                int stop = region.getStop();
+                if (region.isFinished() && region.getContig() == contig && region.getStart() >= windowHeaderStart && region.getStop() <= windowHeaderStart + windowHeader.size()) {
+                    int start = region.getStart() - windowHeaderStart;
+                    int stop = region.getStop() - windowHeaderStart;
 
-                if (!region.isFinished()) {
-                    if(forceClose)                          // region is unfinished but we're forcing the close of this window
-                        stop = windowHeader.size() - 1;
-                    else
-                        continue;                           // region is unfinished and we're not forcing the close of this window
+                    allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1)); // todo -- add condition here dependent on dbSNP track
+                    lastStop = stop;
                 }
-
-                allReads.addAll(closeVariantRegion(start, stop, regions.size() > 1));
-                lastStop = stop;
             }
 
-            for (int i = 0; i < lastStop; i++)              // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
-                windowHeader.remove();                      // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
+            for (int i = 0; i <= lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
+                windowHeader.remove();
         }
         return allReads;
     }
@@ -681,23 +683,24 @@ public class SlidingWindow {
      *
      * @return All reads generated
      */
-    public List<GATKSAMRecord> close() {
+    public Pair<Set<GATKSAMRecord>, CompressionStash> close() {
         // mark variant regions
-        List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
+        Set<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
+        CompressionStash regions = new CompressionStash();
+        boolean forceCloseUnfinishedRegions = true;
 
         if (!windowHeader.isEmpty()) {
             markSites(getStopLocation(windowHeader) + 1);
-            CompressionStash regions = getVariantRegionsFromThisSample(0, windowHeader.size(), markedSites.getVariantSiteBitSet());
-            finalizedReads = closeVariantRegions(regions, true);
+            regions = findVariantRegions(0, windowHeader.size(), markedSites.getVariantSiteBitSet(), forceCloseUnfinishedRegions);
+            finalizedReads = closeVariantRegions(regions);
 
             if (!windowHeader.isEmpty()) {
                 finalizedReads.addAll(addToSyntheticReads(windowHeader, 0, windowHeader.size(), false));
                 finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH));                                              // if it ended in running consensus, finish it up
             }
-
         }
 
-        return finalizedReads;
+        return new Pair<Set<GATKSAMRecord>, CompressionStash>(finalizedReads, regions);
     }
 
     /**
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 50500536f..1e539dc9d 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -26,23 +26,23 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
 
     @Test(enabled = true)
     public void testDefaultCompression() {
-        RRTest("testDefaultCompression ", L, "46ea88e32bae3072f5cd68a0db4b55f1");
+        RRTest("testDefaultCompression ", L, "98080d3c53f441564796fc143cf510da");
     }
 
     @Test(enabled = true)
     public void testMultipleIntervals() {
         String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
-        RRTest("testMultipleIntervals ", intervals, "c3784a0b42f5456b705f9b152a4b697a");
+        RRTest("testMultipleIntervals ", intervals, "c5dcdf4edf368b5b897d66f76034d9f0");
     }
 
     @Test(enabled = true)
     public void testHighCompression() {
-        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e385eb0ae5768f8507671d5303a212d5");
+        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "27cb99e87eda5e46187e56f50dd37f26");
     }
 
     @Test(enabled = true)
     public void testLowCompression() {
-        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "6b5546be9363e493b9838542f5dc8cae");
+        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "4e7f111688d49973c35669855b7a2eaf");
     }
 
     @Test(enabled = true)
@@ -83,7 +83,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     @Test(enabled = true)
     public void testCoReduction() {
         String base = String.format("-T ReduceReads %s -npt -R %s -I %s -I %s", COREDUCTION_L, REF, COREDUCTION_BAM_A, COREDUCTION_BAM_B) + " -o %s ";
-        executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("")));
+        executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
     }
 
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
index 45e105751..51d8aad63 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SimpleGenomeLoc.java
@@ -1,6 +1,10 @@
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.SortedSet;
 
 /**
  * GenomeLocs are very useful objects to keep track of genomic locations and perform set operations
@@ -27,4 +31,43 @@ public class SimpleGenomeLoc extends GenomeLoc {
     public boolean isFinished() {
         return finished;
     }
+
+    @Requires("a != null && b != null")
+    public static SimpleGenomeLoc merge(SimpleGenomeLoc a, SimpleGenomeLoc b) throws ReviewedStingException {
+        if(GenomeLoc.isUnmapped(a) || GenomeLoc.isUnmapped(b)) {
+            throw new ReviewedStingException("Tried to merge unmapped genome locs");
+        }
+
+        if (!(a.contiguousP(b))) {
+            throw new ReviewedStingException("The two genome locs need to be contiguous");
+        }
+
+
+        return new SimpleGenomeLoc(a.getContig(), a.contigIndex,
+                Math.min(a.getStart(), b.getStart()),
+                Math.max(a.getStop(), b.getStop()),
+                a.isFinished());
+    }
+
+    /**
+     * Merges a list of *sorted* *contiguous* locs into one
+     *
+     * @param sortedLocs a sorted list of contiguous locs
+     * @return one merged loc
+     */
+    public static SimpleGenomeLoc merge(SortedSet<SimpleGenomeLoc> sortedLocs) {
+        SimpleGenomeLoc previousLoc = null;
+        for (SimpleGenomeLoc loc : sortedLocs) {
+            if (loc.isUnmapped()) {
+                throw new ReviewedStingException("Tried to merge unmapped genome locs");
+            }
+            if (previousLoc != null && !previousLoc.contiguousP(loc)) {
+                throw new ReviewedStingException("The genome locs need to be contiguous");
+            }
+            previousLoc = loc;
+        }
+        SimpleGenomeLoc firstLoc = sortedLocs.first();
+        SimpleGenomeLoc lastLoc = sortedLocs.last();
+        return merge(firstLoc, lastLoc);
+    }
 }

From dba31018f46052984ab3b779d18950e5d1aee501 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 13 Nov 2012 01:18:37 -0500
Subject: [PATCH 077/236] Implementation of BySampleSAMFileWriter ReduceReads
 now works with the n-way-out capability, splitting by sample. DEV-27 #resolve
 #time 3m

---
 .../compression/reducereads/ReduceReads.java  |  37 +-
 .../utils/sam/BySampleSAMFileWriter.java      |  70 ++++
 .../sting/utils/sam/NWaySAMFileWriter.java    | 374 +++++++++---------
 3 files changed, 290 insertions(+), 191 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index a05992cb4..3712e4524 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -25,6 +25,9 @@
 
 package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
 
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMFileWriter;
+import net.sf.samtools.SAMProgramRecord;
 import net.sf.samtools.util.SequenceUtil;
 import org.broadinstitute.sting.commandline.Argument;
 import org.broadinstitute.sting.commandline.Hidden;
@@ -45,6 +48,7 @@ import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
+import org.broadinstitute.sting.utils.sam.BySampleSAMFileWriter;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.sam.ReadUtils;
 
@@ -86,7 +90,8 @@ import java.util.*;
 public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
 
     @Output
-    private StingSAMFileWriter out;
+    private StingSAMFileWriter out = null;
+    private SAMFileWriter writerToUse = null;
 
     /**
      * The number of bases to keep around mismatches (potential variation)
@@ -196,6 +201,10 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
     @Argument(fullName = "contigs", shortName = "ctg", doc = "", required = false)
     private int nContigs = 2;
 
+    @Hidden
+    @Argument(fullName = "nwayout", shortName = "nw", doc = "", required = false)
+    private boolean nwayout = false;
+
     @Hidden
     @Argument(fullName = "", shortName = "dl", doc = "", required = false)
     private int debugLevel = 0;
@@ -227,6 +236,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
     SortedSet<GenomeLoc> intervalList;
     
     private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads";   // The name that will go in the @PG tag
+    private static final String PROGRAM_FILENAME_EXTENSION = ".reduced.bam";
 
     /**
      * Basic generic initialization of the readNameHash and the intervalList. Output initialization
@@ -242,10 +252,24 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         if (toolkit.getIntervals() != null)
             intervalList.addAll(toolkit.getIntervals());
 
-        if (!NO_PG_TAG)
-            Utils.setupWriter(out, toolkit, false, true, this, PROGRAM_RECORD_NAME);
-        else
+
+        // todo -- rework the whole NO_PG_TAG thing
+        final boolean preSorted = true;
+        final boolean indexOnTheFly = true;
+        final boolean generateMD5 = true;
+        final boolean keep_records = true;
+        final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
+        if (nwayout) {
+            SAMProgramRecord programRecord = NO_PG_TAG ? null : Utils.createProgramRecord(toolkit, this, PROGRAM_RECORD_NAME);
+            writerToUse = new BySampleSAMFileWriter(toolkit, PROGRAM_FILENAME_EXTENSION, sortOrder, preSorted, indexOnTheFly, NO_PG_TAG, programRecord, true);
+        }
+        else {
+            writerToUse = out;
             out.setPresorted(false);
+            if (!NO_PG_TAG) {
+                Utils.setupWriter(out, toolkit, !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
+            }
+        }
     }
 
     /**
@@ -386,6 +410,9 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         // output any remaining reads in the compressor
         for (GATKSAMRecord read : stash.close())
             outputRead(read);
+
+        if (nwayout)
+            writerToUse.close();
     }
 
     /**
@@ -554,7 +581,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         if (!DONT_COMPRESS_READ_NAMES)
             compressReadName(read);
 
-        out.addAlignment(read);
+        writerToUse.addAlignment(read);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java
new file mode 100644
index 000000000..6bad58d9f
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/BySampleSAMFileWriter.java
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.SAMFileHeader;
+import net.sf.samtools.SAMProgramRecord;
+import net.sf.samtools.SAMReadGroupRecord;
+import net.sf.samtools.SAMRecord;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: carneiro
+ * Date: Nov 13
+ */
+public class BySampleSAMFileWriter extends NWaySAMFileWriter{
+
+    private final Map<String, SAMReaderID> sampleToWriterMap;
+
+    public BySampleSAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order, boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        super(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, pRecord, keep_records);
+
+        sampleToWriterMap = new HashMap<String, SAMReaderID>(toolkit.getSAMFileHeader().getReadGroups().size() * 2);
+
+        for (SAMReaderID readerID : toolkit.getReadsDataSource().getReaderIDs()) {
+            for (SAMReadGroupRecord rg : toolkit.getReadsDataSource().getHeader(readerID).getReadGroups()) {
+                String sample = rg.getSample();
+                if (sampleToWriterMap.containsKey(sample) && sampleToWriterMap.get(sample) != readerID) {
+                    throw new ReviewedStingException("The same sample appears in multiple files, this input cannot be multiplexed using the BySampleSAMFileWriter, try NWaySAMFileWriter instead.");
+                }
+                else {
+                    sampleToWriterMap.put(sample, readerID);
+                }
+            }
+        }
+    }
+
+    @Override
+    public void addAlignment(SAMRecord samRecord) {
+        super.addAlignment(samRecord, sampleToWriterMap.get(samRecord.getReadGroup().getSample()));
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
index fa07523f3..83d1c99bf 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
@@ -1,186 +1,188 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.sam;
-
-import net.sf.samtools.*;
-import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
-import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
-import org.broadinstitute.sting.utils.exceptions.StingException;
-import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.text.TextFormattingUtils;
-
-import java.io.File;
-import java.util.*;
-
-/**
- * Created by IntelliJ IDEA.
- * User: asivache
- * Date: May 31, 2011
- * Time: 3:52:49 PM
- * To change this template use File | Settings | File Templates.
- */
-public class NWaySAMFileWriter implements SAMFileWriter {
-
-    private Map<SAMReaderID,SAMFileWriter> writerMap = null;
-    private boolean presorted ;
-    GenomeAnalysisEngine toolkit;
-    boolean KEEP_ALL_PG_RECORDS = false;
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                             boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
-        this.presorted = presorted;
-        this.toolkit = toolkit;
-        this.KEEP_ALL_PG_RECORDS = keep_records;
-        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
-        setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
-        this.presorted = presorted;
-        this.toolkit = toolkit;
-        this.KEEP_ALL_PG_RECORDS = keep_records;
-        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
-        setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                             boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
-        this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
-    }
-
-    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
-        this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
-    }
-
-    /**
-     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
-     * from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
-     * onto a unique output file name.
-     * @param toolkit
-     * @param in2out
-     */
-    public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
-        if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
-        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
-
-            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
-
-            String outName;
-            if ( ! in2out.containsKey(fName) )
-                    throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
-            outName = in2out.get(fName);
-
-            if ( writerMap.containsKey( rid ) )
-                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
-                        "map file likely contains multiple entries for this input file");
-
-            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
-        }
-
-    }
-
-    /**
-     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
-     * from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
-     * input file name and adding ext instead (e.g. ".cleaned.bam").
-     * onto a unique output file name.
-     * @param toolkit
-     * @param ext
-     */
-    public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
-                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
-        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
-
-            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
-
-            String outName;
-            int pos ;
-            if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
-            else {
-                if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
-                else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
-            }
-            String prefix = fName.substring(0,pos);
-            outName = prefix+ext;
-
-            if ( writerMap.containsKey( rid ) )
-                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
-            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
-        }
-
-    }
-
-    private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
-                           boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
-        File f = new File(outName);
-        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
-        header.setSortOrder(order);
-
-        if ( programRecord != null )  {
-            // --->> add program record
-            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
-            for ( SAMProgramRecord record : oldRecords ) {
-                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
-                    newRecords.add(record);
-            }
-            newRecords.add(programRecord);
-            header.setProgramRecords(newRecords);
-            // <-- add program record ends here
-        }
-        SAMFileWriterFactory factory = new SAMFileWriterFactory();
-        factory.setCreateIndex(indexOnTheFly);
-        factory.setCreateMd5File(generateMD5);
-        SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
-        writerMap.put(id,sw);
-    }
-
-    public Collection<SAMFileWriter> getWriters() {
-        return writerMap.values();
-    }
-
-    public void addAlignment(SAMRecord samRecord) {
-        final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
-        String rg = samRecord.getStringAttribute("RG");
-        if ( rg != null ) {
-            String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
-            samRecord.setAttribute("RG",rg_orig);
-        }
-        writerMap.get(id).addAlignment(samRecord);
-    }
-
-    public SAMFileHeader getFileHeader() {
-        return toolkit.getSAMFileHeader();
-    }
-
-    public void close() {
-        for ( SAMFileWriter w : writerMap.values() ) w.close();
-    }
-}
+/*
+ * Copyright (c) 2010 The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.sam;
+
+import net.sf.samtools.*;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.exceptions.StingException;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+import java.io.File;
+import java.util.*;
+
+/**
+ * Created by IntelliJ IDEA.
+ * User: asivache
+ * Date: May 31, 2011
+ * Time: 3:52:49 PM
+ * To change this template use File | Settings | File Templates.
+ */
+public class NWaySAMFileWriter implements SAMFileWriter {
+
+    private Map<SAMReaderID,SAMFileWriter> writerMap = null;
+    private boolean presorted ;
+    GenomeAnalysisEngine toolkit;
+    boolean KEEP_ALL_PG_RECORDS = false;
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                             boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        this.presorted = presorted;
+        this.toolkit = toolkit;
+        this.KEEP_ALL_PG_RECORDS = keep_records;
+        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
+        setupByReader(toolkit,in2out,order, presorted, indexOnTheFly, generateMD5, pRecord);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly , boolean generateMD5, SAMProgramRecord pRecord, boolean keep_records) {
+        this.presorted = presorted;
+        this.toolkit = toolkit;
+        this.KEEP_ALL_PG_RECORDS = keep_records;
+        writerMap = new HashMap<SAMReaderID,SAMFileWriter>();
+        setupByReader(toolkit,ext,order, presorted, indexOnTheFly, generateMD5, pRecord);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                             boolean presorted, boolean indexOnTheFly, boolean generateMD5) {
+        this(toolkit, in2out, order, presorted, indexOnTheFly, generateMD5, null,false);
+    }
+
+    public NWaySAMFileWriter(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly , boolean generateMD5) {
+        this(toolkit, ext, order, presorted, indexOnTheFly, generateMD5, null,false);
+    }
+
+    /**
+     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
+     * from <code>toolkit</code>). The <code>in2out</code> map must contain an entry for each input filename and map it
+     * onto a unique output file name.
+     * @param toolkit
+     * @param in2out
+     */
+    public void setupByReader(GenomeAnalysisEngine toolkit, Map<String,String> in2out, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
+        if ( in2out==null ) throw new StingException("input-output bam filename map for n-way-out writing is NULL");
+        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
+
+            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
+
+            String outName;
+            if ( ! in2out.containsKey(fName) )
+                    throw new UserException.BadInput("Input-output bam filename map does not contain an entry for the input file "+fName);
+            outName = in2out.get(fName);
+
+            if ( writerMap.containsKey( rid ) )
+                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered; "+
+                        "map file likely contains multiple entries for this input file");
+
+            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
+        }
+
+    }
+
+    /**
+     * Instantiates multiple underlying SAM writes, one per input SAM reader registered with GATK engine (those will be retrieved
+     * from <code>toolkit</code>). The output file names will be generated automatically by stripping ".sam" or ".bam" off the
+     * input file name and adding ext instead (e.g. ".cleaned.bam").
+     * onto a unique output file name.
+     * @param toolkit
+     * @param ext
+     */
+    public void setupByReader(GenomeAnalysisEngine toolkit, String ext, SAMFileHeader.SortOrder order,
+                              boolean presorted, boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord pRecord) {
+        for ( SAMReaderID rid : toolkit.getReadsDataSource().getReaderIDs() ) {
+
+            String fName = toolkit.getReadsDataSource().getSAMFile(rid).getName();
+
+            String outName;
+            int pos ;
+            if ( fName.toUpperCase().endsWith(".BAM") ) pos = fName.toUpperCase().lastIndexOf(".BAM");
+            else {
+                if ( fName.toUpperCase().endsWith(".SAM") ) pos = fName.toUpperCase().lastIndexOf(".SAM");
+                else throw new UserException.BadInput("Input file name "+fName+" does not end with .sam or .bam");
+            }
+            String prefix = fName.substring(0,pos);
+            outName = prefix+ext;
+
+            if ( writerMap.containsKey( rid ) )
+                throw new StingException("nWayOut mode: Reader id for input sam file "+fName+" is already registered");
+            addWriter(rid,outName, order, presorted, indexOnTheFly, generateMD5, pRecord);
+        }
+
+    }
+
+    private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
+                           boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
+        File f = new File(outName);
+        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
+        header.setSortOrder(order);
+
+        if ( programRecord != null )  {
+            // --->> add program record
+            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
+            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
+            for ( SAMProgramRecord record : oldRecords ) {
+                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
+                    newRecords.add(record);
+            }
+            newRecords.add(programRecord);
+            header.setProgramRecords(newRecords);
+            // <-- add program record ends here
+        }
+        SAMFileWriterFactory factory = new SAMFileWriterFactory();
+        factory.setCreateIndex(indexOnTheFly);
+        factory.setCreateMd5File(generateMD5);
+        SAMFileWriter sw = factory.makeSAMOrBAMWriter(header, presorted, f);
+        writerMap.put(id,sw);
+    }
+
+    public Collection<SAMFileWriter> getWriters() {
+        return writerMap.values();
+    }
+
+    public void addAlignment(SAMRecord samRecord) {
+        final SAMReaderID id = toolkit.getReaderIDForRead(samRecord);
+        String rg = samRecord.getStringAttribute("RG");
+        if ( rg != null ) {
+            String rg_orig = toolkit.getReadsDataSource().getOriginalReadGroupId(rg);
+            samRecord.setAttribute("RG",rg_orig);
+        }
+        addAlignment(samRecord, id);
+    }
+
+    public void addAlignment(SAMRecord samRecord, SAMReaderID readerID) {
+        writerMap.get(readerID).addAlignment(samRecord);
+    }
+
+    public SAMFileHeader getFileHeader() {
+        return toolkit.getSAMFileHeader();
+    }
+
+    public void close() {
+        for ( SAMFileWriter w : writerMap.values() ) w.close();
+    }
+}

From a079d8d0d106660ca5bdd4d2c93d01f4fef141a9 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 13 Nov 2012 15:21:57 -0500
Subject: [PATCH 078/236] Breaking the utility to write @PG tags for
 SAMFileWriters and StingSAMFileWriters

---
 .../compression/reducereads/ReduceReads.java  |  3 +-
 .../org/broadinstitute/sting/utils/Utils.java | 59 +++++++++++++++++--
 .../sting/utils/sam/NWaySAMFileWriter.java    | 21 ++-----
 3 files changed, 59 insertions(+), 24 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 3712e4524..3cdf3d75e 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -256,7 +256,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         // todo -- rework the whole NO_PG_TAG thing
         final boolean preSorted = true;
         final boolean indexOnTheFly = true;
-        final boolean generateMD5 = true;
         final boolean keep_records = true;
         final SAMFileHeader.SortOrder sortOrder = SAMFileHeader.SortOrder.coordinate;
         if (nwayout) {
@@ -267,7 +266,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
             writerToUse = out;
             out.setPresorted(false);
             if (!NO_PG_TAG) {
-                Utils.setupWriter(out, toolkit, !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
+                Utils.setupWriter(out, toolkit, toolkit.getSAMFileHeader(), !preSorted, keep_records, this, PROGRAM_RECORD_NAME);
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index f4a200af0..b780d0966 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -687,23 +687,69 @@ public class Utils {
             array[i] = value;
     }
 
-    public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
-        final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
-
-        SAMFileHeader header = toolkit.getSAMFileHeader();
+    /**
+     * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
+     * up the writer with the header and presorted status.
+     *
+     * @param toolkit             the engine
+     * @param originalHeader      original header
+     * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+     * @param programRecord       the program record for this program
+     */
+    public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, SAMProgramRecord programRecord) {
+        SAMFileHeader header = originalHeader.clone();
         List<SAMProgramRecord> oldRecords = header.getProgramRecords();
         List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
         for ( SAMProgramRecord record : oldRecords )
-            if ( !record.getId().startsWith(PROGRAM_RECORD_NAME) || KEEP_ALL_PG_RECORDS )
+            if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
                 newRecords.add(record);
 
         newRecords.add(programRecord);
         header.setProgramRecords(newRecords);
+        return header;
+    }
 
+    /**
+    * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and returns
+    * the new header to be added to the BAM writer.
+    *
+    * @param toolkit             the engine
+    * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+    * @param walker              the walker object (so we can extract the command line)
+    * @param PROGRAM_RECORD_NAME the name for the PG tag
+    * @return a pre-filled header for the bam writer
+    */
+    public static SAMFileHeader setupWriter(GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
+        final SAMProgramRecord programRecord = createProgramRecord(toolkit, walker, PROGRAM_RECORD_NAME);
+        return setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, programRecord);
+    }
+
+    /**
+     * Creates a program record for the program, adds it to the list of program records (@PG tags) in the bam file and sets
+     * up the writer with the header and presorted status.
+     *
+     * @param writer              BAM file writer
+     * @param toolkit             the engine
+     * @param preSorted           whether or not the writer can assume reads are going to be added are already sorted
+     * @param KEEP_ALL_PG_RECORDS whether or not to keep all the other program records already existing in this BAM file
+     * @param walker              the walker object (so we can extract the command line)
+     * @param PROGRAM_RECORD_NAME the name for the PG tag
+     */
+    public static void setupWriter(StingSAMFileWriter writer, GenomeAnalysisEngine toolkit, SAMFileHeader originalHeader, boolean preSorted, boolean KEEP_ALL_PG_RECORDS, Object walker, String PROGRAM_RECORD_NAME) {
+        SAMFileHeader header = setupWriter(toolkit, originalHeader, KEEP_ALL_PG_RECORDS, walker, PROGRAM_RECORD_NAME);
         writer.writeHeader(header);
         writer.setPresorted(preSorted);
     }
-    
+
+
+    /**
+     * Creates a program record (@PG) tag
+     *
+     * @param toolkit             the engine
+     * @param walker              the walker object (so we can extract the command line)
+     * @param PROGRAM_RECORD_NAME the name for the PG tag
+     * @return a program record for the tool
+     */
     public static SAMProgramRecord createProgramRecord(GenomeAnalysisEngine toolkit, Object walker, String PROGRAM_RECORD_NAME) {
         final SAMProgramRecord programRecord = new SAMProgramRecord(PROGRAM_RECORD_NAME);
         final ResourceBundle headerInfo = TextFormattingUtils.loadResourceBundle("StingText");
@@ -858,4 +904,5 @@ public class Utils {
         }
         return subLists;
     }
+
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
index 83d1c99bf..cdf70884c 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/NWaySAMFileWriter.java
@@ -28,11 +28,14 @@ package org.broadinstitute.sting.utils.sam;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.StingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 
 import java.io.File;
-import java.util.*;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Created by IntelliJ IDEA.
@@ -138,21 +141,7 @@ public class NWaySAMFileWriter implements SAMFileWriter {
     private void addWriter(SAMReaderID id , String outName, SAMFileHeader.SortOrder order, boolean presorted,
                            boolean indexOnTheFly, boolean generateMD5, SAMProgramRecord programRecord) {
         File f = new File(outName);
-        SAMFileHeader header = toolkit.getSAMFileHeader(id).clone();
-        header.setSortOrder(order);
-
-        if ( programRecord != null )  {
-            // --->> add program record
-            List<SAMProgramRecord> oldRecords = header.getProgramRecords();
-            List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
-            for ( SAMProgramRecord record : oldRecords ) {
-                if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
-                    newRecords.add(record);
-            }
-            newRecords.add(programRecord);
-            header.setProgramRecords(newRecords);
-            // <-- add program record ends here
-        }
+        SAMFileHeader header = Utils.setupWriter(toolkit, toolkit.getSAMFileHeader(id), KEEP_ALL_PG_RECORDS, programRecord);
         SAMFileWriterFactory factory = new SAMFileWriterFactory();
         factory.setCreateIndex(indexOnTheFly);
         factory.setCreateMd5File(generateMD5);

From 843384e43539eb9d41f5449cb2408fe3dd52cbb9 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 14 Nov 2012 11:47:09 -0500
Subject: [PATCH 079/236] Rename hg19 files in bundle to b37 since that's what
 they are

---
 .../sting/queue/qscripts/GATKResourcesBundle.scala            | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
index 24ab50451..dc6cae197 100755
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/GATKResourcesBundle.scala
@@ -147,13 +147,13 @@ class GATKResourcesBundle extends QScript {
     //
     // example call set for wiki tutorial
     //
-    addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.hg19.subset.vcf",
+    addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/exampleCalls/NA12878.HiSeq.WGS.bwa.cleaned.raw.b37.subset.vcf",
       "NA12878.HiSeq.WGS.bwa.cleaned.raw.subset", b37, true, true))
 
     //
     // Test BAM file, specific to each reference
     //
-    addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam",
+    addResource(new Resource("/humgen/gsa-hpprojects/NA12878Collection/bams/NA12878.HiSeq.WGS.bwa.cleaned.recal.b37.20.bam",
       "IGNORE", b37, false, false))
 
     //

From 8b749673bce448d1b92cae97649f161d0d04eef0 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 14 Nov 2012 13:59:34 -0500
Subject: [PATCH 080/236] centralize header element removal in reduce reads

---
 .../compression/reducereads/ReduceReads.java     |  1 -
 .../compression/reducereads/SlidingWindow.java   | 16 +++++++++-------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 3cdf3d75e..629a27c48 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -253,7 +253,6 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
             intervalList.addAll(toolkit.getIntervals());
 
 
-        // todo -- rework the whole NO_PG_TAG thing
         final boolean preSorted = true;
         final boolean indexOnTheFly = true;
         final boolean keep_records = true;
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
index 24a3ba3cb..fff1c20a5 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/SlidingWindow.java
@@ -220,7 +220,6 @@ public class SlidingWindow {
             regions = findVariantRegions(0, breakpoint, markedSites.getVariantSiteBitSet(), !forceClose);
         }
 
-        // todo -- can be more aggressive here removing until the NEW window header start location after closing the variant regions
         while (!readsInWindow.isEmpty() && readsInWindow.first().getSoftEnd() < windowHeaderStartLocation) {
                 readsInWindow.pollFirst();
         }
@@ -607,9 +606,7 @@ public class SlidingWindow {
                     toRemove.add(read);
                 }
             }
-            for (GATKSAMRecord read : toRemove) {
-                readsInWindow.remove(read);
-            }
+            removeReadsFromWindow(toRemove);
         }
         return allReads;
     }
@@ -805,9 +802,8 @@ public class SlidingWindow {
                 hetReads.add(finalizeRunningConsensus());
         }
 
-        for (GATKSAMRecord read : toRemove) {
-            readsInWindow.remove(read);
-        }
+        removeReadsFromWindow(toRemove);
+
         return hetReads;
     }
 
@@ -924,5 +920,11 @@ public class SlidingWindow {
             }
         }
     }
+
+    private void removeReadsFromWindow (List<GATKSAMRecord> readsToRemove) {
+        for (GATKSAMRecord read : readsToRemove) {
+            readsInWindow.remove(read);
+        }
+    }
 }
 

From 89bbe73a43c6e6ebb30ee564f7b04a432139f716 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Wed, 14 Nov 2012 14:39:04 -0500
Subject: [PATCH 081/236] Commenting out CMI pipeline test that wasn't meant to
 be in GATK repository (why was this merged??)

---
 .../gatk/walkers/annotator/VariantAnnotatorEngine.java      | 6 +++++-
 .../gatk/walkers/genotyper/UnifiedGenotyperEngine.java      | 4 ++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
index ee4f77752..725097ddc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@@ -277,8 +277,12 @@ public class VariantAnnotatorEngine {
             if ( expression.fieldName.equals("ID") ) {
                 if ( vc.hasID() )
                     infoAnnotations.put(expression.fullName, vc.getID());
+            } else if (expression.fieldName.equals("ALT")) {
+                infoAnnotations.put(expression.fullName, vc.getAlternateAllele(0).getDisplayString());
+
             } else if ( vc.hasAttribute(expression.fieldName) ) {
-                infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
+                    infoAnnotations.put(expression.fullName, vc.getAttribute(expression.fieldName));
+
             }
         }
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 879a46ab0..22ed95365 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -190,8 +190,8 @@ public class UnifiedGenotyperEngine {
                     final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
                     if ( vc != null )
                         results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
- //                   else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
-   //                     results.add(generateEmptyContext(tracker, refContext, null, rawContext));
+                    else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
+                        results.add(generateEmptyContext(tracker, refContext, null, rawContext));
 
                 }
             }        

From a68e6810c90711d88d2a829d27818034f721eb12 Mon Sep 17 00:00:00 2001
From: Guillermo del Angel <delangel@broadinstitute.org>
Date: Wed, 14 Nov 2012 14:45:15 -0500
Subject: [PATCH 082/236] Back off experimental code that escaped last commit,
 not for general use yet

---
 .../sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index 22ed95365..80bc04845 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -190,8 +190,9 @@ public class UnifiedGenotyperEngine {
                     final VariantContext vc = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, null, true, model, perReadAlleleLikelihoodMap);
                     if ( vc != null )
                         results.add(calculateGenotypes(tracker, refContext, rawContext, stratifiedContexts, vc, model, true, perReadAlleleLikelihoodMap));
-                    else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
-                        results.add(generateEmptyContext(tracker, refContext, null, rawContext));
+// todo - uncomment if we want to also emit a null ref call (with no QUAL) if there's no evidence for REF and if EMIT_ALL_SITES is set
+//                    else if (UAC.OutputMode == OUTPUT_MODE.EMIT_ALL_SITES)
+//                        results.add(generateEmptyContext(tracker, refContext, null, rawContext));
 
                 }
             }        

From b70fd4a2426a21f375776c1de54540a562539423 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Wed, 14 Nov 2012 11:08:48 -0500
Subject: [PATCH 083/236] Initial testing of the Active Region Traversal
 contract

- TODO: many more tests and test cases
---
 .../traversals/TraverseActiveRegions.java     |   5 +-
 .../utils/activeregion/ActivityProfile.java   |   5 +
 .../traversals/TraverseActiveRegionsTest.java | 126 ++++++++++++++++++
 3 files changed, 135 insertions(+), 1 deletion(-)
 create mode 100644 public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index a2c37944a..4fe83f331 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -34,6 +34,9 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
     private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
     private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
 
+    // package access for unit testing
+    ActivityProfile profile;
+
     @Override
     public String getTraversalUnits() {
         return "active regions";
@@ -53,7 +56,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
 
         int minStart = Integer.MAX_VALUE;
         final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
-        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
+        profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
 
         ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
index e96eb843d..38cfbb38d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
@@ -103,6 +103,11 @@ public class ActivityProfile {
         isActiveList.add(result);
     }
 
+    // for unit testing
+    public List<ActivityProfileResult> getActiveList() {
+        return isActiveList;
+    }
+
     public int size() {
         return isActiveList.size();
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
new file mode 100644
index 000000000..8740a8b68
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -0,0 +1,126 @@
+package org.broadinstitute.sting.gatk.traversals;
+
+import org.testng.Assert;
+import net.sf.picard.reference.IndexedFastaSequenceFile;
+import net.sf.samtools.SAMRecord;
+import net.sf.samtools.SAMSequenceDictionary;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.datasources.providers.LocusShardDataProvider;
+import org.broadinstitute.sting.gatk.datasources.reads.MockLocusShard;
+import org.broadinstitute.sting.gatk.datasources.reads.Shard;
+import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.gatk.executive.WindowMaker;
+import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.ActiveRegionWalker;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
+import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
+import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.testng.annotations.BeforeClass;
+import org.testng.annotations.Test;
+
+
+import java.io.File;
+import java.io.FileNotFoundException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 11/13/12
+ * Time: 2:47 PM
+ */
+public class TraverseActiveRegionsTest extends BaseTest {
+
+    private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
+        private final double prob;
+
+        public DummyActiveRegionWalker() {
+            this.prob = 1.0;
+        }
+
+        @Override
+        public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+            return new ActivityProfileResult(ref.getLocus(), prob);
+        }
+
+        @Override
+        public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
+            return 0;
+        }
+
+        @Override
+        public Integer reduceInit() {
+            return 0;
+        }
+
+        @Override
+        public Integer reduce(Integer value, Integer sum) {
+            return 0;
+        }
+    }
+
+    private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
+
+    private IndexedFastaSequenceFile reference;
+    private GenomeLocParser genomeLocParser;
+    private ActiveRegionWalker<Integer, Integer> walker;
+
+    @BeforeClass
+    private void init() throws FileNotFoundException {
+        reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
+        SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
+        genomeLocParser = new GenomeLocParser(dictionary);
+    }
+
+    @Test
+    public void testAllIntervalsSeen() throws Exception {
+        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
+        GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
+        intervals.add(interval);
+
+        LocusShardDataProvider dataProvider = createDataProvider(intervals);
+
+        t.traverse(walker, dataProvider, 0);
+
+        boolean allGenomeLocsSeen = true;
+        for (GenomeLoc loc : intervals) {
+            boolean thisGenomeLocSeen = false;
+            for (ActivityProfileResult active : t.profile.getActiveList()) {
+                if (loc.equals(active.getLoc())) {
+                    thisGenomeLocSeen = true;
+                    break;
+                }
+            }
+            if (!thisGenomeLocSeen) {
+                allGenomeLocsSeen = false;
+                break;
+            }
+        }
+
+        Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
+    }
+
+    private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
+        walker = new DummyActiveRegionWalker();
+
+        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
+        Shard shard = new MockLocusShard(genomeLocParser, intervals);
+        WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
+        WindowMaker.WindowMakerIterator window = windowMaker.next();
+
+        GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
+        //engine.setReferenceDataSource(reference);
+        engine.setGenomeLocParser(genomeLocParser);
+        t.initialize(engine);
+
+        return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
+    }
+}

From 855a68ae39c1f31623f9561af4f9e2b4e85a14ab Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 19 Nov 2012 08:06:58 -0500
Subject: [PATCH 085/236] Testing out the new github-hosted repos. Please
 ignore.

---
 testfile | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 testfile

diff --git a/testfile b/testfile
new file mode 100644
index 000000000..6de7b8c69
--- /dev/null
+++ b/testfile
@@ -0,0 +1 @@
+This is a test file.

From 2a16d6fa55140ff6835c57a737586fbb18d0452b Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 19 Nov 2012 08:07:19 -0500
Subject: [PATCH 086/236] Revert "Testing out the new github-hosted repos.
 Please ignore."

This reverts commit b6bf66cd088754e7fd3d5f105ca8b2551237f183.
---
 testfile | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 testfile

diff --git a/testfile b/testfile
deleted file mode 100644
index 6de7b8c69..000000000
--- a/testfile
+++ /dev/null
@@ -1 +0,0 @@
-This is a test file.

From 78ce822b6f56fc5c6cc43be77f0faa47fbeabba6 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 19 Nov 2012 09:07:04 -0500
Subject: [PATCH 087/236] Protect against NPE when using non-GATK reports for
 inputs expecting valid GATK reports

---
 .../broadinstitute/sting/gatk/report/GATKReportVersion.java    | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
index b51fb17f0..1079d9b91 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReportVersion.java
@@ -80,6 +80,9 @@ public enum GATKReportVersion {
      * @return The version as an enum.
      */
     public static GATKReportVersion fromHeader(String header) {
+        if ( header == null )
+            throw new UserException.BadInput("The GATK report has no version specified in the header");
+
         if (header.startsWith("##:GATKReport.v0.1 "))
             return GATKReportVersion.V0_1;
 

From ff180a8e02eaeffbc42226c789c6c6946affae68 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 19 Nov 2012 09:09:57 -0500
Subject: [PATCH 088/236] Significant refactoring of the Haplotype Caller to
 handle problems with GGA.  The main fix is that we now maintain a mapping
 from 'original' allele to 'Smith-Waterman-based' allele so that we no longer
 need to do a (buggy) matching throughout the calling process.

---
 .../haplotypecaller/GenotypingEngine.java     | 253 ++++++------------
 .../haplotypecaller/HaplotypeCaller.java      |   6 +-
 .../LikelihoodCalculationEngine.java          |  78 +++---
 .../LikelihoodCalculationEngineUnitTest.java  |   5 +-
 .../broadinstitute/sting/utils/Haplotype.java |  16 +-
 5 files changed, 144 insertions(+), 214 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index d91df82e2..9fc636efe 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -31,7 +31,6 @@ import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
-import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@@ -52,153 +51,17 @@ public class GenotypingEngine {
         noCall.add(Allele.NO_CALL);
     }
 
-    // WARN
-    // This function is the streamlined approach, currently not being used by default
-    // WARN
-    // WARN: This function is currently only being used by Menachem. Slated for removal/merging with the rest of the code.
-    // WARN
-    @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
-    public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallHaplotypeEvents( final UnifiedGenotyperEngine UG_engine,
-                                                                                                                             final ArrayList<Haplotype> haplotypes,
-                                                                                                                             final byte[] ref,
-                                                                                                                             final GenomeLoc refLoc,
-                                                                                                                             final GenomeLoc activeRegionWindow,
-                                                                                                                             final GenomeLocParser genomeLocParser ) {
-        // Prepare the list of haplotype indices to genotype
-        final ArrayList<Allele> allelesToGenotype = new ArrayList<Allele>();
-
-        for( final Haplotype h : haplotypes ) {
-            allelesToGenotype.add( Allele.create(h.getBases(), h.isReference()) );
-        }
-        final int numHaplotypes = haplotypes.size();
-
-        // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
-        final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
-        for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
-            final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
-            final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, sample);
-            int glIndex = 0;
-            for( int iii = 0; iii < numHaplotypes; iii++ ) {
-                for( int jjj = 0; jjj <= iii; jjj++ ) {
-                    genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
-                }
-            }
-            genotypes.add(new GenotypeBuilder(sample, noCall).PL(genotypeLikelihoods).make());
-        }
-        final VariantCallContext call = UG_engine.calculateGenotypes(new VariantContextBuilder().loc(activeRegionWindow).alleles(allelesToGenotype).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
-        if( call == null ) { return Collections.emptyList(); } // exact model says that the call confidence is below the specified confidence threshold so nothing to do here
-
-        // Prepare the list of haplotypes that need to be run through Smith-Waterman for output to VCF
-        final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
-        for( final Haplotype h : haplotypes ) {
-            if( call.getAllele(h.getBases()) == null ) { // exact model removed this allele from the list so no need to run SW and output to VCF
-                haplotypesToRemove.add(h);
-            }
-        }
-        haplotypes.removeAll(haplotypesToRemove);
-
-        if( OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
-            final List<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>> returnVCs = new ArrayList<Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>>();
-            // set up the default 1-to-1 haplotype mapping object
-            final HashMap<Allele,ArrayList<Haplotype>> haplotypeMapping = new HashMap<Allele,ArrayList<Haplotype>>();
-            for( final Haplotype h : haplotypes ) {
-                final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
-                list.add(h);
-                haplotypeMapping.put(call.getAllele(h.getBases()), list);
-            }
-            returnVCs.add( new Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>>(call,haplotypeMapping) );
-            return returnVCs;
-        }
-
-        final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
-
-        // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
-        final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
-        int count = 0;
-        if( DEBUG ) { System.out.println("=== Best Haplotypes ==="); }
-        for( final Haplotype h : haplotypes ) {
-            if( DEBUG ) {
-                System.out.println( h.toString() );
-                System.out.println( "> Cigar = " + h.getCigar() );
-            }
-            // Walk along the alignment and turn any difference from the reference into an event
-            h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
-            startPosKeySet.addAll(h.getEventMap().keySet());
-        }
-        
-        // Create the VC merge priority list
-        final ArrayList<String> priorityList = new ArrayList<String>();
-        for( int iii = 0; iii < haplotypes.size(); iii++ ) {
-            priorityList.add("HC" + iii);
-        }
-        
-        // Walk along each position in the key set and create each event to be outputted
-        for( final int loc : startPosKeySet ) {
-            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
-                final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>();
-                for( final Haplotype h : haplotypes ) {
-                    final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
-                    final VariantContext vc = eventMap.get(loc);
-                    if( vc != null && !containsVCWithMatchingAlleles(eventsAtThisLoc, vc) ) {
-                        eventsAtThisLoc.add(vc);
-                    }
-                }
-                
-                // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
-                final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
-
-                // Merge the event to find a common reference representation
-                final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
-
-                final HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
-                int aCount = 0;
-                for( final Allele a : mergedVC.getAlleles() ) {
-                    alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
-                }
-
-                if( DEBUG ) {
-                    System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
-                    //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
-                }
-
-                // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
-                final GenotypesContext myGenotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
-                for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
-                    final int myNumHaplotypes = alleleMapper.size();
-                    final double[] genotypeLikelihoods = new double[myNumHaplotypes * (myNumHaplotypes+1) / 2];
-                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
-                    int glIndex = 0;
-                    for( int iii = 0; iii < myNumHaplotypes; iii++ ) {
-                        for( int jjj = 0; jjj <= iii; jjj++ ) {
-                            genotypeLikelihoods[glIndex++] = haplotypeLikelihoodMatrix[iii][jjj]; // for example: AA,AB,BB,AC,BC,CC
-                        }
-                    }
-
-                    // using the allele mapping object translate the haplotype allele into the event allele
-                    final Genotype g = new GenotypeBuilder(sample)
-                            .alleles(findEventAllelesInSample(mergedVC.getAlleles(), call.getAlleles(), call.getGenotype(sample).getAlleles(), alleleMapper, haplotypes))
-                            .phased(loc != startPosKeySet.first())
-                            .PL(genotypeLikelihoods).make();
-                    myGenotypes.add(g);
-                }
-                returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(
-                                 new VariantContextBuilder(mergedVC).log10PError(call.getLog10PError()).genotypes(myGenotypes).make(), alleleHashMap) );
-            }
-        }
-        return returnCalls;
-    }
-
     // BUGBUG: Create a class to hold this complicated return type
     @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
-    public List<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
-                                                                                                                               final ArrayList<Haplotype> haplotypes,
-                                                                                                                               final byte[] ref,
-                                                                                                                               final GenomeLoc refLoc,
-                                                                                                                               final GenomeLoc activeRegionWindow,
-                                                                                                                               final GenomeLocParser genomeLocParser,
-                                                                                                                               final ArrayList<VariantContext> activeAllelesToGenotype ) {
+    public List<Pair<VariantContext, Map<Allele, List<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
+                                                                                                                       final List<Haplotype> haplotypes,
+                                                                                                                       final byte[] ref,
+                                                                                                                       final GenomeLoc refLoc,
+                                                                                                                       final GenomeLoc activeRegionWindow,
+                                                                                                                       final GenomeLocParser genomeLocParser,
+                                                                                                                       final List<VariantContext> activeAllelesToGenotype ) {
 
-        final ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>>();
+        final ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>>();
 
         // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
         final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
@@ -261,7 +124,15 @@ public class GenotypingEngine {
                 if( eventsAtThisLoc.isEmpty() ) { continue; }
 
                 // Create the allele mapping object which maps the original haplotype alleles to the alleles present in just this event
-                final ArrayList<ArrayList<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
+                Map<Allele, List<Haplotype>> alleleMapper = createAlleleMapper( loc, eventsAtThisLoc, haplotypes );
+
+                final Allele refAllele = eventsAtThisLoc.get(0).getReference();
+                final ArrayList<Allele> alleleOrdering = new ArrayList<Allele>(alleleMapper.size());
+                alleleOrdering.add(refAllele);
+                for ( final Allele allele : alleleMapper.keySet() ) {
+                    if ( !refAllele.equals(allele) )
+                        alleleOrdering.add(allele);
+                }
 
                 // Sanity check the priority list
                 for( final VariantContext vc : eventsAtThisLoc ) {
@@ -283,12 +154,6 @@ public class GenotypingEngine {
                 final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
                 if( mergedVC == null ) { continue; }
 
-                HashMap<Allele, ArrayList<Haplotype>> alleleHashMap = new HashMap<Allele, ArrayList<Haplotype>>();
-                int aCount = 0;
-                for( final Allele a : mergedVC.getAlleles() ) {
-                    alleleHashMap.put(a, alleleMapper.get(aCount++)); // BUGBUG: needs to be cleaned up and merged with alleleMapper
-                }
-
                 if( DEBUG ) {
                     System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
                     //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
@@ -299,7 +164,7 @@ public class GenotypingEngine {
                 for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
                     final int numHaplotypes = alleleMapper.size();
                     final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
-                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper);
+                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper, alleleOrdering);
                     int glIndex = 0;
                     for( int iii = 0; iii < numHaplotypes; iii++ ) {
                         for( int jjj = 0; jjj <= iii; jjj++ ) {
@@ -313,23 +178,23 @@ public class GenotypingEngine {
                     if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
                         final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call);
                         // also, need to update the allele -> haplotype mapping
-                        final HashMap<Allele, ArrayList<Haplotype>> alleleHashMapTrim = new HashMap<Allele, ArrayList<Haplotype>>();
+                        final HashMap<Allele, List<Haplotype>> alleleHashMapTrim = new HashMap<Allele, List<Haplotype>>();
                         for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC
-                            alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleHashMap.get(call.getAlleles().get(iii)));
+                            alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleMapper.get(call.getAlleles().get(iii)));
                         }
 
                         call = vcCallTrim;
-                        alleleHashMap = alleleHashMapTrim;
+                        alleleMapper = alleleHashMapTrim;
                     }
 
-                    returnCalls.add( new Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>>(call, alleleHashMap) );
+                    returnCalls.add( new Pair<VariantContext, Map<Allele,List<Haplotype>>>(call, alleleMapper) );
                 }
             }
         }
         return returnCalls;
     }
 
-    protected static void cleanUpSymbolicUnassembledEvents( final ArrayList<Haplotype> haplotypes ) {
+    protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
         final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
         for( final Haplotype h : haplotypes ) {
             for( final VariantContext vc : h.getEventMap().values() ) {
@@ -348,7 +213,7 @@ public class GenotypingEngine {
         haplotypes.removeAll(haplotypesToRemove);
     }
 
-    protected void mergeConsecutiveEventsBasedOnLD( final ArrayList<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
+    protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
         final int MAX_SIZE_TO_COMBINE = 15;
         final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
         if( startPosKeySet.size() <= 1 ) { return; }
@@ -395,7 +260,9 @@ public class GenotypingEngine {
                         final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
                         haplotypeList.add(h);
                         for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
-                            final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( haplotypeList, sample )[0][0];
+                            final HashSet<String> sampleSet = new HashSet<String>(1);
+                            sampleSet.add(sample);
+                            final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet,  haplotypeList )[0][0];
                             if( thisHapVC == null ) {
                                 if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
                                 else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
@@ -489,37 +356,71 @@ public class GenotypingEngine {
 
     @Requires({"haplotypes.size() >= eventsAtThisLoc.size() + 1"})
     @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
-    protected static ArrayList<ArrayList<Haplotype>> createAlleleMapper( final int loc, final ArrayList<VariantContext> eventsAtThisLoc, final ArrayList<Haplotype> haplotypes ) {
-        final ArrayList<ArrayList<Haplotype>> alleleMapper = new ArrayList<ArrayList<Haplotype>>();
-        final ArrayList<Haplotype> refList = new ArrayList<Haplotype>();
+    protected static Map<Allele, List<Haplotype>> createAlleleMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
+
+        final Allele refAllele = eventsAtThisLoc.get(0).getReference();
+
+        final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>(eventsAtThisLoc.size()+1);
         for( final Haplotype h : haplotypes ) {
             if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
-                refList.add(h);
+                if ( !alleleMapper.containsKey(refAllele) )
+                    alleleMapper.put(refAllele, new ArrayList<Haplotype>());
+                alleleMapper.get(refAllele).add(h);
+            } else if ( h.isArtificialHaplotype() ) {
+                if ( !alleleMapper.containsKey(h.getArtificialAllele()) )
+                    alleleMapper.put(h.getArtificialAllele(), new ArrayList<Haplotype>());
+                alleleMapper.get(h.getArtificialAllele()).add(h);
             } else {
-                boolean foundInEventList = false;
                 for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
                     if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
-                        foundInEventList = true;
+                        final Allele altAllele = vcAtThisLoc.getAlternateAllele(0);
+                        if ( !alleleMapper.containsKey(altAllele) )
+                            alleleMapper.put(altAllele, new ArrayList<Haplotype>());
+                        alleleMapper.get(altAllele).add(h);
+                        break;
                     }
                 }
-                if( !foundInEventList ) { // event at this location isn't one of the genotype-able options (during GGA) so this is a reference-supporting haplotype
-                    refList.add(h);
-                }
             }
         }
-        alleleMapper.add(refList);
-        for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
-            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
-            for( final Haplotype h : haplotypes ) {
-                if( h.getEventMap().get(loc) != null && h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
-                    list.add(h);
+
+        for( final Haplotype h : haplotypes ) {
+            if ( h.getEventMap().get(loc) == null || h.isArtificialHaplotype() )
+                continue;
+
+            Allele matchingAllele = null;
+            for ( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
+                if ( alleleToTest.getKey().equals(refAllele) )
+                    continue;
+
+                final Haplotype artificialHaplotype = alleleToTest.getValue().get(0);
+                if ( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap()) ) {
+                    matchingAllele = alleleToTest.getKey();
+                    break;
                 }
             }
-            alleleMapper.add(list);
+
+            if ( matchingAllele == null )
+                matchingAllele = refAllele;
+            alleleMapper.get(matchingAllele).add(h);
         }
+
         return alleleMapper;
     }
 
+    protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset) {
+
+        for ( final Map.Entry<Integer, VariantContext> fromSubset : subset.entrySet() ) {
+            final VariantContext fromSuperset = superset.get(fromSubset.getKey());
+            if ( fromSuperset == null )
+                return false;
+
+            if ( !fromSuperset.hasAlternateAllele(fromSubset.getValue().getAlternateAllele(0)) )
+                return false;
+        }
+
+        return true;
+    }
+
     @Ensures({"result.size() == haplotypeAllelesForSample.size()"})
     protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
         if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index a185ba6af..2b739a321 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -421,10 +421,8 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
         // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
         final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
 
-        for( final Pair<VariantContext, HashMap<Allele, ArrayList<Haplotype>>> callResult :
-                ( GENOTYPE_FULL_ACTIVE_REGION && UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES
-                  ? genotypingEngine.assignGenotypeLikelihoodsAndCallHaplotypeEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getExtendedLoc(), getToolkit().getGenomeLocParser() )
-                  : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) ) {
+        for( final Pair<VariantContext, Map<Allele, List<Haplotype>>> callResult :
+                genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) {
             if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
 
             final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index a0924623b..543987e74 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -148,34 +148,21 @@ public class LikelihoodCalculationEngine {
         return Math.min(b1.length, b2.length);
     }
 
-    @Requires({"haplotypes.size() > 0"})
-    @Ensures({"result.length == result[0].length", "result.length == haplotypes.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final ArrayList<Haplotype> haplotypes, final String sample ) {
-        // set up the default 1-to-1 haplotype mapping object, BUGBUG: target for future optimization?
-        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
-        for( final Haplotype h : haplotypes ) {
-            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
-            list.add(h);
-            haplotypeMapping.add(list);
-        }
-        return computeDiploidHaplotypeLikelihoods( sample, haplotypeMapping );
-    }
-
     // This function takes just a single sample and a haplotypeMapping
     @Requires({"haplotypeMapping.size() > 0"})
     @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
+    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
         final TreeSet<String> sampleSet = new TreeSet<String>();
         sampleSet.add(sample);
-        return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping);
+        return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping, alleleOrdering);
     }
 
     // This function takes a set of samples to pool over and a haplotypeMapping
     @Requires({"haplotypeMapping.size() > 0"})
     @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final ArrayList<ArrayList<Haplotype>> haplotypeMapping ) {
+    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
 
-        final int numHaplotypes = haplotypeMapping.size();
+        final int numHaplotypes = alleleOrdering.size();
         final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
         for( int iii = 0; iii < numHaplotypes; iii++ ) {
             Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
@@ -184,9 +171,9 @@ public class LikelihoodCalculationEngine {
         // compute the diploid haplotype likelihoods
         // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
         for( int iii = 0; iii < numHaplotypes; iii++ ) {
-            for( int jjj = 0; jjj <= iii; jjj++ ) {                
-                for( final Haplotype iii_mapped : haplotypeMapping.get(iii) ) {
-                    for( final Haplotype jjj_mapped : haplotypeMapping.get(jjj) ) {
+            for( int jjj = 0; jjj <= iii; jjj++ ) {
+                for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
+                    for( final Haplotype jjj_mapped : haplotypeMapping.get(alleleOrdering.get(jjj)) ) {
                         double haplotypeLikelihood = 0.0;
                         for( final String sample : samples ) {
                             final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
@@ -200,12 +187,48 @@ public class LikelihoodCalculationEngine {
                         }
                         haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
                     }
-                }       
+                }
             }
         }
 
         // normalize the diploid likelihoods matrix
-        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );        
+        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
+    }
+
+    // This function takes a set of samples to pool over and a haplotypeMapping
+    @Requires({"haplotypeMapping.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
+    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final List<Haplotype> haplotypeList ) {
+
+        final int numHaplotypes = haplotypeList.size();
+        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
+        }
+
+        // compute the diploid haplotype likelihoods
+        // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
+        for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            final Haplotype iii_haplotype = haplotypeList.get(iii);
+            for( int jjj = 0; jjj <= iii; jjj++ ) {
+                final Haplotype jjj_haplotype = haplotypeList.get(jjj);
+                double haplotypeLikelihood = 0.0;
+                for( final String sample : samples ) {
+                    final double[] readLikelihoods_iii = iii_haplotype.getReadLikelihoods(sample);
+                    final int[] readCounts_iii = iii_haplotype.getReadCounts(sample);
+                    final double[] readLikelihoods_jjj = jjj_haplotype.getReadLikelihoods(sample);
+                    for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
+                        // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
+                        // First term is approximated by Jacobian log with table lookup.
+                        haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
+                    }
+                }
+                haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
+            }
+        }
+
+        // normalize the diploid likelihoods matrix
+        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
     }
 
     @Requires({"likelihoodMatrix.length == likelihoodMatrix[0].length"})
@@ -296,14 +319,7 @@ public class LikelihoodCalculationEngine {
         final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
         final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
         bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
-        // set up the default 1-to-1 haplotype mapping object
-        final ArrayList<ArrayList<Haplotype>> haplotypeMapping = new ArrayList<ArrayList<Haplotype>>();
-        for( final Haplotype h : haplotypes ) {
-            final ArrayList<Haplotype> list = new ArrayList<Haplotype>();
-            list.add(h);
-            haplotypeMapping.add(list);
-        }
-        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypeMapping ); // all samples pooled together
+        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypes ); // all samples pooled together
 
         int hap1 = 0;
         int hap2 = 0;
@@ -347,7 +363,7 @@ public class LikelihoodCalculationEngine {
     public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
                                                                                             final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
                                                                                             final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
-                                                                                            final Pair<VariantContext, HashMap<Allele,ArrayList<Haplotype>>> call,
+                                                                                            final Pair<VariantContext, Map<Allele,List<Haplotype>>> call,
                                                                                             final double downsamplingFraction,
                                                                                             final PrintStream downsamplingLog ) {
         final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
index e82946690..19ced9f42 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
@@ -10,7 +10,6 @@ import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.Haplotype;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.testng.Assert;
-import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
@@ -102,7 +101,9 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
                     haplotypes.add(haplotype);
                 }
             }
-            return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(haplotypes, "myTestSample");
+            final HashSet<String> sampleSet = new HashSet<String>(1);
+            sampleSet.add("myTestSample");
+            return LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sampleSet, haplotypes);
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index b30d47074..6de15e18b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -49,6 +49,7 @@ public class Haplotype {
     private int alignmentStartHapwrtRef;
     public int leftBreakPoint = 0;
     public int rightBreakPoint = 0;
+    private Allele artificialAllele = null;
  
     /**
      * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
@@ -71,6 +72,11 @@ public class Haplotype {
         this(bases, 0);
     }
 
+    public Haplotype( final byte[] bases, final Allele artificialAllele ) {
+        this(bases, 0);
+        this.artificialAllele = artificialAllele;
+    }
+
     public Haplotype( final byte[] bases, final GenomeLoc loc ) {
         this(bases);
         this.genomeLocation = loc;
@@ -171,6 +177,14 @@ public class Haplotype {
         this.cigar = cigar;
     }
 
+    public boolean isArtificialHaplotype() {
+        return artificialAllele != null;
+    }
+
+    public Allele getArtificialAllele() {
+        return artificialAllele;
+    }
+
     @Requires({"refInsertLocation >= 0"})
     public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
         // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
@@ -182,7 +196,7 @@ public class Haplotype {
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
-        return new Haplotype(newHaplotypeBases);
+        return new Haplotype(newHaplotypeBases, altAllele);
     }
 
     public static class HaplotypeBaseComparator implements Comparator<Haplotype>, Serializable {

From 9fc63efc307475f6b36ffad7e425b90bcab817cf Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 19 Nov 2012 09:34:15 -0500
Subject: [PATCH 089/236] Second test of new repos. Please ignore.

---
 testfile | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 testfile

diff --git a/testfile b/testfile
new file mode 100644
index 000000000..524acfffa
--- /dev/null
+++ b/testfile
@@ -0,0 +1 @@
+Test file

From e1a5c3ce7a97e8eb92c64cc144340c81d01c9903 Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 19 Nov 2012 09:34:32 -0500
Subject: [PATCH 090/236] Revert "Second test of new repos. Please ignore."

This reverts commit 077532d870ddf53ec514b98f14534ca7dbf55331.
---
 testfile | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 testfile

diff --git a/testfile b/testfile
deleted file mode 100644
index 524acfffa..000000000
--- a/testfile
+++ /dev/null
@@ -1 +0,0 @@
-Test file

From f0b8a0228fef45f23478c1a12be1cd58633c0873 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 19 Nov 2012 09:57:55 -0500
Subject: [PATCH 092/236] Quick fix for HC refactoring: when copying over
 Haplotype objects, make sure to copy over the artificial allele used to
 create it too.

---
 .../gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java | 2 ++
 public/java/src/org/broadinstitute/sting/utils/Haplotype.java | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
index fd46e4e69..33fa49543 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@@ -369,6 +369,8 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
 
         h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
         h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
+        if ( haplotype.isArtificialHaplotype() )
+            h.setArtificialAllele(haplotype.getArtificialAllele());
         h.leftBreakPoint = leftBreakPoint;
         h.rightBreakPoint = rightBreakPoint;
         if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index 6de15e18b..af4e31698 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -185,6 +185,10 @@ public class Haplotype {
         return artificialAllele;
     }
 
+    public void setArtificialAllele(final Allele artificialAllele) {
+        this.artificialAllele = artificialAllele;
+    }
+
     @Requires({"refInsertLocation >= 0"})
     public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
         // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates

From 937ac7290f7caa7f0cae996608f5a68358f10c09 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 20 Nov 2012 16:13:29 -0500
Subject: [PATCH 099/236] Lots more GGA fixes for the HC now that I understand
 what's going on internally.  Integration tests pass except for the GGA test
 which I believe now produces better results.

---
 .../haplotypecaller/GenotypingEngine.java     | 82 ++++++++++++-------
 .../LikelihoodCalculationEngine.java          |  4 +-
 .../SimpleDeBruijnAssembler.java              | 22 +++--
 .../HaplotypeCallerIntegrationTest.java       |  3 +-
 .../broadinstitute/sting/utils/Haplotype.java | 17 ++--
 .../sting/utils/HaplotypeUnitTest.java        |  2 +-
 6 files changed, 87 insertions(+), 43 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index 9fc636efe..beec8a92e 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -62,6 +62,7 @@ public class GenotypingEngine {
                                                                                                                        final List<VariantContext> activeAllelesToGenotype ) {
 
         final ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>>();
+        final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
 
         // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
         final TreeSet<Integer> startPosKeySet = new TreeSet<Integer>();
@@ -70,7 +71,7 @@ public class GenotypingEngine {
         for( final Haplotype h : haplotypes ) {
             // Walk along the alignment and turn any difference from the reference into an event
             h.setEventMap( generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), ref, h.getBases(), refLoc, "HC" + count++ ) );
-            if( activeAllelesToGenotype.isEmpty() ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
+            if( !in_GGA_mode ) { startPosKeySet.addAll(h.getEventMap().keySet()); }
             if( DEBUG ) {
                 System.out.println( h.toString() );
                 System.out.println( "> Cigar = " + h.getCigar() );
@@ -80,10 +81,10 @@ public class GenotypingEngine {
         }
 
         cleanUpSymbolicUnassembledEvents( haplotypes );
-        if( activeAllelesToGenotype.isEmpty() && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
+        if( !in_GGA_mode && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
             mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
         }
-        if( !activeAllelesToGenotype.isEmpty() ) { // we are in GGA mode!
+        if( in_GGA_mode ) {
             for( final VariantContext compVC : activeAllelesToGenotype ) {
                 startPosKeySet.add( compVC.getStart() );
             }
@@ -95,7 +96,7 @@ public class GenotypingEngine {
                 final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
                 final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
 
-                if( activeAllelesToGenotype.isEmpty() ) {
+                if( !in_GGA_mode ) {
                     for( final Haplotype h : haplotypes ) {
                         final HashMap<Integer,VariantContext> eventMap = h.getEventMap();
                         final VariantContext vc = eventMap.get(loc);
@@ -129,9 +130,8 @@ public class GenotypingEngine {
                 final Allele refAllele = eventsAtThisLoc.get(0).getReference();
                 final ArrayList<Allele> alleleOrdering = new ArrayList<Allele>(alleleMapper.size());
                 alleleOrdering.add(refAllele);
-                for ( final Allele allele : alleleMapper.keySet() ) {
-                    if ( !refAllele.equals(allele) )
-                        alleleOrdering.add(allele);
+                for( final VariantContext vc : eventsAtThisLoc ) {
+                    alleleOrdering.add(vc.getAlternateAllele(0));
                 }
 
                 // Sanity check the priority list
@@ -154,6 +154,16 @@ public class GenotypingEngine {
                 final VariantContext mergedVC = VariantContextUtils.simpleMerge(genomeLocParser, eventsAtThisLoc, priorityList, VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED, VariantContextUtils.GenotypeMergeType.PRIORITIZE, false, false, null, false, false);
                 if( mergedVC == null ) { continue; }
 
+                // let's update the Allele keys in the mapper because they can change after merging when there are complex events
+                Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
+                for ( int i = 0; i < mergedVC.getNAlleles(); i++ ) {
+                    final Allele oldAllele = alleleOrdering.get(i);
+                    final Allele newAllele = mergedVC.getAlleles().get(i);
+                    updatedAlleleMapper.put(newAllele, alleleMapper.get(oldAllele));
+                    alleleOrdering.set(i, newAllele);
+                }
+                alleleMapper = updatedAlleleMapper;
+
                 if( DEBUG ) {
                     System.out.println("Genotyping event at " + loc + " with alleles = " + mergedVC.getAlleles());
                     //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
@@ -358,48 +368,48 @@ public class GenotypingEngine {
     @Ensures({"result.size() == eventsAtThisLoc.size() + 1"})
     protected static Map<Allele, List<Haplotype>> createAlleleMapper( final int loc, final List<VariantContext> eventsAtThisLoc, final List<Haplotype> haplotypes ) {
 
-        final Allele refAllele = eventsAtThisLoc.get(0).getReference();
-
         final Map<Allele, List<Haplotype>> alleleMapper = new HashMap<Allele, List<Haplotype>>(eventsAtThisLoc.size()+1);
+        final Allele refAllele = eventsAtThisLoc.get(0).getReference();
+        alleleMapper.put(refAllele, new ArrayList<Haplotype>());
+        for( final VariantContext vc : eventsAtThisLoc )
+            alleleMapper.put(vc.getAlternateAllele(0), new ArrayList<Haplotype>());
+
+        final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
         for( final Haplotype h : haplotypes ) {
             if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
-                if ( !alleleMapper.containsKey(refAllele) )
-                    alleleMapper.put(refAllele, new ArrayList<Haplotype>());
                 alleleMapper.get(refAllele).add(h);
-            } else if ( h.isArtificialHaplotype() ) {
-                if ( !alleleMapper.containsKey(h.getArtificialAllele()) )
-                    alleleMapper.put(h.getArtificialAllele(), new ArrayList<Haplotype>());
+            } else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
                 alleleMapper.get(h.getArtificialAllele()).add(h);
             } else {
+                boolean haplotypeIsDetermined = false;
                 for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
                     if( h.getEventMap().get(loc).hasSameAllelesAs(vcAtThisLoc) ) {
-                        final Allele altAllele = vcAtThisLoc.getAlternateAllele(0);
-                        if ( !alleleMapper.containsKey(altAllele) )
-                            alleleMapper.put(altAllele, new ArrayList<Haplotype>());
-                        alleleMapper.get(altAllele).add(h);
+                        alleleMapper.get(vcAtThisLoc.getAlternateAllele(0)).add(h);
+                        haplotypeIsDetermined = true;
                         break;
                     }
                 }
+
+                if( !haplotypeIsDetermined )
+                    undeterminedHaplotypes.add(h);
             }
         }
 
-        for( final Haplotype h : haplotypes ) {
-            if ( h.getEventMap().get(loc) == null || h.isArtificialHaplotype() )
-                continue;
-
+        for( final Haplotype h : undeterminedHaplotypes ) {
             Allele matchingAllele = null;
-            for ( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
-                if ( alleleToTest.getKey().equals(refAllele) )
+            for( final Map.Entry<Allele, List<Haplotype>> alleleToTest : alleleMapper.entrySet() ) {
+                // don't test against the reference allele
+                if( alleleToTest.getKey().equals(refAllele) )
                     continue;
 
                 final Haplotype artificialHaplotype = alleleToTest.getValue().get(0);
-                if ( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap()) ) {
+                if( isSubSetOf(artificialHaplotype.getEventMap(), h.getEventMap(), true) ) {
                     matchingAllele = alleleToTest.getKey();
                     break;
                 }
             }
 
-            if ( matchingAllele == null )
+            if( matchingAllele == null )
                 matchingAllele = refAllele;
             alleleMapper.get(matchingAllele).add(h);
         }
@@ -407,20 +417,36 @@ public class GenotypingEngine {
         return alleleMapper;
     }
 
-    protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset) {
+    protected static boolean isSubSetOf(final Map<Integer, VariantContext> subset, final Map<Integer, VariantContext> superset, final boolean resolveSupersetToSubset) {
 
         for ( final Map.Entry<Integer, VariantContext> fromSubset : subset.entrySet() ) {
             final VariantContext fromSuperset = superset.get(fromSubset.getKey());
             if ( fromSuperset == null )
                 return false;
 
-            if ( !fromSuperset.hasAlternateAllele(fromSubset.getValue().getAlternateAllele(0)) )
+            List<Allele> supersetAlleles = fromSuperset.getAlternateAlleles();
+            if ( resolveSupersetToSubset )
+                supersetAlleles = resolveAlternateAlleles(fromSubset.getValue().getReference(), fromSuperset.getReference(), supersetAlleles);
+
+            if ( !supersetAlleles.contains(fromSubset.getValue().getAlternateAllele(0)) )
                 return false;
         }
 
         return true;
     }
 
+    private static List<Allele> resolveAlternateAlleles(final Allele targetReference, final Allele actualReference, final List<Allele> currentAlleles) {
+        if ( targetReference.length() <= actualReference.length() )
+            return currentAlleles;
+
+        final List<Allele> newAlleles = new ArrayList<Allele>(currentAlleles.size());
+        final byte[] extraBases = Arrays.copyOfRange(targetReference.getBases(), actualReference.length(), targetReference.length());
+        for ( final Allele a : currentAlleles ) {
+            newAlleles.add(Allele.extend(a, extraBases));
+        }
+        return newAlleles;
+    }
+
     @Ensures({"result.size() == haplotypeAllelesForSample.size()"})
     protected static List<Allele> findEventAllelesInSample( final List<Allele> eventAlleles, final List<Allele> haplotypeAlleles, final List<Allele> haplotypeAllelesForSample, final ArrayList<ArrayList<Haplotype>> alleleMapper, final ArrayList<Haplotype> haplotypes ) {
         if( haplotypeAllelesForSample.contains(Allele.NO_CALL) ) { return noCall; }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 543987e74..304f8d5cb 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -196,8 +196,8 @@ public class LikelihoodCalculationEngine {
     }
 
     // This function takes a set of samples to pool over and a haplotypeMapping
-    @Requires({"haplotypeMapping.size() > 0"})
-    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
+    @Requires({"haplotypeList.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == haplotypeList.size()"})
     public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final List<Haplotype> haplotypeList ) {
 
         final int numHaplotypes = haplotypeList.size();
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
index 33fa49543..4f072d720 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@@ -278,9 +278,10 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
         final int activeRegionStart = refHaplotype.getAlignmentStartHapwrtRef();
         final int activeRegionStop = refHaplotype.getAlignmentStartHapwrtRef() + refHaplotype.getCigar().getReferenceLength();
 
-        for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype
+        // for GGA mode, add the desired allele into the haplotype
+        for( final VariantContext compVC : activeAllelesToGenotype ) {
             for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
-                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart());
+                final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
                 if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
                     return returnHaplotypes;
                     //throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
@@ -290,15 +291,24 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
 
         for( final DefaultDirectedGraph<DeBruijnVertex, DeBruijnEdge> graph : graphs ) {
             for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
+
                 final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
                 if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
-                    if( !activeAllelesToGenotype.isEmpty() ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
+
+                    // for GGA mode, add the desired allele into the haplotype if it isn't already present
+                    if( !activeAllelesToGenotype.isEmpty() ) {
                         final HashMap<Integer,VariantContext> eventMap = GenotypingEngine.generateVCsFromAlignment( h, h.getAlignmentStartHapwrtRef(), h.getCigar(), fullReferenceWithPadding, h.getBases(), refLoc, "HCassembly" ); // BUGBUG: need to put this function in a shared place
                         for( final VariantContext compVC : activeAllelesToGenotype ) { // for GGA mode, add the desired allele into the haplotype if it isn't already present
                             final VariantContext vcOnHaplotype = eventMap.get(compVC.getStart());
-                            if( vcOnHaplotype == null || !vcOnHaplotype.hasSameAllelesAs(compVC) ) {
+
+                            // This if statement used to additionally have:
+                            //      "|| !vcOnHaplotype.hasSameAllelesAs(compVC)"
+                            //  but that can lead to problems downstream when e.g. you are injecting a 1bp deletion onto
+                            //  a haplotype that already contains a 1bp insertion (so practically it is reference but
+                            //  falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
+                            if( vcOnHaplotype == null ) {
                                 for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
-                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
+                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
                                 }
                             }
                         }
@@ -370,7 +380,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
         h.setAlignmentStartHapwrtRef( swConsensus2.getAlignmentStart2wrt1() );
         h.setCigar( AlignmentUtils.leftAlignIndel(swConsensus2.getCigar(), ref, h.getBases(), swConsensus2.getAlignmentStart2wrt1(), 0) );
         if ( haplotype.isArtificialHaplotype() )
-            h.setArtificialAllele(haplotype.getArtificialAllele());
+            h.setArtificialAllele(haplotype.getArtificialAllele(), haplotype.getArtificialAllelePosition());
         h.leftBreakPoint = leftBreakPoint;
         h.rightBreakPoint = rightBreakPoint;
         if( swConsensus2.getCigar().toString().contains("S") || swConsensus2.getCigar().getReferenceLength() != activeRegionStop - activeRegionStart ) { // protect against SW failures
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index 6828dbcb5..a57462d1d 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -29,9 +29,10 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
         HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
     }
 
+    // TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
-        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "39da622b309597d7a0b082c8aa1748c9");
+        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "f2d0309fdf50d5827e9c60ed0dd07e3f");
     }
 
     private void HCTestComplexVariants(String bam, String args, String md5) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index af4e31698..30fdce75d 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -50,7 +50,8 @@ public class Haplotype {
     public int leftBreakPoint = 0;
     public int rightBreakPoint = 0;
     private Allele artificialAllele = null;
- 
+    private int artificialAllelePosition = -1;
+
     /**
      * Create a simple consensus sequence with provided bases and a uniform quality over all bases of qual
      *
@@ -72,9 +73,10 @@ public class Haplotype {
         this(bases, 0);
     }
 
-    public Haplotype( final byte[] bases, final Allele artificialAllele ) {
+    protected Haplotype( final byte[] bases, final Allele artificialAllele, final int artificialAllelePosition ) {
         this(bases, 0);
         this.artificialAllele = artificialAllele;
+        this.artificialAllelePosition = artificialAllelePosition;
     }
 
     public Haplotype( final byte[] bases, final GenomeLoc loc ) {
@@ -185,12 +187,17 @@ public class Haplotype {
         return artificialAllele;
     }
 
-    public void setArtificialAllele(final Allele artificialAllele) {
+    public int getArtificialAllelePosition() {
+        return artificialAllelePosition;
+    }
+
+    public void setArtificialAllele(final Allele artificialAllele, final int artificialAllelePosition) {
         this.artificialAllele = artificialAllele;
+        this.artificialAllelePosition = artificialAllelePosition;
     }
 
     @Requires({"refInsertLocation >= 0"})
-    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation ) {
+    public Haplotype insertAllele( final Allele refAllele, final Allele altAllele, final int refInsertLocation, final int genomicInsertLocation ) {
         // refInsertLocation is in ref haplotype offset coordinates NOT genomic coordinates
         final int haplotypeInsertLocation = ReadUtils.getReadCoordinateForReferenceCoordinate(alignmentStartHapwrtRef, cigar, refInsertLocation, ReadUtils.ClippingTail.RIGHT_TAIL, true);
         if( haplotypeInsertLocation == -1 || haplotypeInsertLocation + refAllele.length() >= bases.length ) { // desired change falls inside deletion so don't bother creating a new haplotype
@@ -200,7 +207,7 @@ public class Haplotype {
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, 0, haplotypeInsertLocation)); // bases before the variant
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, altAllele.getBases()); // the alt allele of the variant
         newHaplotypeBases = ArrayUtils.addAll(newHaplotypeBases, ArrayUtils.subarray(bases, haplotypeInsertLocation + refAllele.length(), bases.length)); // bases after the variant
-        return new Haplotype(newHaplotypeBases, altAllele);
+        return new Haplotype(newHaplotypeBases, altAllele, genomicInsertLocation);
     }
 
     public static class HaplotypeBaseComparator implements Comparator<Haplotype>, Serializable {
diff --git a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
index ddffb6e4c..13db1d39e 100644
--- a/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/HaplotypeUnitTest.java
@@ -159,7 +159,7 @@ public class HaplotypeUnitTest extends BaseTest {
         final VariantContext vc = new VariantContextBuilder().alleles(alleles).loc("1", loc, loc + h1refAllele.getBases().length - 1).make();
         h.setAlignmentStartHapwrtRef(0);
         h.setCigar(cigar);
-        final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc);
+        final Haplotype h1 = h.insertAllele(vc.getReference(), vc.getAlternateAllele(0), loc, vc.getStart());
         final Haplotype h1expected = new Haplotype(newHap.getBytes());
         Assert.assertEquals(h1, h1expected);
     }

From cc7680e6010ab184752c9f4c20b96e055178478c Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sun, 4 Nov 2012 14:40:17 -0800
Subject: [PATCH 100/236] NA12878 knowledge base backed by MongoDB

-- Idea is simply to create a persistent database of all TP/FP sites on chr20 in NA12878.  Individual callsets can be imported, and a consensus algorithm is run over all callsets in the database to create a consensus collection, which can be used to assess NA12878 callsets for GATK and methods development
-- Framework for representing simple VariantContexts and Genotypes in MongoDB, querying for records, and iterating over them in the GATK
-- Not hooked up to Tribble, but could be done reasonably easily now (future TODO)
-- Tools to import callsets, create consensus callsets, import and export reviews
-- Scripts to reset the knowledge base and repopulate it with the standard data files (Eric will expand)
-- Actually scales to all of chr20, includes AssessNA12878 that reads a VCF and itemizes it against the truth data set
-- ImportCallset can load OMNI, HM3, CEU best practices, mills/devine sites and genotypes, properly marking sites as poly/mono/unk as well as TP/FP/UNK based on command line parameters
-- Added shell scripts that start up a local mongo db, that connect to a local or BI hosted mongo for NA12878.db for debugging, and a setupNA12878db script that can load OMNI, HM3, CEU best practices, Mills/Devine into the db and then update the consensus.
-- Reviewed sites can be exported to a VCF, and imported again, as a mechanism to safely store the only non-recoverable data from the Mongo DB.
-- Created a NA12878DBWalker that manages the outer DB interaction, and that all MongoDB interacting walkers inherit from.  Added a NA12878DBArgumentCollection.java consolating all of the common command line arguments (though strictly not necessary as all of this occurs in the root walker)

UnitTests
-- Can connect to a test knowledge base for development and unit testing
-- PolymorphicStatus, TruthStatus, SiteIterator
-- NA12878KBUnitTestBase provides simple utilities for connecting to the test mongo db, getting calls, etc
-- MongoVariantContext tests creation, matching, and encoding -> writing -> read -> decoding from the mongodb

AssessNA12878
-- Generic tool for comparing a NA12878 callset against the knowledge base.  See http://gatkforums.broadinstitute.org/discussion/1848/using-the-na12878-knowledge-base for detailed documentation
-- Performs trivial filtering on FS, MQ, QD for SNPs and non-SNPs to separate out variants likely to be filtered from those that are honest-to-goodness FPs

Misc
-- Ability to provide Description for Simplified GATK report
---
 .../sting/gatk/report/GATKReport.java         | 13 ++++++-
 .../broadinstitute/sting/utils/GenomeLoc.java | 14 ++++++++
 .../org/broadinstitute/sting/utils/Utils.java |  4 +++
 .../sting/utils/codecs/vcf/VCFUtils.java      | 34 +++++++++++++++++++
 4 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 47bc48f81..6685ee12a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -271,7 +271,18 @@ public class GATKReport {
      * @return a simplified GATK report
      */
     public static GATKReport newSimpleReport(final String tableName, final String... columns) {
-        GATKReportTable table = new GATKReportTable(tableName, "A simplified GATK table report", columns.length);
+        return newSimpleReportWithDescription(tableName, "A simplified GATK table report", columns);
+    }
+
+    /**
+     * @see #newSimpleReport(String, String...) but with a customized description
+     * @param tableName
+     * @param desc
+     * @param columns
+     * @return
+     */
+    public static GATKReport newSimpleReportWithDescription(final String tableName, final String desc, final String... columns) {
+        GATKReportTable table = new GATKReportTable(tableName, desc, columns.length);
 
         for (String column : columns) {
             table.addColumn(column, "");
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index 6df9c9f1d..4d2c26a79 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -315,6 +315,20 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
         return ( comparison == -1 || ( comparison == 0 && this.getStop() < that.getStart() ));        
     }
 
+    /**
+     * Tests whether this genome loc starts at the same position as that.
+     *
+     * i.e., do this and that have the same contig and the same start position
+     *
+     * @param that genome loc to compare to
+     * @return true if this and that have the same contig and the same start position
+     */
+    @Requires("that != null")
+    public final boolean startsAt( GenomeLoc that ) {
+        int comparison = this.compareContigs(that);
+        return comparison == 0 && this.getStart() == that.getStart();
+    }
+
     /**
      * Tests whether any portion of this contig is before that contig.
      * @param that Other contig to test.
diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index b780d0966..1d12d6f8b 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -293,6 +293,10 @@ public class Utils {
         }
     }
 
+    public static <T> String join(final String separator, final T ... objects) {
+        return join(separator, Arrays.asList(objects));
+    }
+
     public static String dupString(char c, int nCopies) {
         char[] chars = new char[nCopies];
         Arrays.fill(chars, c);
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
index be87e7306..a8aefb703 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFUtils.java
@@ -30,12 +30,17 @@ import net.sf.samtools.SAMSequenceRecord;
 import org.apache.commons.io.FilenameUtils;
 import org.apache.log4j.Logger;
 import org.broad.tribble.Feature;
+import org.broad.tribble.FeatureCodecHeader;
+import org.broad.tribble.readers.PositionalBufferedStream;
 import org.broadinstitute.sting.commandline.RodBinding;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
+import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.variantcontext.VariantContext;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
 import java.util.*;
 
 /**
@@ -317,4 +322,33 @@ public class VCFUtils {
             assembly = "hg19";
         return assembly;
     }
+
+    /**
+     * Read all of the VCF records from source into memory, returning the header and the VariantContexts
+     *
+     * @param source the file to read, must be in VCF4 format
+     * @return
+     * @throws IOException
+     */
+    public static Pair<VCFHeader, List<VariantContext>> readVCF(final File source) throws IOException {
+        // read in the features
+        final List<VariantContext> vcs = new ArrayList<VariantContext>();
+        final VCFCodec codec = new VCFCodec();
+        PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(source));
+        FeatureCodecHeader header = codec.readHeader(pbs);
+        pbs.close();
+
+        pbs = new PositionalBufferedStream(new FileInputStream(source));
+        pbs.skip(header.getHeaderEnd());
+
+        final VCFHeader vcfHeader = (VCFHeader)header.getHeaderValue();
+
+        while ( ! pbs.isDone() ) {
+            final VariantContext vc = codec.decode(pbs);
+            if ( vc != null )
+                vcs.add(vc);
+        }
+
+        return new Pair<VCFHeader, List<VariantContext>>(vcfHeader, vcs);
+    }
 }
\ No newline at end of file

From ff87642a91c569982fed062d34cf1bf63caab63b Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 20 Nov 2012 22:29:56 -0500
Subject: [PATCH 101/236] Enable cycle covariate unit tests

---
 .../sting/utils/recalibration/CycleCovariateUnitTest.java       | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
index c3d93b2cb..deb0931d6 100644
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
@@ -24,7 +24,7 @@ public class CycleCovariateUnitTest {
         covariate.initialize(RAC);
     }
 
-    @Test(enabled = false)
+    @Test(enabled = true)
     public void testSimpleCycles() {
         short readLength = 10;
         GATKSAMRecord read = ReadUtils.createRandomRead(readLength);

From 72e2d569c540e758284d9304caf44bf2b2e6ca35 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 20 Nov 2012 22:41:57 -0500
Subject: [PATCH 102/236] The user can now set the maximum allowable cycle on
 the command-line with --maximum_cycle_value.  This value is (now) enforced in
 the Cycle covariate and a User Error is thrown if the maximum value is passed
 (with a helpful error message).  Added unit tests to cover this new
 functionality.

---
 .../bqsr/RecalibrationArgumentCollection.java | 18 ++++++++++---
 .../covariates/CycleCovariate.java            |  7 +++++-
 .../recalibration/CycleCovariateUnitTest.java | 25 ++++++++++++++++++-
 3 files changed, 44 insertions(+), 6 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
index e5704a1e2..c64482151 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/bqsr/RecalibrationArgumentCollection.java
@@ -102,13 +102,10 @@ public class RecalibrationArgumentCollection {
     @Argument(fullName = "no_standard_covs", shortName = "noStandard", doc = "Do not use the standard set of covariates, but rather just the ones listed using the -cov argument", required = false)
     public boolean DO_NOT_USE_STANDARD_COVARIATES = false;
 
-    /////////////////////////////
-    // Debugging-only Arguments
-    /////////////////////////////
     /**
      * This calculation is critically dependent on being able to skip over known polymorphic sites. Please be sure that you know what you are doing if you use this option.
      */
-    @Hidden
+    @Advanced
     @Argument(fullName = "run_without_dbsnp_potentially_ruining_quality", shortName = "run_without_dbsnp_potentially_ruining_quality", required = false, doc = "If specified, allows the recalibrator to be used without a dbsnp rod. Very unsafe and for expert users only.")
     public boolean RUN_WITHOUT_DBSNP = false;
 
@@ -139,6 +136,13 @@ public class RecalibrationArgumentCollection {
     @Argument(fullName = "indels_context_size", shortName = "ics", doc = "size of the k-mer context to be used for base insertions and deletions", required = false)
     public int INDELS_CONTEXT_SIZE = 3;
 
+    /**
+     * The cycle covariate will generate an error if it encounters a cycle greater than this value.
+     * This argument is ignored if the Cycle covariate is not used.
+     */
+    @Argument(fullName = "maximum_cycle_value", shortName = "maxCycle", doc = "the maximum cycle value permitted for the Cycle covariate", required = false)
+    public int MAXIMUM_CYCLE_VALUE = 500;
+
     /**
      * A default base qualities to use as a prior (reported quality) in the mismatch covariate model. This value will replace all base qualities in the read for this default value. Negative value turns it off (default is off)
      */
@@ -176,9 +180,15 @@ public class RecalibrationArgumentCollection {
     @Argument(fullName = "binary_tag_name", shortName = "bintag", required = false, doc = "the binary tag covariate name if using it")
     public String BINARY_TAG_NAME = null;
 
+
+    /////////////////////////////
+    // Debugging-only Arguments
+    /////////////////////////////
+
     @Hidden
     @Argument(fullName = "default_platform", shortName = "dP", required = false, doc = "If a read has no platform then default to the provided String. Valid options are illumina, 454, and solid.")
     public String DEFAULT_PLATFORM = null;
+
     @Hidden
     @Argument(fullName = "force_platform", shortName = "fP", required = false, doc = "If provided, the platform of EVERY read will be forced to be the provided String. Valid options are illumina, 454, and solid.")
     public String FORCE_PLATFORM = null;
diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
index 5d0d94b69..a9b6c7152 100755
--- a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/CycleCovariate.java
@@ -49,7 +49,7 @@ import java.util.EnumSet;
 
 public class CycleCovariate implements StandardCovariate {
 
-    private static final int MAXIMUM_CYCLE_VALUE = 1000;
+    private int MAXIMUM_CYCLE_VALUE;
     private static final int CUSHION_FOR_INDELS = 4;
     private String default_platform = null;
 
@@ -59,6 +59,8 @@ public class CycleCovariate implements StandardCovariate {
     // Initialize any member variables using the command-line arguments passed to the walkers
     @Override
     public void initialize(final RecalibrationArgumentCollection RAC) {
+        this.MAXIMUM_CYCLE_VALUE = RAC.MAXIMUM_CYCLE_VALUE;
+
         if (RAC.DEFAULT_PLATFORM != null && !NGSPlatform.isKnown(RAC.DEFAULT_PLATFORM))
             throw new UserException.CommandLineException("The requested default platform (" + RAC.DEFAULT_PLATFORM + ") is not a recognized platform.");
 
@@ -88,6 +90,9 @@ public class CycleCovariate implements StandardCovariate {
 
             final int MAX_CYCLE_FOR_INDELS = readLength - CUSHION_FOR_INDELS - 1;
             for (int i = 0; i < readLength; i++) {
+                if ( cycle > MAXIMUM_CYCLE_VALUE )
+                    throw new UserException("The maximum allowed value for the cycle is " + MAXIMUM_CYCLE_VALUE + ", but a larger cycle was detected in read " + read.getReadName() + ".  Please use the --maximum_cycle_value argument to increase this value (at the expense of requiring more memory to run)");
+
                 final int substitutionKey = keyFromCycle(cycle);
                 final int indelKey = (i < CUSHION_FOR_INDELS || i > MAX_CYCLE_FOR_INDELS) ? -1 : substitutionKey;
                 values.addCovariate(substitutionKey, indelKey, indelKey, i);
diff --git a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
index deb0931d6..b73b1a311 100644
--- a/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/recalibration/CycleCovariateUnitTest.java
@@ -1,6 +1,7 @@
 package org.broadinstitute.sting.utils.recalibration;
 
 import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.recalibration.covariates.CycleCovariate;
 import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -53,9 +54,31 @@ public class CycleCovariateUnitTest {
         for (short i = 0; i < values.length; i++) {
             short actual = Short.decode(covariate.formatKey(values[i][0]));
             int expected = init + (increment * i);
-            //            System.out.println(String.format("%d: %d, %d", i, actual, expected));
             Assert.assertEquals(actual, expected);
         }
     }
 
+    @Test(enabled = true, expectedExceptions={UserException.class})
+    public void testMoreThanMaxCycleFails() {
+        int readLength = RAC.MAXIMUM_CYCLE_VALUE + 1;
+        GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
+        read.setReadPairedFlag(true);
+        read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
+        read.getReadGroup().setPlatform("illumina");
+
+        ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
+        covariate.recordValues(read, readCovariates);
+    }
+
+    @Test(enabled = true)
+    public void testMaxCyclePasses() {
+        int readLength = RAC.MAXIMUM_CYCLE_VALUE;
+        GATKSAMRecord read = ReadUtils.createRandomRead(readLength);
+        read.setReadPairedFlag(true);
+        read.setReadGroup(new GATKSAMReadGroupRecord("MY.ID"));
+        read.getReadGroup().setPlatform("illumina");
+
+        ReadCovariates readCovariates = new ReadCovariates(read.getReadLength(), 1);
+        covariate.recordValues(read, readCovariates);
+    }
 }

From 4714ccc28456625853791d56167f114b74eeba22 Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Wed, 21 Nov 2012 10:47:35 -0500
Subject: [PATCH 104/236] change the way CombineVariants check the priority
 arguments in order to throw error when the genotypeMergeOption argument is
 set to PRIORITIZE but PRIORITY_STRING is not provided

---
 .../walkers/variantutils/CombineVariants.java | 21 +++++++++----------
 .../variantcontext/VariantContextUtils.java   |  5 ++++-
 2 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index b1d8dc91d..e710befc9 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -134,7 +134,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
     protected VariantContextWriter vcfWriter = null;
 
     @Argument(shortName="genotypeMergeOptions", doc="Determines how we should merge genotype records for samples shared across the ROD files", required=false)
-    public VariantContextUtils.GenotypeMergeType genotypeMergeOption = VariantContextUtils.GenotypeMergeType.PRIORITIZE;
+    public VariantContextUtils.GenotypeMergeType genotypeMergeOption = null;
 
     @Argument(shortName="filteredRecordsMergeType", doc="Determines how we should handle records seen at the same site in the VCF, but with different FILTER fields", required=false)
     public VariantContextUtils.FilteredRecordMergeType filteredRecordsMergeType = VariantContextUtils.FilteredRecordMergeType.KEEP_IF_ANY_UNFILTERED;
@@ -200,13 +200,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
         } else
             logger.warn("VCF output file not an instance of VCFWriterStub; cannot enable sites only output option");
 
-        if ( PRIORITY_STRING == null ) {
+        validateAnnotateUnionArguments();
+        if ( PRIORITY_STRING == null && genotypeMergeOption == null) {
             genotypeMergeOption = VariantContextUtils.GenotypeMergeType.UNSORTED;
             //PRIORITY_STRING = Utils.join(",", vcfRods.keySet());  Deleted by Ami (7/10/12)
-            logger.info("Priority string not provided, using arbitrary genotyping order: " + PRIORITY_STRING);
+            logger.info("Priority string not provided, using arbitrary genotyping order: "+priority);
         }
 
-        validateAnnotateUnionArguments();
         samples = sitesOnlyVCF ? Collections.<String>emptySet() : SampleUtils.getSampleList(vcfRods, genotypeMergeOption);
 
         if ( SET_KEY.toLowerCase().equals("null") )
@@ -228,16 +228,15 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
         if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
             throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
 
-        if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE )
+        if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ){
             priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
-        else
-            priority = new ArrayList<String>(rodNames);
+            if ( rodNames.size() != priority.size() )
+                throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
 
-        if ( rodNames.size() != priority.size() )
-            throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
+            if ( ! rodNames.containsAll(priority) )
+                throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
+        }
 
-        if ( ! rodNames.containsAll(priority) )
-            throw new UserException.BadArgumentValue("rod_priority_list", "Not all priority elements provided as input RODs: " + PRIORITY_STRING);
     }
 
     public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 81959c998..614b234e9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -840,8 +840,11 @@ public class VariantContextUtils {
         if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
             throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
 
-        if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
+        if ( mergeOption == GenotypeMergeType.UNSORTED ){
+            if (priorityListOfVCs != null )
+                logger.info("Priority string was provided but is not used since GenotypeMergeType is UNSORTED");
             return new ArrayList<VariantContext>(unsortedVCs);
+        }
         else {
             ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
             Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));

From c8be7c3102d7764b0141665bc64955062da8c00a Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Wed, 21 Nov 2012 15:56:53 -0500
Subject: [PATCH 106/236] Keep SNPs and indels separately for batch merging;
 Add options to DepthOfCoverage to count fragments (to not double-count
 overlapping reads of same fragment); DepthOfCoverage should now support
 ReducedReads; Replace recusrion with loop in DoC/package.scala (for lists
 longer than 5000 elements)

---
 .../gatk/walkers/coverage/CoverageUtils.java  | 114 +++++++++++++++---
 .../walkers/coverage/DepthOfCoverage.java     |   6 +-
 .../pileup/AbstractReadBackedPileup.java      |  56 ++++++++-
 .../sting/utils/pileup/ReadBackedPileup.java  |  17 +++
 .../queue/qscripts/CNV/xhmmCNVpipeline.scala  |  25 +++-
 .../sting/queue/util/DoC/package.scala        |  26 ++--
 .../sting/queue/util/VCF_BAM_utilities.scala  |  27 ++---
 7 files changed, 215 insertions(+), 56 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java
index a41e55166..21532823b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/CoverageUtils.java
@@ -6,11 +6,10 @@ import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.utils.BaseUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.broadinstitute.sting.utils.fragments.FragmentCollection;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 
-import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
+import java.util.*;
 
 /**
  * IF THERE IS NO JAVADOC RIGHT HERE, YELL AT chartl
@@ -20,6 +19,21 @@ import java.util.Map;
  */
 public class CoverageUtils {
 
+    public enum CountPileupType {
+        /**
+         * Count all reads independently (even if from the same fragment).
+         */
+        COUNT_READS,
+        /**
+         * Count all fragments (even if the reads that compose the fragment are not consistent at that base).
+         */
+        COUNT_FRAGMENTS,
+        /**
+         * Count all fragments (but only if the reads that compose the fragment are consistent at that base).
+         */
+        COUNT_FRAGMENTS_REQUIRE_SAME_BASE
+    }
+
     /**
      * Returns the counts of bases from reads with MAPQ > minMapQ and base quality > minBaseQ in the context
      * as an array of ints, indexed by the index fields of BaseUtils
@@ -64,10 +78,10 @@ public class CoverageUtils {
     }
 
     public static Map<DoCOutputType.Partition,Map<String,int[]>>
-                    getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, Collection<DoCOutputType.Partition> types) {
+                    getBaseCountsByPartition(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, CountPileupType countType, Collection<DoCOutputType.Partition> types) {
 
         Map<DoCOutputType.Partition,Map<String,int[]>> countsByIDByType = new HashMap<DoCOutputType.Partition,Map<String,int[]>>();
-        Map<SAMReadGroupRecord,int[]> countsByRG = getBaseCountsByReadGroup(context,minMapQ,maxMapQ,minBaseQ,maxBaseQ);
+        Map<SAMReadGroupRecord,int[]> countsByRG = getBaseCountsByReadGroup(context,minMapQ,maxMapQ,minBaseQ,maxBaseQ,countType);
         for (DoCOutputType.Partition t : types ) {
             // iterate through the read group counts and build the type associations
             for ( Map.Entry<SAMReadGroupRecord,int[]> readGroupCountEntry : countsByRG.entrySet() ) {
@@ -95,31 +109,95 @@ public class CoverageUtils {
         }
     }
 
-    public static Map<SAMReadGroupRecord,int[]> getBaseCountsByReadGroup(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ) {
+    public static Map<SAMReadGroupRecord,int[]> getBaseCountsByReadGroup(AlignmentContext context, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ, CountPileupType countType) {
         Map<SAMReadGroupRecord, int[]> countsByRG = new HashMap<SAMReadGroupRecord,int[]>();
-        for ( PileupElement e : context.getBasePileup() ) {
-            if ( e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ || e.isDeletion() ) ) {
-                SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
-                if ( ! countsByRG.keySet().contains(readGroup) ) {
-                    countsByRG.put(readGroup,new int[6]);
-                    updateCounts(countsByRG.get(readGroup),e);
-                } else {
-                    updateCounts(countsByRG.get(readGroup),e);
+
+        List<PileupElement> countPileup = new LinkedList<PileupElement>();
+        FragmentCollection<PileupElement> fpile;
+
+        switch (countType) {
+
+            case COUNT_READS:
+                for (PileupElement e : context.getBasePileup())
+                    if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
+                        countPileup.add(e);
+                break;
+
+            case COUNT_FRAGMENTS: // ignore base identities and put in FIRST base that passes filters:
+                fpile = context.getBasePileup().getStartSortedPileup().toFragments();
+
+                for (PileupElement e : fpile.getSingletonReads())
+                    if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
+                        countPileup.add(e);
+
+                for (List<PileupElement> overlappingPair : fpile.getOverlappingPairs()) {
+                    // iterate over all elements in fragment:
+                    for (PileupElement e : overlappingPair) {
+                        if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ)) {
+                            countPileup.add(e); // add the first passing element per fragment
+                            break;
+                        }
+                    }
                 }
-            }
+                break;
+
+            case COUNT_FRAGMENTS_REQUIRE_SAME_BASE:
+                fpile = context.getBasePileup().getStartSortedPileup().toFragments();
+
+                for (PileupElement e : fpile.getSingletonReads())
+                    if (countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
+                        countPileup.add(e);
+
+                for (List<PileupElement> overlappingPair : fpile.getOverlappingPairs()) {
+                    PileupElement firstElem = null;
+                    PileupElement addElem = null;
+
+                    // iterate over all elements in fragment:
+                    for (PileupElement e : overlappingPair) {
+                        if (firstElem == null)
+                            firstElem = e;
+                        else if (e.getBase() != firstElem.getBase()) {
+                            addElem = null;
+                            break;
+                        }
+
+                        // will add the first passing element per base-consistent fragment:
+                        if (addElem == null && countElement(e, minMapQ, maxMapQ, minBaseQ, maxBaseQ))
+                            addElem = e;
+                    }
+
+                    if (addElem != null)
+                        countPileup.add(addElem);
+                }
+                break;
+
+            default:
+                throw new UserException("Must use valid CountPileupType");
+        }
+
+        for (PileupElement e : countPileup) {
+            SAMReadGroupRecord readGroup = getReadGroup(e.getRead());
+            if (!countsByRG.keySet().contains(readGroup))
+                countsByRG.put(readGroup, new int[6]);
+
+            updateCounts(countsByRG.get(readGroup), e);
         }
 
         return countsByRG;
     }
 
+    private static boolean countElement(PileupElement e, int minMapQ, int maxMapQ, byte minBaseQ, byte maxBaseQ) {
+        return (e.getMappingQual() >= minMapQ && e.getMappingQual() <= maxMapQ && ( e.getQual() >= minBaseQ && e.getQual() <= maxBaseQ || e.isDeletion() ));
+    }
+
     private static void updateCounts(int[] counts, PileupElement e) {
         if ( e.isDeletion() ) {
-            counts[BaseUtils.DELETION_INDEX]++;
+            counts[BaseUtils.DELETION_INDEX] += e.getRepresentativeCount();
         } else if ( BaseUtils.basesAreEqual((byte) 'N', e.getBase()) ) {
-            counts[BaseUtils.NO_CALL_INDEX]++;
+            counts[BaseUtils.NO_CALL_INDEX] += e.getRepresentativeCount();
         } else {
             try {
-                counts[BaseUtils.simpleBaseToBaseIndex(e.getBase())]++;
+                counts[BaseUtils.simpleBaseToBaseIndex(e.getBase())] += e.getRepresentativeCount();
             } catch (ArrayIndexOutOfBoundsException exc) {
                 throw new ReviewedStingException("Expected a simple base, but actually received"+(char)e.getBase());
             }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
index 44b0d74ca..fe9942662 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/coverage/DepthOfCoverage.java
@@ -129,11 +129,15 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
     int minMappingQuality = -1;
     @Argument(fullName = "maxMappingQuality", doc = "Maximum mapping quality of reads to count towards depth. Defaults to 2^31-1 (Integer.MAX_VALUE).", required = false)
     int maxMappingQuality = Integer.MAX_VALUE;
+
     @Argument(fullName = "minBaseQuality", shortName = "mbq", doc = "Minimum quality of bases to count towards depth. Defaults to -1.", required = false)
     byte minBaseQuality = -1;
     @Argument(fullName = "maxBaseQuality", doc = "Maximum quality of bases to count towards depth. Defaults to 127 (Byte.MAX_VALUE).", required = false)
     byte maxBaseQuality = Byte.MAX_VALUE;
 
+    @Argument(fullName = "countType", doc = "How should overlapping reads from the same fragment be handled?", required = false)
+    CoverageUtils.CountPileupType countType = CoverageUtils.CountPileupType.COUNT_READS;
+
     /**
      * Instead of reporting depth, report the base pileup at each locus
      */
@@ -373,7 +377,7 @@ public class DepthOfCoverage extends LocusWalker<Map<DoCOutputType.Partition,Map
                 //System.out.printf("\t[log]\t%s",ref.getLocus());
             }
 
-            return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,partitionTypes);
+            return CoverageUtils.getBaseCountsByPartition(context,minMappingQuality,maxMappingQuality,minBaseQuality,maxBaseQuality,countType,partitionTypes);
         } else {
             return null;
         }
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index ed6fc46bb..d0ae68912 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -254,19 +254,32 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
      * Returns a new ReadBackedPileup where only one read from an overlapping read
      * pair is retained.  If the two reads in question disagree to their basecall,
      * neither read is retained.  If they agree on the base, the read with the higher
-     * quality observation is retained
+     * base quality observation is retained
      *
      * @return the newly filtered pileup
      */
     @Override
-    public RBP getOverlappingFragmentFilteredPileup() {
+    public ReadBackedPileup getOverlappingFragmentFilteredPileup() {
+        return getOverlappingFragmentFilteredPileup(true, true);
+    }
+
+    /**
+     * Returns a new ReadBackedPileup where only one read from an overlapping read
+     * pair is retained.  If discardDiscordant and the two reads in question disagree to their basecall,
+     * neither read is retained.  Otherwise, the read with the higher
+     * quality (base or mapping, depending on baseQualNotMapQual) observation is retained
+     *
+     * @return the newly filtered pileup
+     */
+    @Override
+    public RBP getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual) {
         if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
             PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
             PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
 
             for (final String sample : tracker.getSamples()) {
                 PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
-                AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup();
+                AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements).getOverlappingFragmentFilteredPileup(discardDiscordant, baseQualNotMapQual);
                 filteredTracker.addElements(sample, pileup.pileupElementTracker);
             }
             return (RBP) createNewPileup(loc, filteredTracker);
@@ -284,11 +297,16 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
 
                     // if the reads disagree at this position, throw them both out.  Otherwise
                     // keep the element with the higher quality score
-                    if (existing.getBase() != p.getBase()) {
+                    if (discardDiscordant && existing.getBase() != p.getBase()) {
                         filteredPileup.remove(readName);
                     } else {
-                        if (existing.getQual() < p.getQual()) {
-                            filteredPileup.put(readName, p);
+                        if (baseQualNotMapQual) {
+                            if (existing.getQual() < p.getQual())
+                                filteredPileup.put(readName, p);
+                        }
+                        else {
+                            if (existing.getMappingQual() < p.getMappingQual())
+                                filteredPileup.put(readName, p);
                         }
                     }
                 }
@@ -998,6 +1016,32 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
         return quals2String(getQuals());
     }
 
+    /**
+     * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads.
+     *
+     * @return
+     */
+    @Override
+    public ReadBackedPileup getStartSortedPileup() {
+
+        final TreeSet<PE> sortedElements = new TreeSet<PE>(new Comparator<PE>() {
+            @Override
+            public int compare(PE element1, PE element2) {
+                final int difference = element1.getRead().getAlignmentStart() - element2.getRead().getAlignmentStart();
+                return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
+            }
+        });
+        UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
+        for (PE pile : tracker)
+            sortedElements.add(pile);
+
+        UnifiedPileupElementTracker<PE> sortedTracker = new UnifiedPileupElementTracker<PE>();
+        for (PE pile : sortedElements)
+            sortedTracker.add(pile);
+
+        return (RBP) createNewPileup(this.getLocation(), sortedTracker);
+    }
+
     @Override
     public FragmentCollection<PileupElement> toFragments() {
         return FragmentUtils.create(this);
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
index f15468840..be61bad99 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
@@ -60,6 +60,16 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
      */
     public ReadBackedPileup getOverlappingFragmentFilteredPileup();
 
+    /**
+     * Returns a new ReadBackedPileup where only one read from an overlapping read
+     * pair is retained.  If discardDiscordant and the two reads in question disagree to their basecall,
+     * neither read is retained.  Otherwise, the read with the higher
+     * quality (base or mapping, depending on baseQualNotMapQual) observation is retained
+     *
+     * @return the newly filtered pileup
+     */
+    public ReadBackedPileup getOverlappingFragmentFilteredPileup(boolean discardDiscordant, boolean baseQualNotMapQual);
+
     /**
      * Returns a new ReadBackedPileup that is free of mapping quality zero reads in this pileup.  Note that this
      * does not copy the data, so both ReadBackedPileups should not be changed.  Doesn't make an unnecessary copy
@@ -261,6 +271,13 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
      */
     public byte[] getMappingQuals();
 
+    /**
+     * Returns a new ReadBackedPileup that is sorted by start coordinate of the reads.
+     *
+     * @return
+     */
+    public ReadBackedPileup getStartSortedPileup();
+
     /**
      * Converts this pileup into a FragmentCollection (see FragmentUtils for documentation)
      * @return
diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
index 8db089484..c556913ab 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
@@ -8,6 +8,7 @@ import org.broadinstitute.sting.commandline.Hidden
 import java.io.{PrintStream, PrintWriter}
 import org.broadinstitute.sting.utils.text.XReadLines
 import collection.JavaConversions._
+import org.broadinstitute.sting.gatk.walkers.coverage.CoverageUtils
 
 class xhmmCNVpipeline extends QScript {
   qscript =>
@@ -15,22 +16,22 @@ class xhmmCNVpipeline extends QScript {
   @Input(doc = "bam input, as .bam or as a list of files", shortName = "I", required = true)
   var bams: File = _
 
-  @Argument(doc = "gatk jar file", shortName = "J", required = true)
+  @Input(doc = "gatk jar file", shortName = "J", required = true)
   var gatkJarFile: File = _
 
-  @Argument(doc = "xhmm executable file", shortName = "xhmmExec", required = true)
+  @Input(doc = "xhmm executable file", shortName = "xhmmExec", required = true)
   var xhmmExec: File = _
 
-  @Argument(doc = "Plink/Seq executable file", shortName = "pseqExec", required = true)
+  @Input(doc = "Plink/Seq executable file", shortName = "pseqExec", required = true)
   var pseqExec: File = _
 
   @Argument(doc = "Plink/Seq SEQDB file (Reference genome sequence)", shortName = "SEQDB", required = true)
   var pseqSeqDB: String = _
 
-  @Argument(shortName = "R", doc = "ref", required = true)
+  @Input(shortName = "R", doc = "ref", required = true)
   var referenceFile: File = _
 
-  @Argument(shortName = "L", doc = "Intervals", required = false)
+  @Input(shortName = "L", doc = "Intervals", required = false)
   var intervals: File = _
 
   @Argument(doc = "level of parallelism for BAM DoC.   By default is set to 0 [no scattering].", shortName = "scatter", required = false)
@@ -42,6 +43,15 @@ class xhmmCNVpipeline extends QScript {
   @Output(doc = "Base name for files to output", shortName = "o", required = true)
   var outputBase: File = _
 
+  @Hidden
+  @Argument(doc = "How should overlapping reads from the same fragment be handled?", shortName = "countType", required = false)
+  // TODO: change this to be the default once reads can be ordered properly for FragmentUtils.create():
+  //
+  // Don't want to double-count (but also don't mind counting base-inconsistencies in overlap):
+  //var countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS
+  //
+  var countType = CoverageUtils.CountPileupType.COUNT_READS
+
   @Argument(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
   var MAX_DEPTH = 20000
 
@@ -56,6 +66,9 @@ class xhmmCNVpipeline extends QScript {
   @Argument(doc = "Minimum read mapping quality", shortName = "MMQ", required = false)
   var minMappingQuality = 0
 
+  @Argument(doc = "Minimum base quality to be counted in depth", shortName = "MBQ", required = false)
+  var minBaseQuality = 0
+
   @Argument(doc = "Memory (in GB) required for storing the whole matrix in memory", shortName = "wholeMatrixMemory", required = false)
   var wholeMatrixMemory = -1
 
@@ -159,7 +172,7 @@ class xhmmCNVpipeline extends QScript {
     var docs: List[DoC] = List[DoC]()
     for (group <- groups) {
       Console.out.printf("Group is %s%n", group)
-      docs ::= new DoC(group.bams, group.DoC_output, MAX_DEPTH, minMappingQuality, scatterCountInput, START_BIN, NUM_BINS, Nil) with CommandLineGATKArgs
+      docs ::= new DoC(group.bams, group.DoC_output, countType, MAX_DEPTH, minMappingQuality, minBaseQuality, scatterCountInput, START_BIN, NUM_BINS, Nil) with CommandLineGATKArgs
     }
     addAll(docs)
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala b/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala
index f35db4aa3..2b19b0f8e 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/DoC/package.scala
@@ -6,9 +6,10 @@ import org.broadinstitute.sting.queue.function.scattergather.ScatterGatherableFu
 import org.broadinstitute.sting.gatk.downsampling.DownsampleType
 import org.broadinstitute.sting.commandline.{Input, Gather, Output}
 import org.broadinstitute.sting.queue.function.CommandLineFunction
+import org.broadinstitute.sting.gatk.walkers.coverage.CoverageUtils
 
 package object DoC {
-  class DoC(val bams: List[File], val DoC_output: File, val MAX_DEPTH: Int, val minMappingQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int]) extends CommandLineGATK with ScatterGatherableFunction {
+  class DoC(val bams: List[File], val DoC_output: File, val countType: CoverageUtils.CountPileupType, val MAX_DEPTH: Int, val minMappingQuality: Int, val minBaseQuality: Int, val scatterCountInput: Int, val START_BIN: Int, val NUM_BINS: Int, val minCoverageCalcs: Seq[Int]) extends CommandLineGATK with ScatterGatherableFunction {
     val DOC_OUTPUT_SUFFIX: String = ".sample_interval_summary"
 
     // So that the output files of this DoC run get deleted once they're used further downstream:
@@ -32,8 +33,9 @@ package object DoC {
     override def commandLine = super.commandLine +
       " --omitDepthOutputAtEachBase" +
       " --omitLocusTable" +
-      " --minBaseQuality 0" +
       " --minMappingQuality " + minMappingQuality +
+      " --minBaseQuality " + minBaseQuality +
+      optional("--countType", countType, spaceSeparated=true, escape=true, format="%s") +
       " --start " + START_BIN + " --stop " + MAX_DEPTH + " --nBins " + NUM_BINS +
       (if (!minCoverageCalcs.isEmpty) minCoverageCalcs.map(cov => " --summaryCoverageThreshold " + cov).reduceLeft(_ + "" + _) else "") +
       " --includeRefNSites" +
@@ -42,7 +44,7 @@ package object DoC {
     override def shortDescription = "DoC: " + DoC_output
   }
 
-  class DoCwithDepthOutputAtEachBase(bams: List[File], DoC_output: File, MAX_DEPTH: Int, minMappingQuality: Int, scatterCountInput: Int, START_BIN: Int, NUM_BINS: Int, minCoverageCalcs: Seq[Int]) extends DoC(bams, DoC_output, MAX_DEPTH: Int, minMappingQuality, scatterCountInput, START_BIN, NUM_BINS, minCoverageCalcs) {
+  class DoCwithDepthOutputAtEachBase(bams: List[File], DoC_output: File, countType: CoverageUtils.CountPileupType, MAX_DEPTH: Int, minMappingQuality: Int, minBaseQuality: Int, scatterCountInput: Int, START_BIN: Int, NUM_BINS: Int, minCoverageCalcs: Seq[Int]) extends DoC(bams, DoC_output, countType: CoverageUtils.CountPileupType, MAX_DEPTH: Int, minMappingQuality, minBaseQuality, scatterCountInput, START_BIN, NUM_BINS, minCoverageCalcs) {
     // HACK for DoC to work properly within Queue:
     @Output
     @Gather(classOf[org.broadinstitute.sting.queue.function.scattergather.SimpleTextGatherFunction])
@@ -52,15 +54,21 @@ package object DoC {
   }
 
   def buildDoCgroups(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]], samplesPerJob: Int, outputBase: File): List[Group] = {
+    var l: List[Group] = Nil
 
-    def buildDoCgroupsHelper(samples: List[String], count: Int): List[Group] = (samples splitAt samplesPerJob) match {
-      case (Nil, y) =>
-        return Nil
-      case (subsamples, remaining) =>
-        return new Group("group" + count, outputBase, subsamples, VCF_BAM_utilities.findBAMsForSamples(subsamples, sampleToBams)) :: buildDoCgroupsHelper(remaining, count + 1)
+    var remaining = samples
+    var subsamples: List[String] = Nil
+    var count = 1
+
+    while (!remaining.isEmpty) {
+      val splitRes = (remaining splitAt samplesPerJob)
+      subsamples = splitRes._1
+      remaining = splitRes._2
+      l ::= new Group("group" + count, outputBase, subsamples, VCF_BAM_utilities.findBAMsForSamples(subsamples, sampleToBams))
+      count = count + 1
     }
 
-    return buildDoCgroupsHelper(samples, 0)
+    return l
   }
 
   // A group has a list of samples and bam files to use for DoC
diff --git a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala
index 1f18858e1..3fe867981 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/util/VCF_BAM_utilities.scala
@@ -26,36 +26,31 @@ object VCF_BAM_utilities {
     case _ => throw new RuntimeException("Unexpected BAM input type: " + bamsIn + "; only permitted extensions are .bam and .list")
   }
 
-  def getMapOfBAMsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = bams match {
-    case Nil => return scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]]
-
-    case x :: y =>
-      val m: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = getMapOfBAMsForSample(y)
-      val bamSamples: List[String] = getSamplesInBAM(x)
+  def getMapOfBAMsForSample(bams: List[File]): scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]] = {
+    var m = scala.collection.mutable.Map.empty[String, scala.collection.mutable.Set[File]]
 
+    for (bam <- bams) {
+      val bamSamples: List[String] = getSamplesInBAM(bam)
       for (s <- bamSamples) {
         if (!m.contains(s))
           m += s -> scala.collection.mutable.Set.empty[File]
 
-        m(s) = m(s) + x
+        m(s) += bam
       }
+    }
 
       return m
   }
 
   def findBAMsForSamples(samples: List[String], sampleToBams: scala.collection.mutable.Map[String, scala.collection.mutable.Set[File]]): List[File] = {
+    var s = scala.collection.mutable.Set.empty[File]
 
-    def findBAMsForSamplesHelper(samples: List[String]): scala.collection.mutable.Set[File] = samples match {
-      case Nil => scala.collection.mutable.Set.empty[File]
-
-      case x :: y =>
-        var bamsForSampleX: scala.collection.mutable.Set[File] = scala.collection.mutable.Set.empty[File]
-        if (sampleToBams.contains(x))
-          bamsForSampleX = sampleToBams(x)
-        return bamsForSampleX ++ findBAMsForSamplesHelper(y)
+    for (sample <- samples) {
+      if (sampleToBams.contains(sample))
+        s ++= sampleToBams(sample)
     }
 
     val l: List[File] = Nil
-    return l ++ findBAMsForSamplesHelper(samples)
+    return l ++ s
   }
 }

From ed50814ccba7fe72f296c466e0abf64d7923c51d Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 21 Nov 2012 15:57:05 -0500
Subject: [PATCH 107/236] Finally found a case where user errors were being
 masked behind other errors and could debug.  It turns out that the
 checkForMaskedUserErrors() method needs to run recursively over all levels
 (calling exception.getCause()) to check for the original cause.

---
 .../sting/gatk/CommandLineGATK.java           | 21 ++++++++++++-------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
index 0daad2c2b..4f9031329 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/CommandLineGATK.java
@@ -118,17 +118,24 @@ public class CommandLineGATK extends CommandLineExecutable {
     public static final String DISK_QUOTA_EXCEEDED_ERROR = "Disk quota exceeded";
 
     private static void checkForMaskedUserErrors(final Throwable t) {
+        // masked out of memory error
+        if ( t instanceof OutOfMemoryError )
+            exitSystemWithUserError(new UserException.NotEnoughMemory());
+        // masked user error
+        if ( t instanceof UserException || t instanceof TribbleException )
+            exitSystemWithUserError(new UserException(t.getMessage()));
+
+        // no message means no masked error
         final String message = t.getMessage();
         if ( message == null )
             return;
 
-        // we know what to do about the common "Too many open files" error
+        // too many open files error
         if ( message.contains("Too many open files") )
             exitSystemWithUserError(new UserException.TooManyOpenFiles());
 
         // malformed BAM looks like a SAM file
-        if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) ||
-                message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
+        if ( message.contains(PICARD_TEXT_SAM_FILE_ERROR_1) || message.contains(PICARD_TEXT_SAM_FILE_ERROR_2) )
             exitSystemWithSamError(t);
 
         // can't close tribble index when writing
@@ -138,12 +145,10 @@ public class CommandLineGATK extends CommandLineExecutable {
         // disk is full
         if ( message.contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || message.contains(DISK_QUOTA_EXCEEDED_ERROR) )
             exitSystemWithUserError(new UserException.NoSpaceOnDevice());
-        if ( t.getCause() != null && (t.getCause().getMessage().contains(NO_SPACE_LEFT_ON_DEVICE_ERROR) || t.getCause().getMessage().contains(DISK_QUOTA_EXCEEDED_ERROR)) )
-            exitSystemWithUserError(new UserException.NoSpaceOnDevice());
 
-        // masked out of memory error
-        if ( t.getCause() != null && t.getCause() instanceof OutOfMemoryError )
-            exitSystemWithUserError(new UserException.NotEnoughMemory());
+        // masked error wrapped in another one
+        if ( t.getCause() != null )
+            checkForMaskedUserErrors(t.getCause());
     }
 
     /**

From a8c7edca053a5e0fbe360bd9bd3bc09472e9b532 Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Wed, 21 Nov 2012 16:01:10 -0500
Subject: [PATCH 108/236] Fixed fragment handling in DepthOfCoverage

---
 .../sting/queue/qscripts/CNV/xhmmCNVpipeline.scala         | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
index c556913ab..28a2534c0 100644
--- a/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
+++ b/public/scala/qscript/org/broadinstitute/sting/queue/qscripts/CNV/xhmmCNVpipeline.scala
@@ -45,12 +45,7 @@ class xhmmCNVpipeline extends QScript {
 
   @Hidden
   @Argument(doc = "How should overlapping reads from the same fragment be handled?", shortName = "countType", required = false)
-  // TODO: change this to be the default once reads can be ordered properly for FragmentUtils.create():
-  //
-  // Don't want to double-count (but also don't mind counting base-inconsistencies in overlap):
-  //var countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS
-  //
-  var countType = CoverageUtils.CountPileupType.COUNT_READS
+  var countType = CoverageUtils.CountPileupType.COUNT_FRAGMENTS
 
   @Argument(doc = "Maximum depth (before GATK down-sampling kicks in...)", shortName = "MAX_DEPTH", required = false)
   var MAX_DEPTH = 20000

From 4f2229d399948200576695574ef46111f3ed542d Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 21 Nov 2012 16:01:26 -0500
Subject: [PATCH 109/236] As per the TODO message, I removed a check that was
 no longer necessary.  Now ID is an allowable INFO field key.

---
 .../sting/utils/variantcontext/VariantContext.java         | 7 -------
 1 file changed, 7 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 27a5b0c24..12f9cb20c 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -184,9 +184,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
     protected CommonInfo commonInfo = null;
     public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
 
-    @Deprecated // ID is no longer stored in the attributes map
-    private final static String ID_KEY = "ID";
-
     public final static Set<String> PASSES_FILTERS = Collections.unmodifiableSet(new LinkedHashSet<String>());
 
     /** The location of this VariantContext */
@@ -287,10 +284,6 @@ public class VariantContext implements Feature { // to enable tribble integratio
 
         this.commonInfo = new CommonInfo(source, log10PError, filters, attributes);
 
-        // todo -- remove me when this check is no longer necessary
-        if ( this.commonInfo.hasAttribute(ID_KEY) )
-            throw new IllegalArgumentException("Trying to create a VariantContext with a ID key.  Please use provided constructor argument ID");
-
         if ( alleles == null ) { throw new IllegalArgumentException("Alleles cannot be null"); }
 
         // we need to make this a LinkedHashSet in case the user prefers a given ordering of alleles

From c08b78274395c916db872417d4f17ebdacd39906 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 19 Nov 2012 14:39:06 -0500
Subject: [PATCH 110/236] Count isActive calls directly

---
 .../sting/gatk/traversals/TraverseActiveRegions.java  |  5 +----
 .../sting/utils/activeregion/ActivityProfile.java     |  5 -----
 .../gatk/traversals/TraverseActiveRegionsTest.java    | 11 ++++++++---
 3 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 4fe83f331..a2c37944a 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -34,9 +34,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
     private final LinkedList<ActiveRegion> workQueue = new LinkedList<ActiveRegion>();
     private final LinkedHashSet<GATKSAMRecord> myReads = new LinkedHashSet<GATKSAMRecord>();
 
-    // package access for unit testing
-    ActivityProfile profile;
-
     @Override
     public String getTraversalUnits() {
         return "active regions";
@@ -56,7 +53,7 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
 
         int minStart = Integer.MAX_VALUE;
         final List<ActiveRegion> activeRegions = new LinkedList<ActiveRegion>();
-        profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
+        ActivityProfile profile = new ActivityProfile(engine.getGenomeLocParser(), walker.hasPresetActiveRegions() );
 
         ReferenceOrderedView referenceOrderedDataView = getReferenceOrderedView(walker, dataProvider, locusView);
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
index 38cfbb38d..e96eb843d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActivityProfile.java
@@ -103,11 +103,6 @@ public class ActivityProfile {
         isActiveList.add(result);
     }
 
-    // for unit testing
-    public List<ActivityProfileResult> getActiveList() {
-        return isActiveList;
-    }
-
     public int size() {
         return isActiveList.size();
     }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index 8740a8b68..edc818aca 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -41,6 +41,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
     private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
         private final double prob;
+        public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
 
         public DummyActiveRegionWalker() {
             this.prob = 1.0;
@@ -48,6 +49,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         @Override
         public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+            isActiveCalls.add(ref.getLocus());
             return new ActivityProfileResult(ref.getLocus(), prob);
         }
 
@@ -71,7 +73,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
     private IndexedFastaSequenceFile reference;
     private GenomeLocParser genomeLocParser;
-    private ActiveRegionWalker<Integer, Integer> walker;
+    private DummyActiveRegionWalker walker;
 
     @BeforeClass
     private void init() throws FileNotFoundException {
@@ -83,18 +85,21 @@ public class TraverseActiveRegionsTest extends BaseTest {
     @Test
     public void testAllIntervalsSeen() throws Exception {
         List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
+        List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
+
         GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
         intervals.add(interval);
 
         LocusShardDataProvider dataProvider = createDataProvider(intervals);
 
         t.traverse(walker, dataProvider, 0);
+        activeIntervals.addAll(walker.isActiveCalls);
 
         boolean allGenomeLocsSeen = true;
         for (GenomeLoc loc : intervals) {
             boolean thisGenomeLocSeen = false;
-            for (ActivityProfileResult active : t.profile.getActiveList()) {
-                if (loc.equals(active.getLoc())) {
+            for (GenomeLoc activeLoc : activeIntervals) {
+                if (loc.equals(activeLoc)) {
                     thisGenomeLocSeen = true;
                     break;
                 }

From e8defcb20dfcc50cec565fbb7fd2e93479162ae5 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 19 Nov 2012 14:44:00 -0500
Subject: [PATCH 111/236] Test multiple bases and intervals

---
 .../traversals/TraverseActiveRegionsTest.java | 65 +++++++++++++------
 1 file changed, 45 insertions(+), 20 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index edc818aca..d61da5a83 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -83,49 +83,74 @@ public class TraverseActiveRegionsTest extends BaseTest {
     }
 
     @Test
-    public void testAllIntervalsSeen() throws Exception {
+    public void testAllBasesSeenSuite() {
         List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
         List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
 
         GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
         intervals.add(interval);
+        testAllBasesSeen(intervals);
 
-        LocusShardDataProvider dataProvider = createDataProvider(intervals);
+        interval = genomeLocParser.createGenomeLoc("1", 10, 20);
+        intervals.add(interval);
+        testAllBasesSeen(intervals);
+    }
 
-        t.traverse(walker, dataProvider, 0);
-        activeIntervals.addAll(walker.isActiveCalls);
+    public void testAllBasesSeen(List<GenomeLoc> intervals) {
+        List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
+        for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
+            t.traverse(walker, dataProvider, 0);
+            activeIntervals.addAll(walker.isActiveCalls);
+        }
 
-        boolean allGenomeLocsSeen = true;
-        for (GenomeLoc loc : intervals) {
-            boolean thisGenomeLocSeen = false;
+        boolean allBasesSeen = true;
+        for (GenomeLoc base : toBases(intervals)) {
+            boolean thisBaseSeen = false;
             for (GenomeLoc activeLoc : activeIntervals) {
-                if (loc.equals(activeLoc)) {
-                    thisGenomeLocSeen = true;
+                if (base.equals(activeLoc)) {
+                    thisBaseSeen = true;
                     break;
                 }
             }
-            if (!thisGenomeLocSeen) {
-                allGenomeLocsSeen = false;
+            if (!thisBaseSeen) {
+                allBasesSeen = false;
                 break;
             }
         }
 
-        Assert.assertTrue(allGenomeLocsSeen, "Some intervals missing from activity profile");
+        Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile");
     }
 
-    private LocusShardDataProvider createDataProvider(List<GenomeLoc> intervals) {
+    private List<GenomeLoc> toBases(List<GenomeLoc> intervals) {
+        List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
+        for (GenomeLoc interval : intervals) {
+            if (interval.size() == 1)
+                bases.add(interval);
+            else {
+                for (int location = interval.getStart(); location <= interval.getStop(); location++) {
+                    bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
+                }
+            }
+        }
+        return bases;
+    }
+
+    private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
         walker = new DummyActiveRegionWalker();
 
-        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
-        Shard shard = new MockLocusShard(genomeLocParser, intervals);
-        WindowMaker windowMaker = new WindowMaker(shard, genomeLocParser,iterator,shard.getGenomeLocs());
-        WindowMaker.WindowMakerIterator window = windowMaker.next();
-
         GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
-        //engine.setReferenceDataSource(reference);
         engine.setGenomeLocParser(genomeLocParser);
         t.initialize(engine);
 
-        return new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>());
+        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
+        Shard shard = new MockLocusShard(genomeLocParser, intervals);
+
+        List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
+        WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
+        for (WindowMaker.WindowMakerIterator window : windowMaker) {
+            providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
+        }
+
+        return providers;
     }
 }

From 3fa3b00f4abd3159b67a8013505606bf96db1a38 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 19 Nov 2012 14:45:19 -0500
Subject: [PATCH 112/236] Add ActiveRegion tests and refactor

---
 .../traversals/TraverseActiveRegionsTest.java | 156 +++++++++++++-----
 1 file changed, 115 insertions(+), 41 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index d61da5a83..ce4d400b4 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -28,20 +28,23 @@ import org.testng.annotations.Test;
 
 import java.io.File;
 import java.io.FileNotFoundException;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
 
 /**
  * Created with IntelliJ IDEA.
  * User: thibault
  * Date: 11/13/12
  * Time: 2:47 PM
+ *
+ * Test the Active Region Traversal Contract
+ * http://iwww.broadinstitute.org/gsa/wiki/index.php/Active_Region_Traversal_Contract
  */
 public class TraverseActiveRegionsTest extends BaseTest {
 
     private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
         private final double prob;
         public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
+        public List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
 
         public DummyActiveRegionWalker() {
             this.prob = 1.0;
@@ -55,6 +58,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         @Override
         public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
+            mappedActiveRegions.add(activeRegion);
             return 0;
         }
 
@@ -73,7 +77,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
     private IndexedFastaSequenceFile reference;
     private GenomeLocParser genomeLocParser;
-    private DummyActiveRegionWalker walker;
 
     @BeforeClass
     private void init() throws FileNotFoundException {
@@ -83,61 +86,133 @@ public class TraverseActiveRegionsTest extends BaseTest {
     }
 
     @Test
-    public void testAllBasesSeenSuite() {
+    public void testAllBasesSeen() {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
         List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
-        List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
 
-        GenomeLoc interval = genomeLocParser.createGenomeLoc("1", 1, 1);
-        intervals.add(interval);
-        testAllBasesSeen(intervals);
+        intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1));
+        List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
+        // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
+        verifyEqualIntervals(intervals, activeIntervals);
 
-        interval = genomeLocParser.createGenomeLoc("1", 10, 20);
-        intervals.add(interval);
-        testAllBasesSeen(intervals);
+        intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
+        activeIntervals = getIsActiveIntervals(walker, intervals);
+        // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
+        verifyEqualIntervals(intervals, activeIntervals);
+
+        // TODO: more tests and edge cases
     }
 
-    public void testAllBasesSeen(List<GenomeLoc> intervals) {
+    private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
         List<GenomeLoc> activeIntervals = new ArrayList<GenomeLoc>();
         for (LocusShardDataProvider dataProvider : createDataProviders(intervals)) {
             t.traverse(walker, dataProvider, 0);
             activeIntervals.addAll(walker.isActiveCalls);
         }
 
-        boolean allBasesSeen = true;
-        for (GenomeLoc base : toBases(intervals)) {
-            boolean thisBaseSeen = false;
-            for (GenomeLoc activeLoc : activeIntervals) {
-                if (base.equals(activeLoc)) {
-                    thisBaseSeen = true;
-                    break;
-                }
-            }
-            if (!thisBaseSeen) {
-                allBasesSeen = false;
-                break;
-            }
-        }
-
-        Assert.assertTrue(allBasesSeen, "Some intervals missing from activity profile");
+        return activeIntervals;
     }
 
-    private List<GenomeLoc> toBases(List<GenomeLoc> intervals) {
-        List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
+    @Test
+    public void testActiveRegionCoverage() {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
+        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
+
+        intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+        List<ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
+        verifyActiveRegionCoverage(intervals, activeRegions);
+
+        // TODO: more tests and edge cases
+    }
+
+    private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, List<ActiveRegion> activeRegions) {
+        List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
+        List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
+
         for (GenomeLoc interval : intervals) {
-            if (interval.size() == 1)
-                bases.add(interval);
-            else {
-                for (int location = interval.getStart(); location <= interval.getStop(); location++) {
-                    bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
-                }
-            }
+            intervalStarts.add(interval.getStartLocation());
+            intervalStops.add(interval.getStopLocation());
         }
+
+        Map<GenomeLoc, ActiveRegion> baseRegionMap = new HashMap<GenomeLoc, ActiveRegion>();
+
+        for (ActiveRegion activeRegion : activeRegions) {
+            for (GenomeLoc activeLoc : toSingleBaseLocs(activeRegion.getLocation())) {
+                // Contract: Regions do not overlap
+                Assert.assertFalse(baseRegionMap.containsKey(activeLoc), "Genome location " + activeLoc + " is assigned to more than one region");
+                baseRegionMap.put(activeLoc, activeRegion);
+            }
+
+            GenomeLoc start = activeRegion.getLocation().getStartLocation();
+            if (intervalStarts.contains(start))
+                intervalStarts.remove(start);
+
+            GenomeLoc stop = activeRegion.getLocation().getStopLocation();
+            if (intervalStops.contains(stop))
+                intervalStops.remove(stop);
+        }
+
+        for (GenomeLoc baseLoc : toSingleBaseLocs(intervals)) {
+            // Contract: Each location in the interval(s) is in exactly one region
+            // Contract: The total set of regions exactly matches the analysis interval(s)
+            Assert.assertTrue(baseRegionMap.containsKey(baseLoc), "Genome location " + baseLoc + " is not assigned to any region");
+            baseRegionMap.remove(baseLoc);
+        }
+
+        // Contract: The total set of regions exactly matches the analysis interval(s)
+        Assert.assertEquals(baseRegionMap.size(), 0, "Active regions contain base(s) outside of the given intervals");
+
+        // Contract: All explicit interval boundaries must also be region boundaries
+        Assert.assertEquals(intervalStarts.size(), 0, "Interval start location does not match an active region start location");
+        Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
+    }
+
+    private List<ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
+        for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
+            t.traverse(walker, dataProvider, 0);
+
+        return walker.mappedActiveRegions;
+    }
+
+    private Collection<GenomeLoc> toSingleBaseLocs(GenomeLoc interval) {
+        List<GenomeLoc> bases = new ArrayList<GenomeLoc>();
+        if (interval.size() == 1)
+            bases.add(interval);
+        else {
+            for (int location = interval.getStart(); location <= interval.getStop(); location++)
+                bases.add(genomeLocParser.createGenomeLoc(interval.getContig(), location, location));
+        }
+
         return bases;
     }
 
-    private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
-        walker = new DummyActiveRegionWalker();
+    private Collection<GenomeLoc> toSingleBaseLocs(List<GenomeLoc> intervals) {
+        Set<GenomeLoc> bases = new TreeSet<GenomeLoc>();    // for sorting and uniqueness
+        for (GenomeLoc interval : intervals)
+            bases.addAll(toSingleBaseLocs(interval));
 
+        return bases;
+    }
+
+    private void verifyEqualIntervals(List<GenomeLoc> aIntervals, List<GenomeLoc> bIntervals) {
+        Collection<GenomeLoc> aBases = toSingleBaseLocs(aIntervals);
+        Collection<GenomeLoc> bBases = toSingleBaseLocs(bIntervals);
+
+        Assert.assertTrue(aBases.size() == bBases.size(), "Interval lists have a differing number of bases: " + aBases.size() + " vs. " + bBases.size());
+
+        Iterator<GenomeLoc> aIter = aBases.iterator();
+        Iterator<GenomeLoc> bIter = bBases.iterator();
+        while (aIter.hasNext() && bIter.hasNext()) {
+            GenomeLoc aLoc = aIter.next();
+            GenomeLoc bLoc = bIter.next();
+            Assert.assertTrue(aLoc.equals(bLoc), "Interval locations do not match: " + aLoc + " vs. " + bLoc);
+        }
+    }
+
+    private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
         GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
         engine.setGenomeLocParser(genomeLocParser);
         t.initialize(engine);
@@ -146,8 +221,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         Shard shard = new MockLocusShard(genomeLocParser, intervals);
 
         List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
-        WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
-        for (WindowMaker.WindowMakerIterator window : windowMaker) {
+        for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
             providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
         }
 

From 3ad9128800922ab40aeb5504ff5b0e5a6402e591 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Tue, 20 Nov 2012 13:11:24 -0500
Subject: [PATCH 113/236] Add some reads - Move intervals and reads to init -
 Update intervals and reads

---
 .../traversals/TraverseActiveRegionsTest.java | 73 ++++++++++++++-----
 1 file changed, 53 insertions(+), 20 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index ce4d400b4..2780b7421 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -1,9 +1,10 @@
 package org.broadinstitute.sting.gatk.traversals;
 
-import org.testng.Assert;
+import net.sf.samtools.*;
+import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
+import org.broadinstitute.sting.utils.interval.IntervalUtils;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
-import net.sf.samtools.SAMRecord;
-import net.sf.samtools.SAMSequenceDictionary;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
@@ -22,6 +23,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
 
@@ -43,8 +45,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
     private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
         private final double prob;
-        public List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
-        public List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
+        protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
+        protected List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
 
         public DummyActiveRegionWalker() {
             this.prob = 1.0;
@@ -76,30 +78,46 @@ public class TraverseActiveRegionsTest extends BaseTest {
     private final TraverseActiveRegions<Integer, Integer> t = new TraverseActiveRegions<Integer, Integer>();
 
     private IndexedFastaSequenceFile reference;
+    private SAMSequenceDictionary dictionary;
     private GenomeLocParser genomeLocParser;
 
+    private List<GenomeLoc> intervals;
+    private List<SAMRecord> reads;
+
     @BeforeClass
     private void init() throws FileNotFoundException {
         reference = new CachingIndexedFastaSequenceFile(new File(hg19Reference));
-        SAMSequenceDictionary dictionary = reference.getSequenceDictionary();
+        dictionary = reference.getSequenceDictionary();
         genomeLocParser = new GenomeLocParser(dictionary);
+
+        intervals = new ArrayList<GenomeLoc>();
+        intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
+        // TODO: this fails!
+        //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
+        intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
+
+        reads = new ArrayList<SAMRecord>();
+        reads.add(buildSAMRecord("overlap_overlapped_equal", "1", 10, 20));
+        reads.add(buildSAMRecord("overlap_overlapped_unequal", "1", 10, 21));
+        reads.add(buildSAMRecord("overlap_boundary_equal", "1", 1990, 2009));
+        reads.add(buildSAMRecord("overlap_boundary_unequal", "1", 1995, 2050));
+        reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
+        reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
+        reads.add(buildSAMRecord("simple", "20", 1000100, 1000150));
     }
 
     @Test
     public void testAllBasesSeen() {
         DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
-        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
 
-        intervals.add(genomeLocParser.createGenomeLoc("1", 1, 1));
         List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
         // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
         verifyEqualIntervals(intervals, activeIntervals);
 
-        intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
-        activeIntervals = getIsActiveIntervals(walker, intervals);
-        // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
-        verifyEqualIntervals(intervals, activeIntervals);
-
         // TODO: more tests and edge cases
     }
 
@@ -116,11 +134,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
     @Test
     public void testActiveRegionCoverage() {
         DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
-        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
-
-        intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
-        intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
-        intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
 
         List<ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
         verifyActiveRegionCoverage(intervals, activeRegions);
@@ -212,17 +225,37 @@ public class TraverseActiveRegionsTest extends BaseTest {
         }
     }
 
+    // copied from LocusViewTemplate
+    protected GATKSAMRecord buildSAMRecord(String readName, String contig, int alignmentStart, int alignmentEnd) {
+        SAMFileHeader header = new SAMFileHeader();
+        header.setSequenceDictionary(dictionary);
+        GATKSAMRecord record = new GATKSAMRecord(header);
+
+        record.setReadName(readName);
+        record.setReferenceIndex(dictionary.getSequenceIndex(contig));
+        record.setAlignmentStart(alignmentStart);
+
+        Cigar cigar = new Cigar();
+        int len = alignmentEnd - alignmentStart + 1;
+        cigar.add(new CigarElement(len, CigarOperator.M));
+        record.setCigar(cigar);
+        record.setReadBases(new byte[len]);
+        record.setBaseQualities(new byte[len]);
+
+        return record;
+    }
+
     private List<LocusShardDataProvider> createDataProviders(List<GenomeLoc> intervals) {
         GenomeAnalysisEngine engine = new GenomeAnalysisEngine();
         engine.setGenomeLocParser(genomeLocParser);
         t.initialize(engine);
 
-        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>());
+        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
         Shard shard = new MockLocusShard(genomeLocParser, intervals);
 
         List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();
         for (WindowMaker.WindowMakerIterator window : new WindowMaker(shard, genomeLocParser, iterator, shard.getGenomeLocs())) {
-            providers.add(new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
+            providers.add(new LocusShardDataProvider(shard, shard.getReadProperties(), genomeLocParser, window.getLocus(), window, reference, new ArrayList<ReferenceOrderedDataSource>()));
         }
 
         return providers;

From c68bc95db6635ccbe90462d18aa5d3a4ae7ace38 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Wed, 21 Nov 2012 16:22:57 -0500
Subject: [PATCH 114/236] Initial read mapping tests

- Failing tests are commented out
---
 .../traversals/TraverseActiveRegionsTest.java | 115 ++++++++++++++++--
 1 file changed, 105 insertions(+), 10 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index 2780b7421..e4c7b2db0 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -46,7 +46,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
     private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
         private final double prob;
         protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
-        protected List<ActiveRegion> mappedActiveRegions = new ArrayList<ActiveRegion>();
+        protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
 
         public DummyActiveRegionWalker() {
             this.prob = 1.0;
@@ -60,7 +60,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         @Override
         public Integer map(ActiveRegion activeRegion, RefMetaDataTracker metaDataTracker) {
-            mappedActiveRegions.add(activeRegion);
+            mappedActiveRegions.put(activeRegion.getLocation(), activeRegion);
             return 0;
         }
 
@@ -101,13 +101,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
         intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
 
         reads = new ArrayList<SAMRecord>();
-        reads.add(buildSAMRecord("overlap_overlapped_equal", "1", 10, 20));
-        reads.add(buildSAMRecord("overlap_overlapped_unequal", "1", 10, 21));
-        reads.add(buildSAMRecord("overlap_boundary_equal", "1", 1990, 2009));
-        reads.add(buildSAMRecord("overlap_boundary_unequal", "1", 1995, 2050));
+        reads.add(buildSAMRecord("simple", "1", 100, 200));
+        reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
+        reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
+        reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
+        reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
         reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
-        reads.add(buildSAMRecord("simple", "20", 1000100, 1000150));
+        reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
+        // TODO
+        //reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
     }
 
     @Test
@@ -135,13 +138,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
     public void testActiveRegionCoverage() {
         DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
 
-        List<ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
+        Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
         verifyActiveRegionCoverage(intervals, activeRegions);
 
         // TODO: more tests and edge cases
     }
 
-    private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, List<ActiveRegion> activeRegions) {
+    private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
         List<GenomeLoc> intervalStarts = new ArrayList<GenomeLoc>();
         List<GenomeLoc> intervalStops = new ArrayList<GenomeLoc>();
 
@@ -183,7 +186,99 @@ public class TraverseActiveRegionsTest extends BaseTest {
         Assert.assertEquals(intervalStops.size(), 0, "Interval stop location does not match an active region stop location");
     }
 
-    private List<ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
+    @Test
+    public void testReadMapping() {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
+
+        // Contract: Each read has the Primary state in a single region (or none)
+        // This is the region of maximum overlap for the read (earlier if tied)
+
+        // Contract: Each read has the Non-Primary state in all other regions it overlaps
+        // Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
+
+        // simple: Primary in 1:1-999
+        // overlap_equal: Primary in 1:1-999
+        // overlap_unequal: Primary in 1:1-999
+        // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+        // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+        // extended_only: Extended in 1:2000-2999
+        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+        // outside_intervals: none
+
+        // TODO
+        // simple20: Primary in 20:10000-20000
+
+        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
+        ActiveRegion region;
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+        verifyReadPrimary(region, "simple");
+        verifyReadPrimary(region, "overlap_equal");
+        verifyReadPrimary(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        // TODO: fail verifyReadPrimary(region, "boundary_equal");
+        // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        // TODO: fail verifyReadPrimary(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
+        verifyReadPrimary(region, "boundary_unequal");
+        // TODO: fail verifyReadExtended(region, "extended_only");
+        // TODO: fail verifyReadExtended(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+
+        // TODO: more tests and edge cases
+    }
+
+    private void verifyReadPrimary(ActiveRegion region, String readName) {
+        SAMRecord read = getRead(region, readName);
+        Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
+    }
+
+    private void verifyReadNonPrimary(ActiveRegion region, String readName) {
+        SAMRecord read = getRead(region, readName);
+        Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
+    }
+
+    private void verifyReadExtended(ActiveRegion region, String readName) {
+        Assert.fail("The Extended read state has not been implemented");
+    }
+
+    private void verifyReadNotPlaced(ActiveRegion region, String readName) {
+        for (SAMRecord read : region.getReads()) {
+            if (read.getReadName().equals(readName))
+                Assert.fail("Read " + readName + " found in active region " + region);
+        }
+    }
+
+    private SAMRecord getRead(ActiveRegion region, String readName) {
+        for (SAMRecord read : region.getReads()) {
+            if (read.getReadName().equals(readName))
+                return read;
+        }
+
+        Assert.fail("Read " + readName + " not found in active region " + region);
+        return null;
+    }
+
+    private Map<GenomeLoc, ActiveRegion> getActiveRegions(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
         for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
             t.traverse(walker, dataProvider, 0);
 

From 48f271c5bd825dc0f57249dc6164ec4bc3c35541 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 21 Nov 2012 17:23:41 -0500
Subject: [PATCH 117/236] Adding 80% support for multi-allelic variants

-- Multi-allelic variants are split into their bi-allelic version, trimmed, and we attempt to provide a meaningful genotype for NA12878 here.  It's not perfect and needs some discussion on how to handle het/alt variants
-- Adding splitInBiallelic funtion to VariantContextUtils as well as extensive unit tests that also indirectly test reverseTrimAlleles (which worked perfectly FYI)
---
 .../variantcontext/VariantContextUtils.java   |  34 +++++
 .../VariantContextTestProvider.java           |   2 +-
 .../VariantContextUtilsUnitTest.java          | 119 +++++++++++++++++-
 3 files changed, 150 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 81959c998..1f1867f75 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -979,6 +979,40 @@ public class VariantContextUtils {
     private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
     public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
 
+    /**
+     * Split variant context into its biallelic components if there are more than 2 alleles
+     *
+     * For VC has A/B/C alleles, returns A/B and A/C contexts.
+     * Genotypes are all no-calls now (it's not possible to fix them easily)
+     * Alleles are right trimmed to satisfy VCF conventions
+     *
+     * If vc is biallelic or non-variant it is just returned
+     *
+     * Chromosome counts are updated (but they are by definition 0)
+     *
+     * @param vc a potentially multi-allelic variant context
+     * @return a list of bi-allelic (or monomorphic) variant context
+     */
+    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
+        if ( ! vc.isVariant() || vc.isBiallelic() )
+            // non variant or biallelics already satisfy the contract
+            return Collections.singletonList(vc);
+        else {
+            final List<VariantContext> biallelics = new LinkedList<VariantContext>();
+
+            for ( final Allele alt : vc.getAlternateAlleles() ) {
+                VariantContextBuilder builder = new VariantContextBuilder(vc);
+                final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
+                builder.alleles(alleles);
+                builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
+                calculateChromosomeCounts(builder, true);
+                biallelics.add(reverseTrimAlleles(builder.make()));
+            }
+
+            return biallelics;
+        }
+    }
+
     /**
      * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
      *
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
index 6785fa816..c57b2a44d 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java
@@ -782,7 +782,7 @@ public class VariantContextTestProvider {
         Assert.assertEquals(actual.getStart(), expected.getStart(), "start");
         Assert.assertEquals(actual.getEnd(), expected.getEnd(), "end");
         Assert.assertEquals(actual.getID(), expected.getID(), "id");
-        Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles");
+        Assert.assertEquals(actual.getAlleles(), expected.getAlleles(), "alleles for " + expected + " vs " + actual);
 
         assertAttributesEquals(actual.getAttributes(), expected.getAttributes());
         Assert.assertEquals(actual.filtersWereApplied(), expected.filtersWereApplied(), "filtersWereApplied");
diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
index 114104d42..f3daa9e4c 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
@@ -26,7 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
+import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.testng.Assert;
@@ -39,7 +39,7 @@ import java.io.FileNotFoundException;
 import java.util.*;
 
 public class VariantContextUtilsUnitTest extends BaseTest {
-    Allele Aref, T, C, Cref, ATC, ATCATC;
+    Allele Aref, T, C, G, Cref, ATC, ATCATC;
     private GenomeLocParser genomeLocParser;
 
     @BeforeSuite
@@ -58,6 +58,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
         Cref = Allele.create("C", true);
         T = Allele.create("T");
         C = Allele.create("C");
+        G = Allele.create("G");
         ATC = Allele.create("ATC");
         ATCATC = Allele.create("ATCATC");
     }
@@ -697,10 +698,120 @@ public class VariantContextUtilsUnitTest extends BaseTest {
         return ReverseClippingPositionTestProvider.getTests(ReverseClippingPositionTestProvider.class);
     }
 
-
     @Test(dataProvider = "ReverseClippingPositionTestProvider")
     public void testReverseClippingPositionTestProvider(ReverseClippingPositionTestProvider cfg) {
         int result = VariantContextUtils.computeReverseClipping(cfg.alleles, cfg.ref.getBytes(), 0, false);
         Assert.assertEquals(result, cfg.expectedClip);
     }
-}
+
+    // --------------------------------------------------------------------------------
+    //
+    // test splitting into bi-allelics
+    //
+    // --------------------------------------------------------------------------------
+
+    @DataProvider(name = "SplitBiallelics")
+    public Object[][] makeSplitBiallelics() throws CloneNotSupportedException {
+        List<Object[]> tests = new ArrayList<Object[]>();
+
+        final VariantContextBuilder root = new VariantContextBuilder("x", "20", 10, 10, Arrays.asList(Aref, C));
+
+        // biallelic -> biallelic
+        tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
+
+        // monos -> monos
+        root.alleles(Arrays.asList(Aref));
+        tests.add(new Object[]{root.make(), Arrays.asList(root.make())});
+
+        root.alleles(Arrays.asList(Aref, C, T));
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(Aref, C)).make(),
+                        root.alleles(Arrays.asList(Aref, T)).make())});
+
+        root.alleles(Arrays.asList(Aref, C, T, G));
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(Aref, C)).make(),
+                        root.alleles(Arrays.asList(Aref, T)).make(),
+                        root.alleles(Arrays.asList(Aref, G)).make())});
+
+        final Allele C      = Allele.create("C");
+        final Allele CA      = Allele.create("CA");
+        final Allele CAA     = Allele.create("CAA");
+        final Allele CAAAA   = Allele.create("CAAAA");
+        final Allele CAAAAA  = Allele.create("CAAAAA");
+        final Allele Cref      = Allele.create("C", true);
+        final Allele CAref     = Allele.create("CA", true);
+        final Allele CAAref    = Allele.create("CAA", true);
+        final Allele CAAAref   = Allele.create("CAAA", true);
+
+        root.alleles(Arrays.asList(Cref, CA, CAA));
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(Cref, CA)).make(),
+                        root.alleles(Arrays.asList(Cref, CAA)).make())});
+
+        root.alleles(Arrays.asList(CAAref, C, CA)).stop(12);
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(CAAref, C)).make(),
+                        root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
+
+        root.alleles(Arrays.asList(CAAAref, C, CA, CAA)).stop(13);
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(CAAAref, C)).make(),
+                        root.alleles(Arrays.asList(CAAref, C)).stop(12).make(),
+                        root.alleles(Arrays.asList(CAref, C)).stop(11).make())});
+
+        root.alleles(Arrays.asList(CAAAref, CAAAAA, CAAAA, CAA, C)).stop(13);
+        tests.add(new Object[]{root.make(),
+                Arrays.asList(
+                        root.alleles(Arrays.asList(Cref, CAA)).stop(10).make(),
+                        root.alleles(Arrays.asList(Cref, CA)).stop(10).make(),
+                        root.alleles(Arrays.asList(CAref, C)).stop(11).make(),
+                        root.alleles(Arrays.asList(CAAAref, C)).stop(13).make())});
+
+        return tests.toArray(new Object[][]{});
+    }
+
+    @Test(dataProvider = "SplitBiallelics")
+    public void testSplitBiallelicsNoGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
+        final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vc);
+        Assert.assertEquals(biallelics.size(), expectedBiallelics.size());
+        for ( int i = 0; i < biallelics.size(); i++ ) {
+            final VariantContext actual = biallelics.get(i);
+            final VariantContext expected = expectedBiallelics.get(i);
+            VariantContextTestProvider.assertEquals(actual, expected);
+        }
+    }
+
+    @Test(dataProvider = "SplitBiallelics", dependsOnMethods = "testSplitBiallelicsNoGenotypes")
+    public void testSplitBiallelicsGenotypes(final VariantContext vc, final List<VariantContext> expectedBiallelics) {
+        final List<Genotype> genotypes = new ArrayList<Genotype>();
+
+        int sampleI = 0;
+        for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
+            genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
+        }
+        genotypes.add(GenotypeBuilder.createMissing("missing", 2));
+
+        final VariantContext vcWithGenotypes = new VariantContextBuilder(vc).genotypes(genotypes).make();
+
+        final List<VariantContext> biallelics = VariantContextUtils.splitVariantContextToBiallelics(vcWithGenotypes);
+        for ( int i = 0; i < biallelics.size(); i++ ) {
+            final VariantContext actual = biallelics.get(i);
+            Assert.assertEquals(actual.getNSamples(), vcWithGenotypes.getNSamples()); // not dropping any samples
+
+            for ( final Genotype inputGenotype : genotypes ) {
+                final Genotype actualGenotype = actual.getGenotype(inputGenotype.getSampleName());
+                Assert.assertNotNull(actualGenotype);
+                if ( ! vc.isVariant() || vc.isBiallelic() )
+                    Assert.assertEquals(actualGenotype, vcWithGenotypes.getGenotype(inputGenotype.getSampleName()));
+                else
+                    Assert.assertTrue(actualGenotype.isNoCall());
+            }
+        }
+    }
+}
\ No newline at end of file

From 2306518ab6be1c323b46d41b33d481cc1bb65197 Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Thu, 22 Nov 2012 01:45:18 -0500
Subject: [PATCH 118/236] Fix to deal with 'proper' options of casting

---
 .../pileup/AbstractReadBackedPileup.java      | 22 ++++++++++++++-----
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index d0ae68912..42938d2a6 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -1022,7 +1022,7 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
      * @return
      */
     @Override
-    public ReadBackedPileup getStartSortedPileup() {
+    public RBP getStartSortedPileup() {
 
         final TreeSet<PE> sortedElements = new TreeSet<PE>(new Comparator<PE>() {
             @Override
@@ -1031,15 +1031,27 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
                 return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
             }
         });
-        UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
-        for (PE pile : tracker)
-            sortedElements.add(pile);
+
+        if (pileupElementTracker instanceof PerSamplePileupElementTracker) {
+            PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
+
+            for (final String sample : tracker.getSamples()) {
+                PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
+                for (PE pile : perSampleElements)
+                    sortedElements.add(pile);
+            }
+        }
+        else {
+            UnifiedPileupElementTracker<PE> tracker = (UnifiedPileupElementTracker<PE>) pileupElementTracker;
+            for (PE pile : tracker)
+                sortedElements.add(pile);
+        }
 
         UnifiedPileupElementTracker<PE> sortedTracker = new UnifiedPileupElementTracker<PE>();
         for (PE pile : sortedElements)
             sortedTracker.add(pile);
 
-        return (RBP) createNewPileup(this.getLocation(), sortedTracker);
+        return (RBP) createNewPileup(loc, sortedTracker);
     }
 
     @Override

From 9719ba7adce0574a48281d6baac7db18ff3208fa Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 22 Nov 2012 21:53:42 -0500
Subject: [PATCH 119/236] Remove -number example from the docs since it's no
 longer supported.

---
 .../sting/gatk/walkers/variantutils/SelectVariants.java   | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
index d28fe34d6..9253446c8 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariants.java
@@ -151,14 +151,6 @@ import java.util.*;
  *   -mvq 50 \
  *   -o violations.vcf
  *
- * Creating a sample of exactly 1000 variants randomly chosen with equal probability from the variant VCF:
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -R ref.fasta \
- *   -T SelectVariants \
- *   --variant input.vcf \
- *   -o output.vcf \
- *   -number 1000
- *
  * Creating a set with 50% of the total number of variants in the variant VCF:
  * java -Xmx2g -jar GenomeAnalysisTK.jar \
  *   -R ref.fasta \

From a3f59325016b9ffb78f71941aa9fd54bb9e4b3d6 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Mon, 26 Nov 2012 11:12:27 -0500
Subject: [PATCH 122/236] Fixed null pointer exception in Integration Tests

When running Utils.setupWriter with NO_PG_TAG set, the writer was attempting to create a program record with the null pointer. Fixed.
---
 public/java/src/org/broadinstitute/sting/utils/Utils.java | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/Utils.java b/public/java/src/org/broadinstitute/sting/utils/Utils.java
index b780d0966..544030f73 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Utils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Utils.java
@@ -701,11 +701,13 @@ public class Utils {
         List<SAMProgramRecord> oldRecords = header.getProgramRecords();
         List<SAMProgramRecord> newRecords = new ArrayList<SAMProgramRecord>(oldRecords.size()+1);
         for ( SAMProgramRecord record : oldRecords )
-            if ( !record.getId().startsWith(programRecord.getId()) || KEEP_ALL_PG_RECORDS )
+            if ( (programRecord != null && !record.getId().startsWith(programRecord.getId())) || KEEP_ALL_PG_RECORDS )
                 newRecords.add(record);
 
-        newRecords.add(programRecord);
-        header.setProgramRecords(newRecords);
+        if (programRecord != null) {
+            newRecords.add(programRecord);
+            header.setProgramRecords(newRecords);
+        }
         return header;
     }
 

From 4f7fa3009a4adb8617501f35d61730f62b6ca0b1 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 26 Nov 2012 11:34:59 -0500
Subject: [PATCH 123/236] I forget why I thought that the VariantAnnotator
 couldn't run multi-threaded because it works just fine.  Now you can specify
 -nt with VA.

---
 .../walkers/annotator/VariantAnnotator.java   | 28 +++++++------------
 .../VariantAnnotatorIntegrationTest.java      |  8 +++++-
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
index c4de9ed45..92060b4a3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@@ -82,7 +82,7 @@ import java.util.*;
 @Allows(value={DataSource.READS, DataSource.REFERENCE})
 @Reference(window=@Window(start=-50,stop=50))
 @By(DataSource.REFERENCE)
-public class VariantAnnotator extends RodWalker<Integer, Integer> implements AnnotatorCompatible {
+public class VariantAnnotator extends RodWalker<Integer, Integer> implements AnnotatorCompatible, TreeReducible<Integer> {
 
     @ArgumentCollection
     protected StandardVariantContextInputArgumentCollection variantCollection = new StandardVariantContextInputArgumentCollection();
@@ -275,14 +275,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
         return true;
     }
 
-    /**
-     * Initialize the number of loci processed to zero.
-     *
-     * @return 0
-     */
-    public Integer reduceInit() { return 0; }
-
-
     /**
      * We want reads that span deletions
      *
@@ -323,15 +315,15 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
         return 1;
     }
 
-    /**
-     * Increment the number of loci processed.
-     *
-     * @param value result of the map.
-     * @param sum   accumulator for the reduce.
-     * @return the new number of loci processed.
-     */
-    public Integer reduce(Integer value, Integer sum) {
-        return sum + value;
+    @Override
+    public Integer reduceInit() { return 0; }
+
+    @Override
+    public Integer reduce(Integer value, Integer sum) { return value + sum; }
+
+    @Override
+    public Integer treeReduce(Integer lhs, Integer rhs) {
+        return lhs + rhs;
     }
 
     /**
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index 01dff0089..b097e3d34 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -151,7 +151,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
     }
 
     @Test
-    public void testTabixAnnotations() {
+    public void testTabixAnnotationsAndParallelism() {
         final String MD5 = "99938d1e197b8f10c408cac490a00a62";
         for ( String file : Arrays.asList("CEU.exon.2010_03.sites.vcf", "CEU.exon.2010_03.sites.vcf.gz")) {
             WalkerTestSpec spec = new WalkerTestSpec(
@@ -159,6 +159,12 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
                     Arrays.asList(MD5));
             executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
         }
+
+        WalkerTestSpec spec = new WalkerTestSpec(
+                baseTestString() + " -A HomopolymerRun -nt 2 --variant:vcf " + validationDataLocation + "CEU.exon.2010_03.sites.vcf -L " + validationDataLocation + "CEU.exon.2010_03.sites.vcf --no_cmdline_in_header", 1,
+                Arrays.asList(MD5));
+
+        executeTest("Testing lookup vcf tabix vs. vcf tribble plus parallelism", spec);
     }
 
     @Test

From c3b7dd1374ece9cb8bffedd6518f5d6d9582eec0 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 26 Nov 2012 12:19:11 -0500
Subject: [PATCH 124/236] Misc cleanup in the HaplotypeCaller. Cleaning up
 unused arguments after recent changes to HC-GenotypingEngine

---
 .../haplotypecaller/GenotypingEngine.java     |  4 +-
 .../haplotypecaller/HaplotypeCaller.java      | 66 ++-----------------
 .../LikelihoodCalculationEngine.java          |  1 -
 .../HaplotypeCallerIntegrationTest.java       | 17 ++---
 .../annotator/MappingQualityRankSumTest.java  |  5 +-
 .../gatk/walkers/annotator/RankSumTest.java   |  2 +-
 .../walkers/annotator/ReadPosRankSumTest.java |  2 +-
 7 files changed, 21 insertions(+), 76 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index beec8a92e..4fc2dc8f7 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -41,13 +41,11 @@ import java.util.*;
 public class GenotypingEngine {
 
     private final boolean DEBUG;
-    private final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
     private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
     private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
 
-    public GenotypingEngine( final boolean DEBUG, final boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE ) {
+    public GenotypingEngine( final boolean DEBUG ) {
         this.DEBUG = DEBUG;
-        this.OUTPUT_FULL_HAPLOTYPE_SEQUENCE = OUTPUT_FULL_HAPLOTYPE_SEQUENCE;
         noCall.add(Allele.NO_CALL);
     }
 
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 2b739a321..24b3309f1 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -131,14 +131,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
     @Argument(fullName="minPruning", shortName="minPruning", doc = "The minimum allowed pruning factor in assembly graph. Paths with <= X supporting kmers are pruned from the graph", required = false)
     protected int MIN_PRUNE_FACTOR = 1;
 
-    @Advanced
-    @Argument(fullName="genotypeFullActiveRegion", shortName="genotypeFullActiveRegion", doc = "If specified, alternate alleles are considered to be the full active region for the purposes of genotyping", required = false)
-    protected boolean GENOTYPE_FULL_ACTIVE_REGION = false;
-
-    @Advanced
-    @Argument(fullName="fullHaplotype", shortName="fullHaplotype", doc = "If specified, output the full haplotype sequence instead of converting to individual variants w.r.t. the reference", required = false)
-    protected boolean OUTPUT_FULL_HAPLOTYPE_SEQUENCE = false;
-
     @Advanced
     @Argument(fullName="gcpHMM", shortName="gcpHMM", doc="Flat gap continuation penalty for use in the Pair HMM", required = false)
     protected int gcpHMM = 10;
@@ -248,10 +240,11 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
 
         // create a UAC but with the exactCallsLog = null, so we only output the log for the HC caller itself, if requested
         UnifiedArgumentCollection simpleUAC = new UnifiedArgumentCollection(UAC);
-        simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY; // low values used for isActive determination only, default/user-specified values used for actual calling
-        simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY; // low values used for isActive determination only, default/user-specified values used for actual calling
-        simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING );
-        simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.max( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING );
+        simpleUAC.OutputMode = UnifiedGenotyperEngine.OUTPUT_MODE.EMIT_VARIANTS_ONLY;
+        simpleUAC.GenotypingMode = GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.DISCOVERY;
+        simpleUAC.STANDARD_CONFIDENCE_FOR_CALLING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_CALLING ); // low values used for isActive determination only, default/user-specified values used for actual calling
+        simpleUAC.STANDARD_CONFIDENCE_FOR_EMITTING = Math.min( 4.0, UAC.STANDARD_CONFIDENCE_FOR_EMITTING ); // low values used for isActive determination only, default/user-specified values used for actual calling
+        simpleUAC.CONTAMINATION_FRACTION = 0.0;
         simpleUAC.exactCallsLog = null;
         UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
 
@@ -273,15 +266,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
                 VCFConstants.GENOTYPE_QUALITY_KEY,
                 VCFConstants.DEPTH_KEY,
                 VCFConstants.GENOTYPE_PL_KEY);
-        // header lines for the experimental HaplotypeCaller-specific annotations
-        headerInfo.add(new VCFInfoHeaderLine("NVH", 1, VCFHeaderLineType.Integer, "Number of variants found on the haplotype that contained this variant"));
-        headerInfo.add(new VCFInfoHeaderLine("NumHapEval", 1, VCFHeaderLineType.Integer, "Number of haplotypes that were chosen for evaluation in this active region"));
-        headerInfo.add(new VCFInfoHeaderLine("NumHapAssembly", 1, VCFHeaderLineType.Integer, "Number of haplotypes created during the assembly of this active region"));
-        headerInfo.add(new VCFInfoHeaderLine("ActiveRegionSize", 1, VCFHeaderLineType.Integer, "Number of base pairs that comprise this active region"));
-        headerInfo.add(new VCFInfoHeaderLine("EVENTLENGTH", 1, VCFHeaderLineType.Integer, "Max length of all the alternate alleles"));
-        headerInfo.add(new VCFInfoHeaderLine("TYPE", 1, VCFHeaderLineType.String, "Type of event: SNP or INDEL"));
-        headerInfo.add(new VCFInfoHeaderLine("extType", 1, VCFHeaderLineType.String, "Extended type of event: SNP, MNP, INDEL, or COMPLEX"));
-        headerInfo.add(new VCFInfoHeaderLine("QDE", 1, VCFHeaderLineType.Float, "QD value divided by the number of variants found on the haplotype that contained this variant"));
 
         // FILTER fields are added unconditionally as it's not always 100% certain the circumstances
         // where the filters are used.  For example, in emitting all sites the lowQual field is used
@@ -298,7 +282,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
 
         assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
         likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
-        genotypingEngine = new GenotypingEngine( DEBUG, OUTPUT_FULL_HAPLOTYPE_SEQUENCE );
+        genotypingEngine = new GenotypingEngine( DEBUG );
     }
 
     //---------------------------------------------------------------------------------------------------------------
@@ -428,43 +412,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
             final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
             final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
             final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
-
-            if( !GENOTYPE_FULL_ACTIVE_REGION ) {
-                // add some custom annotations to the calls
-
-                // Calculate the number of variants on the haplotype
-                int maxNumVar = 0;
-                for( final Allele allele : callResult.getFirst().getAlleles() ) {
-                    if( !allele.isReference() ) {
-                        for( final Haplotype haplotype : callResult.getSecond().get(allele) ) {
-                            final int numVar = haplotype.getEventMap().size();
-                            if( numVar > maxNumVar ) { maxNumVar = numVar; }
-                        }
-                    }
-                }
-                // Calculate the event length
-                int maxLength = 0;
-                for ( final Allele a : annotatedCall.getAlternateAlleles() ) {
-                    final int length = a.length() - annotatedCall.getReference().length();
-                    if( Math.abs(length) > Math.abs(maxLength) ) { maxLength = length; }
-                }
-
-                myAttributes.put("NVH", maxNumVar);
-                myAttributes.put("NumHapEval", bestHaplotypes.size());
-                myAttributes.put("NumHapAssembly", haplotypes.size());
-                myAttributes.put("ActiveRegionSize", activeRegion.getLocation().size());
-                myAttributes.put("EVENTLENGTH", maxLength);
-                myAttributes.put("TYPE", (annotatedCall.isSNP() || annotatedCall.isMNP() ? "SNP" : "INDEL") );
-                myAttributes.put("extType", annotatedCall.getType().toString() );
-
-                //if( likelihoodCalculationEngine.haplotypeScore != null ) {
-                //    myAttributes.put("HaplotypeScore", String.format("%.4f", likelihoodCalculationEngine.haplotypeScore));
-                //}
-                if( annotatedCall.hasAttribute("QD") ) {
-                    myAttributes.put("QDE", String.format("%.2f", Double.parseDouble((String)annotatedCall.getAttribute("QD")) / ((double)maxNumVar)) );
-                }
-            }
-
             vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
         }
 
@@ -520,6 +467,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
             if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
                 final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
                 // protect against INTERVALS with abnormally high coverage
+                // BUGBUG: remove when positinal downsampler is hooked up to ART/HC
                 if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
                     activeRegion.add(clippedRead);
                 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 304f8d5cb..29622ca17 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -169,7 +169,6 @@ public class LikelihoodCalculationEngine {
         }
 
         // compute the diploid haplotype likelihoods
-        // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
         for( int iii = 0; iii < numHaplotypes; iii++ ) {
             for( int jjj = 0; jjj <= iii; jjj++ ) {
                 for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index a57462d1d..007df3602 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -21,18 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSample() {
-        HCTest(CEUTRIO_BAM, "", "56aa4b84606b6b0b7dc78a383974d1b3");
+        HCTest(CEUTRIO_BAM, "", "2b39732ff8e0de5bc2ae949aaf7a6f21");
     }
 
     @Test
     public void testHaplotypeCallerSingleSample() {
-        HCTest(NA12878_BAM, "", "baabae06c85d416920be434939124d7f");
+        HCTest(NA12878_BAM, "", "8b217638ff585effb9cc70e9a9aa544f");
     }
 
     // TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
-        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf", "f2d0309fdf50d5827e9c60ed0dd07e3f");
+        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
+                "541aa8291f03ba33bd1ad3d731fd5657");
     }
 
     private void HCTestComplexVariants(String bam, String args, String md5) {
@@ -43,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSampleComplex() {
-        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "966d338f423c86a390d685aa6336ec69");
+        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd7170cbde7df04d4fbe1da7903c31c6");
     }
 
     private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -54,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleSymbolic() {
-        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "7fbc6b9e27e374f2ffe4be952d88c7c6");
+        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "99456fc7207c1fe9f367a0d0afae87cd");
     }
 
     private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -65,7 +66,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleIndelQualityScores() {
-        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "2581e760279291a3901a506d060bfac8");
+        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6c1631785b3f832aecab1a99f0454762");
     }
 
     @Test
@@ -78,7 +79,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     @Test
     public void HCTestStructuralIndels() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("96ab8253d242b851ccfc218759f79784"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("237601bbc39694c7413a332cbb656c8e"));
         executeTest("HCTestStructuralIndels: ", spec);
     }
 
@@ -92,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     public void HCTestReducedBam() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("425f1a0fb00d7145edf1c55e54346fae"));
+                Arrays.asList("40bf739fb2b1743642498efe79ea6342"));
         executeTest("HC calling on a ReducedRead BAM", spec);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
index 82596a501..2679a169b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/MappingQualityRankSumTest.java
@@ -29,7 +29,7 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
                                        final List<Double> refQuals, final List<Double> altQuals) {
 
         if (pileup != null && likelihoodMap == null) {
-            // no per-read likelihoods available:
+            // old UG snp-only path through the annotations
             for ( final PileupElement p : pileup ) {
                 if ( isUsableBase(p) ) {
                     if ( allAlleles.get(0).equals(Allele.create(p.getBase(), true)) ) {
@@ -43,14 +43,13 @@ public class MappingQualityRankSumTest extends RankSumTest implements StandardAn
         }
         for (Map.Entry<GATKSAMRecord,Map<Allele,Double>> el : likelihoodMap.getLikelihoodReadMap().entrySet()) {
             final Allele a = PerReadAlleleLikelihoodMap.getMostLikelyAllele(el.getValue());
+            // BUGBUG: There needs to be a comparable isUsableBase check here
             if (a.isNoCall())
                 continue; // read is non-informative
             if (a.isReference())
                 refQuals.add((double)el.getKey().getMappingQuality());
             else if (allAlleles.contains(a))
                 altQuals.add((double)el.getKey().getMappingQuality());
-
-
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
index 0df7aff71..e7c0e6b14 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/RankSumTest.java
@@ -49,7 +49,7 @@ public abstract class RankSumTest extends InfoFieldAnnotation implements ActiveR
             ReadBackedPileup pileup = null;
 
 
-            if (stratifiedContexts != null) {
+            if (stratifiedContexts != null) { // the old UG SNP-only path through the annotations
                 final AlignmentContext context = stratifiedContexts.get(genotype.getSampleName());
                 if ( context != null )
                     pileup = context.getBasePileup();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
index d01233bb2..334b89f01 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/ReadPosRankSumTest.java
@@ -39,7 +39,7 @@ public class ReadPosRankSumTest extends RankSumTest implements StandardAnnotatio
                                        final List<Double> refQuals, final List<Double> altQuals) {
 
         if (alleleLikelihoodMap == null) {
-            // use fast SNP-based version if we don't have per-read allele likelihoods
+            // use old UG SNP-based version if we don't have per-read allele likelihoods
             for ( final PileupElement p : pileup ) {
                 if ( isUsableBase(p) ) {
                     int readPos = AlignmentUtils.calcAlignmentByteArrayOffset(p.getRead().getCigar(), p, 0, 0);

From 59cef880d195937e2e2eee9344a7bcc0d2ff016b Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 26 Nov 2012 12:20:07 -0500
Subject: [PATCH 125/236] Updating HC integration tests because experimental,
 HC-specific annotations have been removed.

---
 .../walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index 007df3602..f8ba1f4cc 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -72,7 +72,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     @Test
     public void HCTestProblematicReadsModifiedInActiveRegions() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("788176e1717bd28fc7cbc8e3efbb6100"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ec437d2d9f3ae07d155983be0155c8ed"));
         executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
     }
 

From 405f3c675d9daa589942e830db0870931741f113 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 27 Nov 2012 01:07:00 -0500
Subject: [PATCH 126/236] Fix for GSA-649: GenomeLocSortedSet.overlaps is crazy
 slow.  Also improved GenomeLocSortedSet.sizeBeforeLoc.

---
 .../traversals/TraverseActiveRegions.java     |  1 -
 .../sting/utils/GenomeLocSortedSet.java       | 49 +++++++++++++++----
 .../utils/GenomeLocSortedSetUnitTest.java     | 36 ++++++++++++++
 3 files changed, 76 insertions(+), 10 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index a2c37944a..3f20db0af 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -80,7 +80,6 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
             }
 
             // skip this location -- it's not part of our engine intervals
-            // TODO -- this is dangerously slow with current overlaps implementation : GSA-649 / GenomeLocSortedSet.overlaps is crazy slow
             if ( outsideEngineIntervals(location) )
                 continue;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
index d11adf9e3..ca1d385a2 100755
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
@@ -43,6 +43,9 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
     // our private storage for the GenomeLoc's
     private List<GenomeLoc> mArray = new ArrayList<GenomeLoc>();
 
+    // cache this to make overlap checking much more efficient
+    private int previousOverlapSearchIndex = -1;
+
     /** default constructor */
     public GenomeLocSortedSet(GenomeLocParser parser) {
         this.genomeLocParser = parser;
@@ -101,7 +104,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
      * Return the number of bps before loc in the sorted set
      *
      * @param loc the location before which we are counting bases
-     * @return
+     * @return the number of base pairs over all previous intervals
      */
     public long sizeBeforeLoc(GenomeLoc loc) {
         long s = 0;
@@ -110,7 +113,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
             if ( e.isBefore(loc) )
                 s += e.size();
             else if ( e.isPast(loc) )
-                ; // don't do anything
+                break; // we are done
             else // loc is inside of s
                 s += loc.getStart() - e.getStart();
         }
@@ -131,15 +134,43 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
      * Determine if the given loc overlaps any loc in the sorted set
      *
      * @param loc the location to test
-     * @return
+     * @return trip if the location overlaps any loc
      */
     public boolean overlaps(final GenomeLoc loc) {
-        for(final GenomeLoc e : mArray) {
-            if(e.overlapsP(loc)) {
-                return true;
-            }
+        // edge condition
+        if ( mArray.isEmpty() )
+            return false;
+
+        // use the cached version first
+        if ( previousOverlapSearchIndex != -1 && overlapsAtOrImmediatelyAfterCachedIndex(loc, true) )
+            return true;
+
+        // update the cached index
+        previousOverlapSearchIndex = Collections.binarySearch(mArray, loc);
+
+        // if it matches an interval exactly, we are done
+        if ( previousOverlapSearchIndex > 0 )
+            return true;
+
+        // check whether it overlaps the interval before or after the insertion point
+        previousOverlapSearchIndex = Math.max(0, -1 * previousOverlapSearchIndex - 2);
+        return overlapsAtOrImmediatelyAfterCachedIndex(loc, false);
+    }
+
+    private boolean overlapsAtOrImmediatelyAfterCachedIndex(final GenomeLoc loc, final boolean updateCachedIndex) {
+        // check the cached entry
+        if ( mArray.get(previousOverlapSearchIndex).overlapsP(loc) )
+            return true;
+
+        // check the entry after the cached entry since we may have moved to it
+        boolean returnValue = false;
+        if ( previousOverlapSearchIndex < mArray.size() - 1 ) {
+            returnValue = mArray.get(previousOverlapSearchIndex + 1).overlapsP(loc);
+            if ( updateCachedIndex )
+                previousOverlapSearchIndex++;
         }
-        return false;
+
+        return returnValue;
     }
 
     /**
@@ -155,7 +186,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
             mArray.add(e);
             return true;
         } else {
-            int loc = Collections.binarySearch(mArray,e);
+            final int loc = Collections.binarySearch(mArray,e);
             if (loc >= 0) {
                 throw new ReviewedStingException("Genome Loc Sorted Set already contains the GenomicLoc " + e.toString());
             } else {
diff --git a/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java
index 3d21e654f..6138e7396 100755
--- a/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/GenomeLocSortedSetUnitTest.java
@@ -6,6 +6,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
 
 import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
 import static org.testng.Assert.assertTrue;
 
 import org.testng.annotations.BeforeClass;
@@ -117,6 +118,41 @@ public class GenomeLocSortedSetUnitTest extends BaseTest {
         assertTrue(loc.getContigIndex() == 1);
     }
 
+    @Test
+    public void overlap() {
+        for ( int i = 1; i < 6; i++ ) {
+            final int start = i * 10;
+            mSortedSet.add(genomeLocParser.createGenomeLoc(contigOneName, start, start + 1));
+        }
+
+        // test matches in and around interval
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 9, 9)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 10, 10)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 11)));
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 12)));
+
+        // test matches spanning intervals
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 14, 20)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 11, 15)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 30, 40)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));
+
+        // test miss
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 12, 19)));
+
+        // test exact match after miss
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 40, 41)));
+
+        // test matches at beginning of intervals
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 5, 6)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 0, 10)));
+
+        // test matches at end of intervals
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53)));
+        assertTrue(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 51, 53)));
+        assertFalse(mSortedSet.overlaps(genomeLocParser.createGenomeLoc(contigOneName, 52, 53)));
+    }
+
     @Test
     public void mergingOverlappingAbove() {
         GenomeLoc e = genomeLocParser.createGenomeLoc(contigOneName, 0, 50);

From cc72aaefebfed89723c40403ae0dfd3a932ff78d Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 27 Nov 2012 01:11:23 -0500
Subject: [PATCH 127/236] Minor efficiency: use >= instead of > in test

---
 .../src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java  | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
index ca1d385a2..394220106 100755
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLocSortedSet.java
@@ -149,7 +149,7 @@ public class GenomeLocSortedSet extends AbstractSet<GenomeLoc> {
         previousOverlapSearchIndex = Collections.binarySearch(mArray, loc);
 
         // if it matches an interval exactly, we are done
-        if ( previousOverlapSearchIndex > 0 )
+        if ( previousOverlapSearchIndex >= 0 )
             return true;
 
         // check whether it overlaps the interval before or after the insertion point

From b1969a66bdcf755757517e77582b9d4e55caeb54 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Tue, 27 Nov 2012 08:24:41 -0500
Subject: [PATCH 128/236] Update docs

---
 .../gatk/walkers/fasta/FastaAlternateReferenceMaker.java    | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java
index 2b9744b89..22c6097cf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/fasta/FastaAlternateReferenceMaker.java
@@ -47,6 +47,12 @@ import java.util.List;
  * <p>
  * Given variant tracks, it replaces the reference bases at variation sites with the bases supplied by the ROD(s).
  * Additionally, allows for one or more "snpmask" VCFs to set overlapping bases to 'N'.
+ *
+ * The output format can be partially controlled using the provided command-line arguments.
+ * Specify intervals with the usual -L argument to output only the reference bases within your intervals.
+ * Overlapping intervals are automatically merged; reference bases for each disjoint interval will be output as a
+ * separate fasta sequence (named numerically in order).
+ *
  * Several important notes:
  * 1) if there are multiple variants that start at a site, it chooses one of them randomly.
  * 2) when there are overlapping indels (but with different start positions) only the first will be chosen.

From cc550b4145bd8f439a46e407ba015eaeea36edea Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 26 Nov 2012 11:48:05 -0500
Subject: [PATCH 129/236] Add a read and interval on a different contig

---
 .../traversals/TraverseActiveRegionsTest.java | 27 ++++++++++++++-----
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index e4c7b2db0..018e92d84 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -96,8 +96,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
-        // TODO: this fails!
-        //intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 20000));
+        intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
         intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
 
         reads = new ArrayList<SAMRecord>();
@@ -109,8 +108,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
         reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
-        // TODO
-        //reads.add(buildSAMRecord("simple20", "20", 10100, 10150));
+        reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
     }
 
     @Test
@@ -204,9 +202,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // extended_only: Extended in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
-
-        // TODO
-        // simple20: Primary in 20:10000-20000
+        // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
         ActiveRegion region;
@@ -221,6 +217,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "extended_only");
         // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
 
@@ -232,6 +229,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "extended_only");
         // TODO: fail verifyReadPrimary(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
 
@@ -243,6 +241,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // TODO: fail verifyReadExtended(region, "extended_only");
         // TODO: fail verifyReadExtended(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadPrimary(region, "simple20");
 
         // TODO: more tests and edge cases
     }
@@ -282,6 +293,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         for (LocusShardDataProvider dataProvider : createDataProviders(intervals))
             t.traverse(walker, dataProvider, 0);
 
+        t.endTraversal(walker, 0);
+
         return walker.mappedActiveRegions;
     }
 

From d83ad906eff14027908376eceb37dd502a6fdd78 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 26 Nov 2012 13:44:13 -0500
Subject: [PATCH 130/236] Add profile range contract

---
 .../sting/gatk/walkers/ActiveRegionWalker.java  |  2 ++
 .../traversals/TraverseActiveRegionsTest.java   | 17 +++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
index fed2c995e..c6e28df05 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.walkers;
 
+import com.google.java.contract.Ensures;
 import net.sf.picard.reference.IndexedFastaSequenceFile;
 import org.broad.tribble.Feature;
 import org.broadinstitute.sting.commandline.Input;
@@ -75,6 +76,7 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
     }
 
     // Determine probability of active status over the AlignmentContext
+    @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
     public abstract ActivityProfileResult isActive(final RefMetaDataTracker tracker, final ReferenceContext ref, final AlignmentContext context);
 
     // Map over the ActiveRegion
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index 018e92d84..8a4be48be 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -1,5 +1,6 @@
 package org.broadinstitute.sting.gatk.traversals;
 
+import com.google.java.contract.PreconditionError;
 import net.sf.samtools.*;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
@@ -52,6 +53,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
             this.prob = 1.0;
         }
 
+        public DummyActiveRegionWalker(double constProb) {
+            this.prob = constProb;
+        }
+
         @Override
         public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
             isActiveCalls.add(ref.getLocus());
@@ -132,6 +137,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
         return activeIntervals;
     }
 
+    @Test (expectedExceptions = PreconditionError.class)
+    public void testIsActiveRangeLow () {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker(-0.1);
+        getActiveRegions(walker, intervals).values();
+    }
+
+    @Test (expectedExceptions = PreconditionError.class)
+    public void testIsActiveRangeHigh () {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker(1.1);
+        getActiveRegions(walker, intervals).values();
+    }
+
     @Test
     public void testActiveRegionCoverage() {
         DummyActiveRegionWalker walker = new DummyActiveRegionWalker();

From 9bfe39411ee4465860d6cf1a1cb1f0fe32d0a1b3 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 26 Nov 2012 14:29:22 -0500
Subject: [PATCH 131/236] Equal overlap should match right/later region

---
 .../gatk/traversals/TraverseActiveRegionsTest.java | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index 8a4be48be..66504da11 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -109,7 +109,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
         reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
         reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
-        reads.add(buildSAMRecord("boundary_unequal", "1", 1995, 2050));
+        reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
         reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
         reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
@@ -214,8 +214,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // simple: Primary in 1:1-999
         // overlap_equal: Primary in 1:1-999
         // overlap_unequal: Primary in 1:1-999
-        // boundary_equal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
-        // boundary_unequal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+        // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+        // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_only: Extended in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
@@ -241,8 +241,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "simple");
         verifyReadNotPlaced(region, "overlap_equal");
         verifyReadNotPlaced(region, "overlap_unequal");
-        // TODO: fail verifyReadPrimary(region, "boundary_equal");
-        // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
+        // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
+        verifyReadPrimary(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_only");
         // TODO: fail verifyReadPrimary(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
@@ -253,8 +253,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "simple");
         verifyReadNotPlaced(region, "overlap_equal");
         verifyReadNotPlaced(region, "overlap_unequal");
-        // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
-        verifyReadPrimary(region, "boundary_unequal");
+        verifyReadPrimary(region, "boundary_equal");
+        // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
         // TODO: fail verifyReadExtended(region, "extended_only");
         // TODO: fail verifyReadExtended(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");

From 7e4b9c9e6e38a1f20999fa0b6b48a5ce2e313c5f Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 27 Nov 2012 10:12:39 -0500
Subject: [PATCH 133/236] Fix failing unit tests for
 VariantContextUtilsUnitTest

-- Previous version was adding multiple samples with the same name to the variant context
---
 .../sting/utils/variantcontext/VariantContextUtilsUnitTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
index f3daa9e4c..3ad438b26 100644
--- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextUtilsUnitTest.java
@@ -793,7 +793,7 @@ public class VariantContextUtilsUnitTest extends BaseTest {
 
         int sampleI = 0;
         for ( final List<Allele> alleles : Utils.makePermutations(vc.getAlleles(), 2, true) ) {
-            genotypes.add(GenotypeBuilder.create("sample" + sampleI, alleles));
+            genotypes.add(GenotypeBuilder.create("sample" + sampleI++, alleles));
         }
         genotypes.add(GenotypeBuilder.createMissing("missing", 2));
 

From 1cc0b48caab07426a3d54b34db3043ca96a28a4e Mon Sep 17 00:00:00 2001
From: Jacob Silterra <jacob@broadinstitute.org>
Date: Tue, 27 Nov 2012 17:44:35 -0500
Subject: [PATCH 139/236] Abstract connection to MongoDB so we can specify it
 through JSON file. Include 2 JSON spec files in GenomeAnalysisTK.jar

Create MongoDBManager, which keeps track of connections based on Locator class. Locators can be instantiated directly, or read from JSON files (NA12878DBArgumentCollection uses the GSon library)
---
 build.xml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/build.xml b/build.xml
index a93918ec8..3a264b476 100644
--- a/build.xml
+++ b/build.xml
@@ -681,6 +681,9 @@
               <include name="**/gatk/**/*.class" />
               <include name="**/alignment/**/*.class"/>
             </fileset>
+            <fileset dir="${java.private.source.dir}">
+                <include name="**/gatk/**/resources/*.*" />
+            </fileset>
             <fileset dir="${R.script.staging.dir}">
                 <include name="**/gatk/**/*.R"/>
                 <include name="**/alignment/**/*.R"/>

From 79bc878e6a9a280fd7873eca7b2861690dbf2628 Mon Sep 17 00:00:00 2001
From: Menachem Fromer <fromer@broadinstitute.org>
Date: Tue, 27 Nov 2012 22:37:41 -0500
Subject: [PATCH 140/236] Allow debugging to be set from the command line

---
 .../sting/gatk/walkers/phasing/ReadBackedPhasing.java    | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
index d8ae6b28b..eda43e6a5 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@@ -95,7 +95,8 @@ import static org.broadinstitute.sting.utils.codecs.vcf.VCFUtils.getVCFHeadersFr
 
 @DocumentedGATKFeature( groupName = "Variant Discovery Tools", extraDocs = {CommandLineGATK.class} )
 public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingStats> {
-    private static final boolean DEBUG = false;
+    @Argument(fullName="debug", shortName="debug", doc="If specified, print out very verbose debug information (if -l DEBUG is also specified)", required = false)
+    protected boolean DEBUG = false;
     /**
      * The VCF file we are phasing variants from.
      *
@@ -949,7 +950,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
         }
 
         if (DEBUG) logger.debug("\nPhasing table [AFTER CALCULATION]:\n" + sampleHaps + "\n");
-        MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, true);
+        MaxHaplotypeAndQuality maxHapQual = new MaxHaplotypeAndQuality(sampleHaps, DEBUG);
         double posteriorProb = maxHapQual.maxEntry.getScore().getValue();
 
         if (DEBUG)
@@ -971,7 +972,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
         public MaxHaplotypeAndQuality(PhasingTable hapTable, boolean printDebug) {
             // Marginalize each haplotype to its first 2 positions:
             hapTable = HaplotypeTableCreator.marginalizeAsNewTable(hapTable);
-            if (DEBUG && printDebug)
+            if (printDebug)
                 logger.debug("\nPhasing table [AFTER MAPPING]:\n" + hapTable + "\n");
 
             calculateMaxHapAndPhasingQuality(hapTable, printDebug);
@@ -981,7 +982,7 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
 
         private void calculateMaxHapAndPhasingQuality(PhasingTable hapTable, boolean printDebug) {
             hapTable.normalizeScores();
-            if (DEBUG && printDebug)
+            if (printDebug)
                 logger.debug("\nPhasing table [AFTER NORMALIZATION]:\n" + hapTable + "\n");
 
             // Determine the phase at this position:

From f0395b457ac4a2c7e40a573f6a46d8e0065b33d4 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Wed, 28 Nov 2012 13:56:32 -0500
Subject: [PATCH 143/236] Adding the work-in-progress, experimental
 RepeatLengthCovariate to the BQSR so Chris can continue the development.

---
 .../covariates/RepeatLengthCovariate.java     | 64 +++++++++++++++++++
 .../variantcontext/VariantContextUtils.java   |  2 +-
 2 files changed, 65 insertions(+), 1 deletion(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java

diff --git a/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
new file mode 100644
index 000000000..d4e4ab65e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/recalibration/covariates/RepeatLengthCovariate.java
@@ -0,0 +1,64 @@
+package org.broadinstitute.sting.utils.recalibration.covariates;
+
+import org.broadinstitute.sting.gatk.walkers.bqsr.RecalibrationArgumentCollection;
+import org.broadinstitute.sting.gatk.walkers.varianteval.stratifications.TandemRepeat;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.recalibration.ReadCovariates;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
+import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
+
+import java.util.Arrays;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: rpoplin
+ * Date: 11/3/12
+ */
+
+public class RepeatLengthCovariate implements ExperimentalCovariate {
+    final int MAX_REPEAT_LENGTH = 20;
+
+    // Initialize any member variables using the command-line arguments passed to the walkers
+    @Override
+    public void initialize(final RecalibrationArgumentCollection RAC) {}
+
+    @Override
+    public void recordValues(final GATKSAMRecord read, final ReadCovariates values) {
+        byte[] readBytes = read.getReadBases();
+        for (int i = 0; i < readBytes.length; i++) {
+            int maxRL = 0;
+            for (int str = 1; str <= 8; str++) {
+                if (i + str <= readBytes.length) {
+                    maxRL = Math.max(maxRL, VariantContextUtils.findNumberofRepetitions(
+                            Arrays.copyOfRange(readBytes,i,i + str),
+                            Arrays.copyOfRange(readBytes,i,readBytes.length)
+                    ));
+                }
+            }
+            if(maxRL > MAX_REPEAT_LENGTH) { maxRL = MAX_REPEAT_LENGTH; }
+            values.addCovariate(maxRL, maxRL, maxRL, i);
+        }
+    }
+
+    // Used to get the covariate's value from input csv file during on-the-fly recalibration
+    @Override
+    public final Object getValue(final String str) {
+        return Byte.parseByte(str);
+    }
+
+    @Override
+    public String formatKey(final int key) {
+        return String.format("%d", key);
+    }
+
+    @Override
+    public int keyFromValue(final Object value) {
+        return (value instanceof String) ? Integer.parseInt((String) value) : (Integer) value;
+    }
+
+    @Override
+    public int maximumKeyValue() {
+        return MAX_REPEAT_LENGTH + 1;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 1f1867f75..b3e3cf8df 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -1267,7 +1267,7 @@ public class VariantContextUtils {
      * @param testString             String to test
      * @return                       Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
      */
-    protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
+    public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
         int numRepeats = 0;
         for (int start = 0; start < testString.length; start += repeatUnit.length) {
             int end = start + repeatUnit.length;

From 198923b597e3635ec5f71aaa192ac4bcaca36ddd Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Mon, 26 Nov 2012 15:01:13 -0500
Subject: [PATCH 144/236] Add ActiveRegionReadState handling

---
 .../haplotypecaller/HaplotypeCaller.java      |  11 +-
 .../traversals/TraverseActiveRegions.java     |  16 +-
 .../gatk/walkers/ActiveRegionWalker.java      |  20 +-
 .../activeregion/ActiveRegionReadState.java   |  16 ++
 .../traversals/TraverseActiveRegionsTest.java | 207 +++++++++++++++---
 5 files changed, 230 insertions(+), 40 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 24b3309f1..d194e2620 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -45,6 +45,7 @@ import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedArgumentCollection
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
 import org.broadinstitute.sting.gatk.walkers.genotyper.VariantCallContext;
 import org.broadinstitute.sting.utils.*;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.clipping.ReadClipper;
 import org.broadinstitute.sting.utils.codecs.vcf.*;
@@ -295,9 +296,15 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
     @Override
     public boolean includeReadsWithDeletionAtLoci() { return true; }
 
-    // enable non primary reads in the active region
+    // enable non primary and extended reads in the active region
     @Override
-    public boolean wantsNonPrimaryReads() { return true; }
+    public EnumSet<ActiveRegionReadState> desiredReadStates() {
+        return EnumSet.of(
+                ActiveRegionReadState.PRIMARY,
+                ActiveRegionReadState.NONPRIMARY,
+                ActiveRegionReadState.EXTENDED
+        );
+    }
 
     @Override
     @Ensures({"result.isActiveProb >= 0.0", "result.isActiveProb <= 1.0"})
diff --git a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
index 3f20db0af..06fc01232 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegions.java
@@ -258,13 +258,23 @@ public class TraverseActiveRegions <M,T> extends TraversalEngine<M,T,ActiveRegio
                         activeRegion.add( read );
                     }
                     for( final ActiveRegion otherRegionToTest : workQueue ) {
-                        if( !bestRegion.equals(otherRegionToTest) && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
-                            otherRegionToTest.add( read );
+                        if( !bestRegion.equals(otherRegionToTest) ) {
+                            // check for non-primary vs. extended
+                            if ( otherRegionToTest.getLocation().overlapsP( readLoc ) ) {
+                                otherRegionToTest.add( read );
+                            } else if ( walker.wantsExtendedReads() && otherRegionToTest.getExtendedLoc().overlapsP( readLoc ) ) {
+                                otherRegionToTest.add( read );
+                            }
                         }
                     }
                 }
                 placedReads.add( read );
-            } else if( activeRegion.getExtendedLoc().overlapsP( readLoc ) && walker.wantsNonPrimaryReads() ) {
+                // check for non-primary vs. extended
+            } else if( activeRegion.getLocation().overlapsP( readLoc ) ) {
+                if ( walker.wantsNonPrimaryReads() ) {
+                    activeRegion.add( read );
+                }
+            } else if( walker.wantsExtendedReads() && activeRegion.getExtendedLoc().overlapsP( readLoc )) {
                 activeRegion.add( read );
             }
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
index c6e28df05..e1eb3a053 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/ActiveRegionWalker.java
@@ -14,14 +14,14 @@ import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalSetRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
 
 import java.io.PrintStream;
-import java.util.ArrayList;
-import java.util.List;
+import java.util.*;
 
 /**
  * Base class for all the Active Region Walkers.
@@ -71,8 +71,20 @@ public abstract class ActiveRegionWalker<MapType, ReduceType> extends Walker<Map
         return true;    // We are keeping all the reads
     }
 
-    public boolean wantsNonPrimaryReads() {
-        return false;
+    public EnumSet<ActiveRegionReadState> desiredReadStates() {
+        return EnumSet.of(ActiveRegionReadState.PRIMARY);
+    }
+
+    public final boolean wantsNonPrimaryReads() {
+        return desiredReadStates().contains(ActiveRegionReadState.NONPRIMARY);
+    }
+
+    public boolean wantsExtendedReads() {
+        return desiredReadStates().contains(ActiveRegionReadState.EXTENDED);
+    }
+
+    public boolean wantsUnmappedReads() {
+        return desiredReadStates().contains(ActiveRegionReadState.UNMAPPED);
     }
 
     // Determine probability of active status over the AlignmentContext
diff --git a/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
new file mode 100644
index 000000000..00e491eb0
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/activeregion/ActiveRegionReadState.java
@@ -0,0 +1,16 @@
+package org.broadinstitute.sting.utils.activeregion;
+
+/**
+ * Created with IntelliJ IDEA.
+ * User: thibault
+ * Date: 11/26/12
+ * Time: 2:35 PM
+ *
+ * Describes how a read relates to an assigned ActiveRegion
+ */
+public enum ActiveRegionReadState {
+    PRIMARY,        // This is the read's primary region
+    NONPRIMARY,     // This region overlaps the read, but it is not primary
+    EXTENDED,       // This region would overlap the read if it were extended
+    UNMAPPED        // This read is not mapped
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index 66504da11..b70085eff 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.traversals;
 
 import com.google.java.contract.PreconditionError;
 import net.sf.samtools.*;
+import org.broadinstitute.sting.utils.activeregion.ActiveRegionReadState;
 import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
 import org.broadinstitute.sting.utils.interval.IntervalUtils;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -46,6 +47,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
     private class DummyActiveRegionWalker extends ActiveRegionWalker<Integer, Integer> {
         private final double prob;
+        private EnumSet<ActiveRegionReadState> states = super.desiredReadStates();
+
         protected List<GenomeLoc> isActiveCalls = new ArrayList<GenomeLoc>();
         protected Map<GenomeLoc, ActiveRegion> mappedActiveRegions = new HashMap<GenomeLoc, ActiveRegion>();
 
@@ -57,6 +60,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
             this.prob = constProb;
         }
 
+        public DummyActiveRegionWalker(EnumSet<ActiveRegionReadState> wantStates) {
+            this.prob = 1.0;
+            this.states = wantStates;
+        }
+
+        @Override
+        public EnumSet<ActiveRegionReadState> desiredReadStates() {
+            return states;
+        }
+
         @Override
         public ActivityProfileResult isActive(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
             isActiveCalls.add(ref.getLocus());
@@ -202,12 +215,158 @@ public class TraverseActiveRegionsTest extends BaseTest {
     }
 
     @Test
-    public void testReadMapping() {
+    public void testPrimaryReadMapping() {
         DummyActiveRegionWalker walker = new DummyActiveRegionWalker();
 
         // Contract: Each read has the Primary state in a single region (or none)
         // This is the region of maximum overlap for the read (earlier if tied)
 
+        // simple: Primary in 1:1-999
+        // overlap_equal: Primary in 1:1-999
+        // overlap_unequal: Primary in 1:1-999
+        // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+        // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+        // extended_only: Extended in 1:2000-2999
+        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+        // outside_intervals: none
+        // simple20: Primary in 20:10000-10100
+
+        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
+        ActiveRegion region;
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+        getRead(region, "simple");
+        getRead(region, "overlap_equal");
+        getRead(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        getRead(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        // TODO: fail getRead(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        getRead(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "simple20");
+
+        // TODO: more tests and edge cases
+    }
+
+    @Test
+    public void testNonPrimaryReadMapping() {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+                EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY));
+
+        // Contract: Each read has the Primary state in a single region (or none)
+        // This is the region of maximum overlap for the read (earlier if tied)
+
+        // Contract: Each read has the Non-Primary state in all other regions it overlaps
+
+        // simple: Primary in 1:1-999
+        // overlap_equal: Primary in 1:1-999
+        // overlap_unequal: Primary in 1:1-999
+        // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
+        // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
+        // extended_only: Extended in 1:2000-2999
+        // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
+        // outside_intervals: none
+        // simple20: Primary in 20:10000-10100
+
+        Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
+        ActiveRegion region;
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
+
+        getRead(region, "simple");
+        getRead(region, "overlap_equal");
+        getRead(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        // TODO: fail getRead(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        getRead(region, "boundary_equal");
+        getRead(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        // TODO: fail getRead(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        getRead(region, "boundary_equal");
+        getRead(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "simple20");
+
+        // TODO: more tests and edge cases
+    }
+
+    @Test
+    public void testExtendedReadMapping() {
+        DummyActiveRegionWalker walker = new DummyActiveRegionWalker(
+                EnumSet.of(ActiveRegionReadState.PRIMARY, ActiveRegionReadState.NONPRIMARY, ActiveRegionReadState.EXTENDED));
+
+        // Contract: Each read has the Primary state in a single region (or none)
+        // This is the region of maximum overlap for the read (earlier if tied)
+
         // Contract: Each read has the Non-Primary state in all other regions it overlaps
         // Contract: Each read has the Extended state in regions where it only overlaps if the region is extended
 
@@ -226,13 +385,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1, 999));
 
-        verifyReadPrimary(region, "simple");
-        verifyReadPrimary(region, "overlap_equal");
-        verifyReadPrimary(region, "overlap_unequal");
+        getRead(region, "simple");
+        getRead(region, "overlap_equal");
+        getRead(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail verifyReadNonPrimary(region, "extended_and_np");
+        // TODO: fail getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -241,10 +400,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "simple");
         verifyReadNotPlaced(region, "overlap_equal");
         verifyReadNotPlaced(region, "overlap_unequal");
-        // TODO: fail verifyReadNonPrimary(region, "boundary_equal");
-        verifyReadPrimary(region, "boundary_unequal");
+        getRead(region, "boundary_equal");
+        getRead(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail verifyReadPrimary(region, "extended_and_np");
+        // TODO: fail getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -253,10 +412,10 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "simple");
         verifyReadNotPlaced(region, "overlap_equal");
         verifyReadNotPlaced(region, "overlap_unequal");
-        verifyReadPrimary(region, "boundary_equal");
-        // TODO: fail verifyReadNonPrimary(region, "boundary_unequal");
-        // TODO: fail verifyReadExtended(region, "extended_only");
-        // TODO: fail verifyReadExtended(region, "extended_and_np");
+        getRead(region, "boundary_equal");
+        getRead(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_only");
+        verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -267,33 +426,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        verifyReadNotPlaced(region, "extended_and_np");
+        // TODO: fail getRead(region, "extended_only");
+        // TODO: fail getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
-        verifyReadPrimary(region, "simple20");
+        getRead(region, "simple20");
 
         // TODO: more tests and edge cases
     }
 
-    private void verifyReadPrimary(ActiveRegion region, String readName) {
-        SAMRecord read = getRead(region, readName);
-        Assert.assertFalse(read.getNotPrimaryAlignmentFlag(), "Read " + read + " not primary in active region " + region);
-    }
-
-    private void verifyReadNonPrimary(ActiveRegion region, String readName) {
-        SAMRecord read = getRead(region, readName);
-        Assert.assertTrue(read.getNotPrimaryAlignmentFlag(), "Read " + read + " primary in active region " + region);
-    }
-
-    private void verifyReadExtended(ActiveRegion region, String readName) {
-        Assert.fail("The Extended read state has not been implemented");
-    }
-
     private void verifyReadNotPlaced(ActiveRegion region, String readName) {
         for (SAMRecord read : region.getReads()) {
             if (read.getReadName().equals(readName))
                 Assert.fail("Read " + readName + " found in active region " + region);
-        }
+         }
     }
 
     private SAMRecord getRead(ActiveRegion region, String readName) {
@@ -302,7 +447,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
                 return read;
         }
 
-        Assert.fail("Read " + readName + " not found in active region " + region);
+        Assert.fail("Read " + readName + " not assigned to active region " + region);
         return null;
     }
 

From c76c808268596f12d81ca247c826653f4ffa2f56 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Wed, 28 Nov 2012 11:09:12 -0500
Subject: [PATCH 145/236] Reads are required to be sorted - Remove the
 extended_only case because it's outside intervals

---
 .../traversals/TraverseActiveRegionsTest.java | 42 +++++++------------
 1 file changed, 15 insertions(+), 27 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index b70085eff..a65b0cb45 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -25,6 +25,7 @@ import org.broadinstitute.sting.utils.activeregion.ActiveRegion;
 import org.broadinstitute.sting.utils.activeregion.ActivityProfileResult;
 import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
 import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
+import org.broadinstitute.sting.utils.sam.ReadUtils;
 import org.testng.Assert;
 import org.testng.annotations.BeforeClass;
 import org.testng.annotations.Test;
@@ -100,7 +101,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
     private GenomeLocParser genomeLocParser;
 
     private List<GenomeLoc> intervals;
-    private List<SAMRecord> reads;
+    private List<GATKSAMRecord> reads;
 
     @BeforeClass
     private void init() throws FileNotFoundException {
@@ -117,16 +118,18 @@ public class TraverseActiveRegionsTest extends BaseTest {
         intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
         intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
 
-        reads = new ArrayList<SAMRecord>();
+        reads = new ArrayList<GATKSAMRecord>();
         reads.add(buildSAMRecord("simple", "1", 100, 200));
         reads.add(buildSAMRecord("overlap_equal", "1", 10, 20));
         reads.add(buildSAMRecord("overlap_unequal", "1", 10, 21));
         reads.add(buildSAMRecord("boundary_equal", "1", 1990, 2009));
         reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
-        reads.add(buildSAMRecord("extended_only", "1", 3000, 3100));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
         reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
         reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
+
+        // required by LocusIteratorByState, and I prefer to list them in test case order above
+        ReadUtils.sortReadsByCoordinate(reads);
     }
 
     @Test
@@ -226,7 +229,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // overlap_unequal: Primary in 1:1-999
         // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
-        // extended_only: Extended in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
         // simple20: Primary in 20:10000-10100
@@ -241,7 +243,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
@@ -253,8 +254,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         getRead(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -265,7 +265,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         getRead(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
@@ -277,7 +276,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
@@ -300,7 +298,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // overlap_unequal: Primary in 1:1-999
         // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
-        // extended_only: Extended in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
         // simple20: Primary in 20:10000-10100
@@ -315,8 +312,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -327,8 +323,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         getRead(region, "boundary_equal");
         getRead(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -339,7 +334,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         getRead(region, "boundary_equal");
         getRead(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
@@ -351,7 +345,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
@@ -375,7 +368,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // overlap_unequal: Primary in 1:1-999
         // boundary_equal: Non-Primary in 1:1000-1999, Primary in 1:2000-2999
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
-        // extended_only: Extended in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
         // simple20: Primary in 20:10000-10100
@@ -390,8 +382,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -402,8 +393,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         getRead(region, "boundary_equal");
         getRead(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -414,8 +404,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         getRead(region, "boundary_equal");
         getRead(region, "boundary_unequal");
-        verifyReadNotPlaced(region, "extended_only");
-        verifyReadNotPlaced(region, "extended_and_np");
+        getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         verifyReadNotPlaced(region, "simple20");
 
@@ -426,8 +415,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "overlap_unequal");
         verifyReadNotPlaced(region, "boundary_equal");
         verifyReadNotPlaced(region, "boundary_unequal");
-        // TODO: fail getRead(region, "extended_only");
-        // TODO: fail getRead(region, "extended_and_np");
+        verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
 
@@ -438,7 +426,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         for (SAMRecord read : region.getReads()) {
             if (read.getReadName().equals(readName))
                 Assert.fail("Read " + readName + " found in active region " + region);
-         }
+        }
     }
 
     private SAMRecord getRead(ActiveRegion region, String readName) {
@@ -520,7 +508,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         engine.setGenomeLocParser(genomeLocParser);
         t.initialize(engine);
 
-        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(reads);
+        StingSAMIterator iterator = ArtificialSAMUtils.createReadIterator(new ArrayList<SAMRecord>(reads));
         Shard shard = new MockLocusShard(genomeLocParser, intervals);
 
         List<LocusShardDataProvider> providers = new ArrayList<LocusShardDataProvider>();

From 26d9c41615ccd502b75f23a42110af925995beba Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Wed, 28 Nov 2012 14:06:58 -0500
Subject: [PATCH 146/236] Allow arbitrary resources to be packaged in the GATK
 jar, selecting among public/private/protected appropriately

-Resources must be in a "resources" or "templates" subdirectory within the Java package hierarchy

-Remove direct inclusion of private resources from the main jar packaging target added in Jacob's
patch: this would break builds where the private directory was absent, and did not respect build
settings (include.private, etc.)
---
 build.xml | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/build.xml b/build.xml
index 3a264b476..4db71a9ab 100644
--- a/build.xml
+++ b/build.xml
@@ -226,10 +226,17 @@
     <!-- the path for resources that need to go into the GATK jar;
          any additional resources should go into this set. -->
     <path id="gatk.resources">
-        <fileset dir="${basedir}">
-            <include name="${java.public.source.dir}/**/templates/*" />
-            <include name="${java.private.source.dir}/**/templates/*" if="include.private" />
-            <include name="${java.protected.source.dir}/**/templates/*" if="include.protected" />
+        <fileset dir="${java.public.source.dir}">
+            <include name="**/resources/*" />
+            <include name="**/templates/*" />
+        </fileset>
+        <fileset dir="${java.private.source.dir}" erroronmissingdir="false">
+            <include name="**/resources/*" if="include.private" />
+            <include name="**/templates/*" if="include.private" />
+        </fileset>
+        <fileset dir="${java.protected.source.dir}" erroronmissingdir="false">
+            <include name="**/resources/*" if="include.protected" />
+            <include name="**/templates/*" if="include.protected" />
         </fileset>
     </path>
 
@@ -681,9 +688,6 @@
               <include name="**/gatk/**/*.class" />
               <include name="**/alignment/**/*.class"/>
             </fileset>
-            <fileset dir="${java.private.source.dir}">
-                <include name="**/gatk/**/resources/*.*" />
-            </fileset>
             <fileset dir="${R.script.staging.dir}">
                 <include name="**/gatk/**/*.R"/>
                 <include name="**/alignment/**/*.R"/>

From b2e699169cf1c545e92db2536be2cab656c472fe Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Wed, 28 Nov 2012 15:26:05 -0500
Subject: [PATCH 147/236] Update GATK packaging settings to package arbitrary
 resources

With the newly-added support for packaging arbitrary resources, the
resources were getting packaged in a normal build but not when
creating a standalone GATK jar. This corrects this oversight.
---
 public/packages/GATKEngine.xml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/packages/GATKEngine.xml b/public/packages/GATKEngine.xml
index 2de0273f3..d0b4a52b5 100644
--- a/public/packages/GATKEngine.xml
+++ b/public/packages/GATKEngine.xml
@@ -36,6 +36,9 @@
       <dir name="org/broadinstitute/sting/utils/R" includes="**/*.tar.gz" />
       <!-- All R scripts in org.broadinstitute.sting -->
       <dir name="org/broadinstitute/sting" includes="**/*.R" />
+      <!-- Resources in org.broadinstitute.sting -->
+      <dir name="org/broadinstitute/sting" includes="**/resources/*" />
+      <dir name="org/broadinstitute/sting" includes="**/templates/*" />
       <!-- The GATK public key -->
       <file path="GATK_public.key" />
     </dependencies>

From b06e71cedf057e3848a46a9002cf83c496d6b8ef Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Wed, 28 Nov 2012 20:44:09 -0500
Subject: [PATCH 155/236] Use build jars in test classpaths by default

-Allows packaged resource files to be accessed within tests

-Guards against packaging errors in dist/ jars by testing the
jars that actually get run rather than unpackaged class files.
Previously we were only protected against packaging errors in the
monolithic jars posted to our website, not the dist/ jars used in
everyday runs.

-"ant fasttest" still uses the unpackaged class files for speed
(don't want to have to rebuild the jars in fasttest). Relies on
dubious methods to get at the resource files that would end up
in the jars.

-Eliminated the stupid separate "test" ivy config. Now we only
invoke ivy ONCE during an ant build that includes tests.
---
 build.xml | 64 +++++++++++++++++++++++++++----------------------------
 ivy.xml   | 11 ++++------
 2 files changed, 36 insertions(+), 39 deletions(-)

diff --git a/build.xml b/build.xml
index 4db71a9ab..cc45467d8 100644
--- a/build.xml
+++ b/build.xml
@@ -185,10 +185,7 @@
         <include name="**/*.class"/>
     </fileset>
 
-    <patternset id="dependency.mask" includes="*.jar">
-        <exclude name="testng*.jar" />
-        <exclude name="bcel*.jar" />
-    </patternset>
+    <patternset id="dependency.mask" includes="*.jar" />
 
     <path id="external.dependencies">
         <fileset dir="${lib.dir}" erroronmissingdir="false">
@@ -205,6 +202,16 @@
         <pathelement location="${scala.classes}" />
     </path>
 
+    <path id="build.results">
+        <!-- Ensure that GenomeAnalysisTK.jar comes first in the path, as it contains overrides for certain classes in our dependencies -->
+        <pathelement location="${dist.dir}/GenomeAnalysisTK.jar" />
+        <!-- After GenomeAnalysisTK.jar we include all of the other jars in the dist directory -->
+        <fileset dir="${dist.dir}" erroronmissingdir="false">
+            <patternset refid="dependency.mask" />
+            <exclude name="GenomeAnalysisTK.jar" />
+        </fileset>
+    </path>
+
     <fileset id="external.source.files" dir="${external.dir}" erroronmissingdir="false">
         <include name="**/*.java" />
     </fileset>
@@ -240,13 +247,6 @@
         </fileset>
     </path>
 
-    <path id="build.results">
-        <fileset dir="${dist.dir}">
-            <patternset refid="dependency.mask" />
-         </fileset>
-    </path>
-
-
     <!-- ******************************************************************************** -->
     <!-- Ivy Retrieve                                                                     -->
     <!-- ******************************************************************************** -->
@@ -1110,15 +1110,10 @@
     </path>
 
     <path id="testng.default.classpath">
-        <pathelement location="${java.classes}" />
-        <pathelement location="${scala.classes}" />
+        <path refid="build.results" />
         <pathelement location="${java.contracts.dir}" />
         <pathelement location="${java.test.classes}" />
         <pathelement location="${scala.test.classes}" />
-        <pathelement location="${R.tar.dir}" />
-        <path refid="R.script.source.path" />
-        <pathelement location="${key.dir}" />
-        <path refid="external.dependencies" />
     </path>
 
     <!-- Test targets -->
@@ -1126,9 +1121,6 @@
     <target name="test.init.compile">
         <mkdir dir="${java.test.classes}"/>
         <mkdir dir="${scala.test.classes}"/>
-        <antcall target="resolve">
-          <param name="ivy.conf" value="test"/>
-        </antcall>
     </target>
 
    <target name="test.java.internal.compile" depends="dist,test.init.compile">
@@ -1136,10 +1128,8 @@
         <javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
             <src refid="java.test.source.path" />
             <classpath>
-                <path refid="external.dependencies" />
-                <pathelement location="${java.classes}"/>
+                <path refid="build.results" />
                 <pathelement location="${java.contracts.dir}"/>
-                <pathelement location="${testng.jar}"/>
             </classpath>
             <compilerarg value="-proc:none"/>
         </javac>
@@ -1150,11 +1140,9 @@
         <javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}" srcdir="${external.dir}">
             <include name="*/test/**/*.java"/>
             <classpath>
-                <path refid="external.dependencies" />
+                <path refid="build.results" />
                 <pathelement location="${java.test.classes}"/>
-                <pathelement location="${java.classes}"/>
                 <pathelement location="${java.contracts.dir}"/>
-                <pathelement location="${testng.jar}"/>
             </classpath>
             <compilerarg value="-proc:none"/>
         </javac>
@@ -1167,9 +1155,8 @@
         <scalac fork="true" jvmargs="-Xmx512m" destdir="${scala.test.classes}" deprecation="yes" unchecked="yes">
 	        <src refid="scala.test.source.path" />
             <classpath>
-                <path refid="scala.dependencies"/>
+                <path refid="build.results"/>
                 <pathelement location="${java.test.classes}"/>
-                <pathelement location="${testng.jar}"/>
             </classpath>
         </scalac>
     </target>
@@ -1376,14 +1363,13 @@
 
     <!-- Fast test target that cuts major corners for speed. Requires that a full build has been done first. Java-only, single test class only -->
     <!-- Usage: ant fasttest -Dsingle=TestClass -->
-    <target name="fasttest" depends="init.javaonly,init,test.init">
+    <target name="fasttest" depends="init.javaonly,init">
         <condition property="not.clean">
             <and>
                 <available file="${build.dir}" />
                 <available file="${lib.dir}" />
                 <available file="${dist.dir}" />
                 <available file="${java.test.classes}" />
-                <available file="${testng.jar}" />
             </and>
         </condition>
         <fail message="fasttest requires a NON-CLEAN working directory (INCLUDING test classes). Do a full test build using ant test.compile first." unless="not.clean" />
@@ -1401,13 +1387,27 @@
         <javac fork="true" memoryMaximumSize="512m" destdir="${java.test.classes}" debug="true" optimize="on" tempdir="${java.io.tmpdir}">
             <src refid="java.test.source.path" />
             <classpath>
-                <path refid="external.dependencies" />
                 <pathelement location="${java.classes}"/>
-                <pathelement location="${testng.jar}"/>
+                <path refid="external.dependencies" />
             </classpath>
             <compilerarg value="-proc:none"/>
         </javac>
 
+        <!-- fasttest uses the unpackaged class files in its test classpath to avoid having to rebuild the jars in dist/ -->
+        <path id="testng.fasttest.classpath">
+            <pathelement location="${java.classes}" />
+            <pathelement location="${scala.classes}" />
+            <pathelement location="${java.contracts.dir}" />
+            <pathelement location="${java.test.classes}" />
+            <pathelement location="${scala.test.classes}" />
+            <pathelement location="${R.tar.dir}" />
+            <path refid="R.script.source.path" />
+            <pathelement location="${key.dir}" />
+            <path refid="external.dependencies" />
+            <path refid="java.source.path" />  <!-- Terrible hack to allow fasttest to see resource files stored in the source tree -->
+        </path>
+        <property name="testng.classpath" value="testng.fasttest.classpath" />
+
         <run-test testtype="${single}" outputdir="${report}/${single}" runfailed="false"/>
     </target>
 </project>
diff --git a/ivy.xml b/ivy.xml
index 1d2f95dc1..b7ca65406 100644
--- a/ivy.xml
+++ b/ivy.xml
@@ -24,11 +24,8 @@
 
 <ivy-module version="1.0">
     <info organisation="org.broadinstitute" module="Sting"/>
-    <configurations defaultconfmapping="test->default">
+    <configurations>
         <conf name="default" description="the core dependencies for the GATK"/>
-        <conf name="test" extends="default" description="external dependencies used for testing and metrics"/>
-        <conf name="scala" extends="default" description="the dependencies for scala"/>
-        <conf name="queue" extends="scala" description="the dependencies for Queue"/>
     </configurations>
     <dependencies defaultconf="default">
         <dependency org="net.sf" name="sam" rev="latest.integration"/>
@@ -83,9 +80,9 @@
         <dependency org="org.scala-lang" name="scala-library" rev="2.9.2"/>
 
         <!-- testing and evaluation dependencies -->
-        <dependency org="org.testng" name="testng" rev="5.14.1" conf="test"/>
-        <dependency org="org.uncommons" name="reportng" rev="1.1.2" conf="test"/>
-        <dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT" conf="test"/>
+        <dependency org="org.testng" name="testng" rev="5.14.1"/>
+        <dependency org="org.uncommons" name="reportng" rev="1.1.2"/>
+        <dependency org="com.google.code.caliper" name="caliper" rev="1.0-SNAPSHOT"/>
 
         <!-- Contracts for Java and dependencies -->
         <dependency org="com.google.code.cofoja" name="cofoja" rev="1.0-r139"/>

From a6c1fcd151463e96b68aef72d7e8824730706330 Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Thu, 29 Nov 2012 13:31:08 -0500
Subject: [PATCH 159/236] Removed default use of @Output syntax. If compile
 completes for QScripts, sending runtime errors during execute.

---
 .../sting/queue/QCommandLine.scala            | 172 +++++++++---------
 .../broadinstitute/sting/queue/QScript.scala  |  35 +---
 .../sting/queue/engine/QStatusMessenger.scala |   2 +-
 3 files changed, 97 insertions(+), 112 deletions(-)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
index 637174557..f899af86d 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QCommandLine.scala
@@ -110,95 +110,103 @@ class QCommandLine extends CommandLineProgram with Logging {
    * functions, and then builds and runs a QGraph based on the dependencies.
    */
   def execute = {
-    ClassFieldCache.parsingEngine = this.parser
+    var success = false
+    var result = 1
+    try {
+      ClassFieldCache.parsingEngine = this.parser
 
-    if (settings.qSettings.runName == null)
-      settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
-    if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory))
-      settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
-    qGraph.initializeWithSettings(settings)
+      if (settings.qSettings.runName == null)
+        settings.qSettings.runName = FilenameUtils.removeExtension(scripts.head.getName)
+      if (IOUtils.isDefaultTempDir(settings.qSettings.tempDirectory))
+        settings.qSettings.tempDirectory = IOUtils.absolute(settings.qSettings.runDirectory, ".queue/tmp")
+      qGraph.initializeWithSettings(settings)
 
-    for (commandPlugin <- allCommandPlugins) {
-      loadArgumentsIntoObject(commandPlugin)
-    }
-
-    for (commandPlugin <- allCommandPlugins) {
-      if (commandPlugin.statusMessenger != null)
-        commandPlugin.statusMessenger.started()
-    }
-
-    qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != null).map(_.statusMessenger).toSeq
-
-    // TODO: Default command plugin argument?
-    val remoteFileConverter = (
-      for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
-        yield commandPlugin.remoteFileConverter
-      ).headOption.getOrElse(null)
-
-    if (remoteFileConverter != null)
-      loadArgumentsIntoObject(remoteFileConverter)
-
-    val allQScripts = qScriptPluginManager.createAllTypes()
-    for (script <- allQScripts) {
-      logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
-      loadArgumentsIntoObject(script)
-      allCommandPlugins.foreach(_.initScript(script))
-      // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now.
-      //if (settings.run)
-      script.pullInputs()
-      script.qSettings = settings.qSettings
-      try {
-        script.script()
-      } catch {
-        case e: Exception =>
-          throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
+      for (commandPlugin <- allCommandPlugins) {
+        loadArgumentsIntoObject(commandPlugin)
       }
 
-      if (remoteFileConverter != null) {
-        if (remoteFileConverter.convertToRemoteEnabled)
-          script.mkRemoteOutputs(remoteFileConverter)
-      }
-
-      script.functions.foreach(qGraph.add(_))
-      logger.info("Added " + script.functions.size + " functions")
-    }
-    // Execute the job graph
-    qGraph.run()
-
-    val functionsAndStatus = qGraph.getFunctionsAndStatus
-    val success = qGraph.success
-
-    // walk over each script, calling onExecutionDone
-    for (script <- allQScripts) {
-      val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f))
-      script.onExecutionDone(scriptFunctions, success)
-    }
-
-    logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
-
-    // write the final complete job report
-    logger.info("Writing final jobs report...")
-    qGraph.writeJobsReport()
-
-    if (!success) {
-      logger.info("Done with errors")
-      qGraph.logFailed()
-      for (commandPlugin <- allCommandPlugins)
+      for (commandPlugin <- allCommandPlugins) {
         if (commandPlugin.statusMessenger != null)
-          commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts))
-      1
-    } else {
-      if (settings.run) {
-        allQScripts.foreach(_.pushOutputs())
-        for (commandPlugin <- allCommandPlugins)
-          if (commandPlugin.statusMessenger != null) {
-            val allInputs = allQScripts.map(_.remoteInputs)
-            val allOutputs = allQScripts.map(_.remoteOutputs)
-            commandPlugin.statusMessenger.done(allInputs, allOutputs)
-          }
+          commandPlugin.statusMessenger.started()
+      }
+
+      qGraph.messengers = allCommandPlugins.filter(_.statusMessenger != null).map(_.statusMessenger).toSeq
+
+      // TODO: Default command plugin argument?
+      val remoteFileConverter = (
+        for (commandPlugin <- allCommandPlugins if (commandPlugin.remoteFileConverter != null))
+        yield commandPlugin.remoteFileConverter
+        ).headOption.getOrElse(null)
+
+      if (remoteFileConverter != null)
+        loadArgumentsIntoObject(remoteFileConverter)
+
+      val allQScripts = qScriptPluginManager.createAllTypes()
+      for (script <- allQScripts) {
+        logger.info("Scripting " + qScriptPluginManager.getName(script.getClass.asSubclass(classOf[QScript])))
+        loadArgumentsIntoObject(script)
+        allCommandPlugins.foreach(_.initScript(script))
+        // TODO: Pulling inputs can be time/io expensive! Some scripts are using the files to generate functions-- even for dry runs-- so pull it all down for now.
+        //if (settings.run)
+        script.pullInputs()
+        script.qSettings = settings.qSettings
+        try {
+          script.script()
+        } catch {
+          case e: Exception =>
+            throw new UserException.CannotExecuteQScript(script.getClass.getSimpleName + ".script() threw the following exception: " + e, e)
+        }
+
+        if (remoteFileConverter != null) {
+          if (remoteFileConverter.convertToRemoteEnabled)
+            script.mkRemoteOutputs(remoteFileConverter)
+        }
+
+        script.functions.foreach(qGraph.add(_))
+        logger.info("Added " + script.functions.size + " functions")
+      }
+      // Execute the job graph
+      qGraph.run()
+
+      val functionsAndStatus = qGraph.getFunctionsAndStatus
+
+      // walk over each script, calling onExecutionDone
+      for (script <- allQScripts) {
+        val scriptFunctions = functionsAndStatus.filterKeys(f => script.functions.contains(f))
+        script.onExecutionDone(scriptFunctions, success)
+      }
+
+      logger.info("Script %s with %d total jobs".format(if (success) "completed successfully" else "failed", functionsAndStatus.size))
+
+      // write the final complete job report
+      logger.info("Writing final jobs report...")
+      qGraph.writeJobsReport()
+
+      if (qGraph.success) {
+        if (settings.run) {
+          allQScripts.foreach(_.pushOutputs())
+          for (commandPlugin <- allCommandPlugins)
+            if (commandPlugin.statusMessenger != null) {
+              val allInputs = allQScripts.map(_.remoteInputs)
+              val allOutputs = allQScripts.map(_.remoteOutputs)
+              commandPlugin.statusMessenger.done(allInputs, allOutputs)
+            }
+        }
+        success = true
+        result = 0
+      }
+    } finally {
+      if (!success) {
+        logger.info("Done with errors")
+        qGraph.logFailed()
+        if (settings.run) {
+          for (commandPlugin <- allCommandPlugins)
+            if (commandPlugin.statusMessenger != null)
+              commandPlugin.statusMessenger.exit("Done with errors: %s".format(qGraph.formattedStatusCounts))
+        }
       }
-      0
     }
+    result
   }
 
   /**
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index eb8be183a..5b67ae913 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -124,49 +124,26 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
   }
 
   /**
-   * Pull all remote files to the local disk.
+   * Pull all remote files to the local disk
    */
   def pullInputs() {
-    val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
-    for (remoteFile <- filterRemoteFiles(inputs)) {
-      logger.info("Pulling %s from %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription))
-      remoteFile.pullToLocal()
-    }
   }
 
   /**
-   * Push all remote files from the local disk.
+   * Push all remote files from the local disk
    */
   def pushOutputs() {
-    val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
-    for (remoteFile <- filterRemoteFiles(outputs)) {
-      logger.info("Pushing %s to %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription))
-      remoteFile.pushToRemote()
-    }
   }
 
   /**
-   * List out the remote outputs
-   * @return the RemoteFile outputs by argument source
+   * @return the inputs or null if there are no inputs
    */
-  def remoteInputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(inputFields))
+  def remoteInputs: AnyRef = null
 
   /**
-   * List out the remote outputs
-   * @return the RemoteFile outputs by argument source
+   * @return the outputs or null if there are no outputs
    */
-  def remoteOutputs: Map[String, Seq[RemoteFile]] = tagMap(remoteFieldMap(outputFields))
-
-  private def tagMap(remoteFieldMap: Map[ArgumentSource, Seq[RemoteFile]]): Map[String, Seq[RemoteFile]] = {
-    remoteFieldMap.collect{ case (k, v) => ClassFieldCache.fullName(k) -> v }.toMap
-  }
-
-  private def remoteFieldMap(fields: Seq[ArgumentSource]): Map[ArgumentSource, Seq[RemoteFile]] = {
-    fields.map(field => (field -> filterRemoteFiles(ClassFieldCache.getFieldFiles(this, field)))).filter(tuple => !tuple._2.isEmpty).toMap
-  }
-
-  private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] =
-    fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile])
+  def remoteOutputs: AnyRef = null
 
   /** The complete list of fields. */
   def functionFields: Seq[ArgumentSource] = ClassFieldCache.classFunctionFields(this.getClass)
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
index a1133b944..a69f68b8e 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/QStatusMessenger.scala
@@ -7,7 +7,7 @@ import org.broadinstitute.sting.queue.util.RemoteFile
  */
 trait QStatusMessenger {
   def started()
-  def done(inputs: Seq[Map[String, Seq[RemoteFile]]], outputs: Seq[Map[String, Seq[RemoteFile]]])
+  def done(inputs: Seq[_], outputs: Seq[_])
   def exit(message: String)
 
   def started(job: String)

From f837e6ced7ac1fc187eb98f3f2c707a18e7f4a8c Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Thu, 29 Nov 2012 14:38:09 -0500
Subject: [PATCH 160/236] Refactored entire NA12878KB to allow us to easily
 build a na12878kb.jar for IGV integration

-- Just separated infrastructure into core package, away from the walkers themselves.
-- Added na12878kb.jar target that builds a jar that can run a test main function (see testNA12878kbJar.csh)
---
 build.xml | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/build.xml b/build.xml
index cc45467d8..834aef3cd 100644
--- a/build.xml
+++ b/build.xml
@@ -679,6 +679,24 @@
         </jar>
     </target>
 
+    <target name="na12878kb.jar" depends="gatk.compile,init.jar">
+        <jar jarfile="${dist.dir}/na12878kb.jar">
+            <fileset dir="${java.classes}">
+              <include name="org/broadinstitute/sting/utils/GenomeLocParser*.class"/>
+              <include name="org/broadinstitute/sting/utils/GenomeLoc.class"/>
+              <include name="org/broadinstitute/sting/utils/HasGenomeLocation.class"/>
+              <include name="org/broadinstitute/sting/utils/BaseUtils.class"/>
+              <include name="org/broadinstitute/sting/utils/Utils.class"/>
+              <include name="org/broadinstitute/sting/utils/exceptions/**/*.class"/>
+              <include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/**/*.class"/>
+              <include name="net/sf/picard/reference/FastaSequenceFile.class"/>
+            </fileset>
+	    <fileset dir="${java.private.source.dir}">
+              <include name="org/broadinstitute/sting/gatk/walkers/na12878kb/core/resources/**/*"/>
+	    </fileset>
+        </jar>
+    </target>
+
     <target name="gatk.jar" depends="gatk.compile, init.jar, R.script.stage" description="generate the GATK distribution">
         <jar jarfile="${dist.dir}/GenomeAnalysisTK.jar">
             <path refid="gatk.resources"/>

From daf6269b6503055b5f1a932ec4debb0a43a41bd3 Mon Sep 17 00:00:00 2001
From: Johan Dahlberg <johan.dahlberg@medsci.uu.se>
Date: Mon, 1 Oct 2012 11:28:46 +0200
Subject: [PATCH 161/236] Setting the walltime

Signed-off-by: Joel Thibault <thibault@broadinstitute.org>
---
 .../src/org/broadinstitute/sting/queue/QSettings.scala |  4 ++++
 .../sting/queue/engine/drmaa/DrmaaJobRunner.scala      |  3 +++
 .../sting/queue/function/CommandLineFunction.scala     | 10 ++++++++++
 3 files changed, 17 insertions(+)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index 2c0f43bac..fb21700ac 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -31,6 +31,10 @@ import org.broadinstitute.sting.commandline.{ClassType, Argument}
  * Default settings settable on the command line and passed to CommandLineFunctions.
  */
 class QSettings {
+  
+  @Argument(fullName="job_walltime", shortName="wallTime", doc="Setting the required walltime when using the drmaa job runner.", required=false)
+  var jobWalltime: Option[Long] = None
+  
   @Argument(fullName="run_name", shortName="runName", doc="A name for this run used for various status messages.", required=false)
   var runName: String = _
 
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
index 2aae2fc6b..31b314c79 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
@@ -65,6 +65,9 @@ class DrmaaJobRunner(val session: Session, val function: CommandLineFunction) ex
         drmaaJob.setJoinFiles(true)
       }
 
+      if(function.wallTime != null)
+    	  drmaaJob.setHardWallclockTimeLimit(function.wallTime.get)      
+      
       drmaaJob.setNativeSpecification(functionNativeSpec)
 
       // Instead of running the function.commandLine, run "sh <jobScript>"
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
index eb426d301..d5870a6c3 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
@@ -33,6 +33,9 @@ import org.broadinstitute.sting.commandline.Argument
 trait CommandLineFunction extends QFunction with Logging {
   def commandLine: String
 
+  /** Setting the wall time request for drmaa job*/
+  var wallTime: Option[Long] = None
+  
   /** Upper memory limit */
   @Argument(doc="Memory limit", required=false)
   var memoryLimit: Option[Double] = None
@@ -67,6 +70,9 @@ trait CommandLineFunction extends QFunction with Logging {
     super.copySettingsTo(function)
     function match {
       case commandLineFunction: CommandLineFunction =>
+        if(commandLineFunction.wallTime.isEmpty)
+          commandLineFunction.wallTime = this.wallTime
+        
         if (commandLineFunction.memoryLimit.isEmpty)
           commandLineFunction.memoryLimit = this.memoryLimit
 
@@ -110,6 +116,10 @@ trait CommandLineFunction extends QFunction with Logging {
    * Sets all field values.
    */
   override def freezeFieldValues() {
+   
+    if(wallTime.isEmpty)
+      wallTime = qSettings.jobWalltime
+    
     if (jobQueue == null)
       jobQueue = qSettings.jobQueue
 

From 97d29f203e35fd98393f61a28e78134da2b84755 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Fri, 12 Oct 2012 17:16:56 -0400
Subject: [PATCH 162/236] Add walltime changes to LSF - Check whether the
 specified attribute is available - Add pipeline test (disabled due to missing
 attribute)

---
 .../sting/jna/drmaa/v1_0/JnaSession.java       | 18 ++++++++++++++----
 .../broadinstitute/sting/queue/QSettings.scala |  8 ++++----
 .../queue/engine/drmaa/DrmaaJobRunner.scala    |  2 +-
 .../queue/engine/lsf/Lsf706JobRunner.scala     |  7 +++++--
 .../queue/function/CommandLineFunction.scala   |  2 +-
 .../examples/HelloWorldPipelineTest.scala      | 11 +++++++++++
 6 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
index 480113e1e..830c6590d 100644
--- a/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
+++ b/public/java/src/org/broadinstitute/sting/jna/drmaa/v1_0/JnaSession.java
@@ -210,13 +210,23 @@ public class JnaSession implements Session {
     }
 
     public static void setAttribute(Pointer jt, String name, String value) throws DrmaaException {
-        checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+        if (getAttrNames().contains(name)) {
+            checkError(LibDrmaa.drmaa_set_attribute(jt, name, value, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+        }
+        else {
+            throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
+        }
     }
 
     public static String getAttribute(Pointer jt, String name) throws DrmaaException {
-        Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
-        checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
-        return attrBuffer.getString(0);
+        if (getAttrNames().contains(name)) {
+            Memory attrBuffer = new Memory(LibDrmaa.DRMAA_ATTR_BUFFER);
+            checkError(LibDrmaa.drmaa_get_attribute(jt, name, attrBuffer, LibDrmaa.DRMAA_ATTR_BUFFER_LEN, getError(), LibDrmaa.DRMAA_ERROR_STRING_BUFFER_LEN));
+            return attrBuffer.getString(0);
+        }
+        else {
+            throw new InvalidAttributeValueException("Attribute " + name + " is not supported by this implementation of DRMAA");
+        }
     }
 
     public static void setVectorAttribute(Pointer jt, String name, Collection<String> values) throws DrmaaException {
diff --git a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
index fb21700ac..b1e98a0e2 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QSettings.scala
@@ -31,10 +31,6 @@ import org.broadinstitute.sting.commandline.{ClassType, Argument}
  * Default settings settable on the command line and passed to CommandLineFunctions.
  */
 class QSettings {
-  
-  @Argument(fullName="job_walltime", shortName="wallTime", doc="Setting the required walltime when using the drmaa job runner.", required=false)
-  var jobWalltime: Option[Long] = None
-  
   @Argument(fullName="run_name", shortName="runName", doc="A name for this run used for various status messages.", required=false)
   var runName: String = _
 
@@ -76,6 +72,10 @@ class QSettings {
   @Argument(fullName="resident_memory_request_parameter", shortName="resMemReqParam", doc="Parameter for resident memory requests. By default not requested.", required=false)
   var residentRequestParameter: String = _
 
+  @Argument(fullName="job_walltime", shortName="wallTime", doc="Setting the required DRMAA walltime or LSF run limit.", required=false)
+  @ClassType(classOf[Long])
+  var jobWalltime: Option[Long] = None
+
   /** The name of the parallel environment (required for SGE, for example) */
   @Argument(fullName="job_parallel_env", shortName="jobParaEnv", doc="An SGE style parallel environment to use for jobs requesting more than 1 core.  Equivalent to submitting jobs with -pe ARG nt for jobs with nt > 1", required=false)
   var parallelEnvironmentName: String = "smp_pe" // Broad default
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
index 31b314c79..1dca22981 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/drmaa/DrmaaJobRunner.scala
@@ -65,7 +65,7 @@ class DrmaaJobRunner(val session: Session, val function: CommandLineFunction) ex
         drmaaJob.setJoinFiles(true)
       }
 
-      if(function.wallTime != null)
+      if(!function.wallTime.isEmpty)
     	  drmaaJob.setHardWallclockTimeLimit(function.wallTime.get)      
       
       drmaaJob.setNativeSpecification(functionNativeSpec)
diff --git a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
index 2fbea1497..5dc126e49 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/engine/lsf/Lsf706JobRunner.scala
@@ -151,8 +151,11 @@ class Lsf706JobRunner(val function: CommandLineFunction) extends CommandLineJobR
           throw new QException("setOption_() returned -1 while setting esub");
       }
 
-      // LSF specific: get the max runtime for the jobQueue and pass it for this job
-      request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
+      if(!function.wallTime.isEmpty)
+        request.rLimits(LibLsf.LSF_RLIMIT_RUN) = function.wallTime.get.toInt
+      else
+        // LSF specific: get the max runtime for the jobQueue and pass it for this job
+        request.rLimits(LibLsf.LSF_RLIMIT_RUN) = Lsf706JobRunner.getRlimitRun(function.jobQueue)
 
       // Run the command as sh <jobScript>
       request.command = "sh " + jobScript
diff --git a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
index d5870a6c3..2453cc50a 100644
--- a/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/function/CommandLineFunction.scala
@@ -33,7 +33,7 @@ import org.broadinstitute.sting.commandline.Argument
 trait CommandLineFunction extends QFunction with Logging {
   def commandLine: String
 
-  /** Setting the wall time request for drmaa job*/
+  /** Setting the wall time request for DRMAA / run limit for LSF */
   var wallTime: Option[Long] = None
   
   /** Upper memory limit */
diff --git a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
index 50fc529dd..c8085784d 100644
--- a/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
+++ b/public/scala/test/org/broadinstitute/sting/queue/pipeline/examples/HelloWorldPipelineTest.scala
@@ -126,4 +126,15 @@ class HelloWorldPipelineTest {
     spec.jobRunners = Seq("GridEngine")
     PipelineTest.executeTest(spec)
   }
+
+  // disabled because our DRMAA implementation doesn't support wallTime
+  @Test(enabled=false, timeOut=36000000)
+  def testHelloWorldWithWalltime() {
+    val spec = new PipelineTestSpec
+    spec.name = "HelloWorldWithWalltime"
+    spec.args = "-S public/scala/qscript/org/broadinstitute/sting/queue/qscripts/examples/HelloWorld.scala" +
+      " -wallTime 100"
+    spec.jobRunners = PipelineTest.allJobRunners
+    PipelineTest.executeTest(spec)
+  }
 }

From fc7fab5f3b0798671d10b3371c87e62eb50c0ffe Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Thu, 29 Nov 2012 22:10:25 -0500
Subject: [PATCH 164/236] Fixed ReadBackedPileup downsampling

Downsampling in the PerSampleReadBackedPileup was broken, it didn't downsample anything, always returning a copy the original pileup.
---
 .../utils/pileup/AbstractReadBackedPileup.java     | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index 42938d2a6..25f0bfa6d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -652,23 +652,19 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
             PerSamplePileupElementTracker<PE> tracker = (PerSamplePileupElementTracker<PE>) pileupElementTracker;
             PerSamplePileupElementTracker<PE> filteredTracker = new PerSamplePileupElementTracker<PE>();
 
-            int current = 0;
 
             for (final String sample : tracker.getSamples()) {
                 PileupElementTracker<PE> perSampleElements = tracker.getElements(sample);
 
-                List<PileupElement> filteredPileup = new ArrayList<PileupElement>();
-                for (PileupElement p : perSampleElements) {
+                int current = 0;
+                UnifiedPileupElementTracker<PE> filteredPileup = new UnifiedPileupElementTracker<PE>();
+                for (PE p : perSampleElements) {
                     if (positions.contains(current))
                         filteredPileup.add(p);
-                }
+                    current++;
 
-                if (!filteredPileup.isEmpty()) {
-                    AbstractReadBackedPileup<RBP, PE> pileup = createNewPileup(loc, perSampleElements);
-                    filteredTracker.addElements(sample, pileup.pileupElementTracker);
                 }
-
-                current++;
+                filteredTracker.addElements(sample, filteredPileup);
             }
 
             return (RBP) createNewPileup(loc, filteredTracker);

From 8020ba14db0bb38422510729d50efcf044a71543 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 30 Nov 2012 15:04:33 -0500
Subject: [PATCH 171/236] Minor cleanup of SAMDataSource as part of my system
 review

-- Changed a few function from public to protected, as they are only used by the package contents, to simplify the SAMDataSource interface
---
 .../sting/gatk/datasources/reads/SAMDataSource.java       | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index bb788c89f..88de3ac9b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -30,12 +30,10 @@ import net.sf.samtools.*;
 import net.sf.samtools.util.CloseableIterator;
 import net.sf.samtools.util.RuntimeIOException;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.downsampling.*;
-import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
-import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadMetrics;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
+import org.broadinstitute.sting.gatk.downsampling.*;
 import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.iterators.*;
@@ -567,7 +565,7 @@ public class SAMDataSource {
      *
      * @return the start positions of the first chunk of reads for all BAM files
      */
-    public Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
+    protected Map<SAMReaderID, GATKBAMFileSpan> getInitialReaderPositions() {
         Map<SAMReaderID, GATKBAMFileSpan> initialPositions = new HashMap<SAMReaderID, GATKBAMFileSpan>();
         SAMReaders readers = resourcePool.getAvailableReaders();
 
@@ -585,7 +583,7 @@ public class SAMDataSource {
      * @param shard The shard specifying the data limits.
      * @return An iterator over the selected data.
      */
-    public StingSAMIterator getIterator( Shard shard ) {
+    protected StingSAMIterator getIterator( Shard shard ) {
         return getIterator(resourcePool.getAvailableReaders(), shard, shard instanceof ReadShard);
     }
 

From 969c995298f2b6cd17872f6887464e8709614f45 Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Sat, 1 Dec 2012 00:08:19 -0500
Subject: [PATCH 174/236] work under development - catVariants.   Changes to
 AssessRRQuals based on Eric todo comments.  bug fix in CombineVariants

---
 .../gatk/walkers/qc/AssessReducedQuals.java   | 44 ++++++++++++-------
 .../variantcontext/VariantContextUtils.java   |  8 ++--
 2 files changed, 31 insertions(+), 21 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
index 78bcf1228..2c70d44e2 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
@@ -45,7 +45,6 @@ import java.util.List;
 public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
 
     private static final String reduced = "reduced";
-    private static final String original = "original";
     private static final int originalQualsIndex = 0;
     private static final int reducedQualsIndex = 1;
 
@@ -55,14 +54,14 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
     @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
     public int qual_epsilon = 0;
 
-    @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug mode on")   // TODO -- best to make this optional
-    public int debugLevel = 0;   // TODO -- best to make this an enum or boolean
+    @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug level: NO_DEBUG, PRINT_LOCI,PRINT_PILEUPS", required = false)
+    public DebugLevel debugLevel = DebugLevel.NO_DEBUG;
 
     @Output
     protected PrintStream out;
 
     public void initialize() {
-        if (debugLevel != 0)
+        if (debugLevel != DebugLevel.NO_DEBUG)
             out.println("  Debug mode" +
                         "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
                         "Debug:\tqual_epsilon: "+qual_epsilon);
@@ -78,20 +77,20 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
 
         boolean reportLocus;
         final int[] quals = getPileupQuals(context.getBasePileup());
-        if (debugLevel != 0)
+        if (debugLevel != DebugLevel.NO_DEBUG)
             out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
         final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
         final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
         final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
         final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
         reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
-        if(debugLevel != 0 && reportLocus)
+        if(debugLevel != DebugLevel.NO_DEBUG && reportLocus)
             out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
 
         return reportLocus ? ref.getLocus() : null;
     }
 
-    private final int[] getPileupQuals(final ReadBackedPileup readPileup) {
+    private int[] getPileupQuals(final ReadBackedPileup readPileup) {
 
         final int[] quals = new int[2];
         String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
@@ -99,30 +98,29 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
 
         for( PileupElement p : readPileup ){
             final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
-            if ( isGoodRead(p,tags) ){
+            if ( isGoodRead(p) ){
                 final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
                 final int tagIndex = getTagIndex(tags);
                 quals[tagIndex] += tempQual;
-                if(debugLevel == 2)
+                if(debugLevel == DebugLevel.PRINT_PILEUPS)
                     printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
             }
         }
-        if(debugLevel == 2){
+        if(debugLevel == DebugLevel.PRINT_PILEUPS){
             out.println(printPileup[originalQualsIndex]);
             out.println(printPileup[reducedQualsIndex]);
         }
         return quals;
     }
 
-    // TODO -- arguments/variables should be final, not method declaration
-    private final boolean isGoodRead(PileupElement p, List<String> tags){
-        // TODO -- this isn't quite right.  You don't need the tags here.  Instead, you want to check whether the read itself (which
-        // TODO --  you can get from the PileupElement) is a reduced read (not all reads from the reduced bam are reduced) and only
-        // TODO --  for them do you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
-        return !p.isDeletion() && (tags.contains(reduced) || (tags.contains(original) && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
+
+    private boolean isGoodRead(PileupElement p){
+        // TODO -- You want to check whether the read itself is a reduced read and only
+        // TODO --  for them you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
+        return !p.isDeletion() && ((p.getRead().isReducedRead()) || (!p.getRead().isReducedRead() && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
     }
 
-    private final int getTagIndex(List<String> tags){
+    private int getTagIndex(List<String> tags){
         return tags.contains(reduced) ? 1 : 0;
     }
 
@@ -170,4 +168,16 @@ public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implem
         out.println(sum);
         return value;
     }
+
+    public enum DebugLevel {
+        NO_DEBUG,
+        /**
+         * Print locus level information (such as locus quals) and loci with unmatch quals
+         */
+        PRINT_LOCI,
+        /**
+         * Print the pileup infomarion of the reduced bam files and the original bam files
+         */
+        PRINT_PILEUPS
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 614b234e9..3b6bd0182 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -451,7 +451,7 @@ public class VariantContextUtils {
         if ( unsortedVCs == null || unsortedVCs.size() == 0 )
             return null;
 
-        if ( annotateOrigin && priorityListOfVCs == null )
+        if ( annotateOrigin && priorityListOfVCs == null && genotypeMergeOptions == GenotypeMergeType.PRIORITIZE)
             throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");
 
         if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
@@ -597,7 +597,7 @@ public class VariantContextUtils {
 
         if ( annotateOrigin ) { // we care about where the call came from
             String setValue;
-            if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
+            if ( nFiltered == 0 && variantSources.size() == preFilteredVCs.size() ) // nothing was unfiltered
                 setValue = MERGE_INTERSECTION;
             else if ( nFiltered == VCs.size() )     // everything was filtered out
                 setValue = MERGE_FILTER_IN_ALL;
@@ -840,9 +840,9 @@ public class VariantContextUtils {
         if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
             throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
 
-        if ( mergeOption == GenotypeMergeType.UNSORTED ){
+        if ( mergeOption != GenotypeMergeType.PRIORITIZE ){
             if (priorityListOfVCs != null )
-                logger.info("Priority string was provided but is not used since GenotypeMergeType is UNSORTED");
+                logger.info("Priority string was provided but is not used since GenotypeMergeType is not PRIORITIZE");
             return new ArrayList<VariantContext>(unsortedVCs);
         }
         else {

From 2849889af5ccb850f297c644c23ca0e40e887f2f Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Sat, 1 Dec 2012 14:23:57 -0500
Subject: [PATCH 175/236] Updating md5 for UG

---
 .../gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 9d12b0ded..8ded61af8 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -351,7 +351,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
                 baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
                         "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
-                Arrays.asList("c526c234947482d1cd2ffc5102083a08"));
+                Arrays.asList("1256a7eceff2c2374c231ff981df486d"));
         executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
     }
 

From 1bdf17ef53710e1b1c8633e6dcc4b2c549d48a82 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Sun, 2 Dec 2012 11:58:32 -0500
Subject: [PATCH 178/236] Reworking of how the likelihood calculation is
 organized in the HaplotypeCaller to facilitate the inclusion of per allele
 downsampling. We now use the downsampling for both the GL calculations and
 the annotation calculations.

---
 .../haplotypecaller/GenotypingEngine.java     | 137 ++++++++++++----
 .../haplotypecaller/HaplotypeCaller.java      |  35 ++---
 .../LikelihoodCalculationEngine.java          | 148 ++++--------------
 .../indels/PairHMMIndelErrorModel.java        |   1 -
 .../broadinstitute/sting/utils/Haplotype.java |  27 ----
 .../genotyper/PerReadAlleleLikelihoodMap.java |   8 +-
 6 files changed, 157 insertions(+), 199 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index 4fc2dc8f7..6f94e2657 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -30,12 +30,16 @@ import com.google.java.contract.Requires;
 import net.sf.samtools.Cigar;
 import net.sf.samtools.CigarElement;
 import org.apache.commons.lang.ArrayUtils;
+import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
+import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
 import org.broadinstitute.sting.utils.variantcontext.*;
 
+import java.io.PrintStream;
 import java.util.*;
 
 public class GenotypingEngine {
@@ -43,23 +47,27 @@ public class GenotypingEngine {
     private final boolean DEBUG;
     private final static List<Allele> noCall = new ArrayList<Allele>(); // used to noCall all genotypes until the exact model is applied
     private final static Allele SYMBOLIC_UNASSEMBLED_EVENT_ALLELE = Allele.create("<UNASSEMBLED_EVENT>", false);
+    private final VariantAnnotatorEngine annotationEngine;
 
-    public GenotypingEngine( final boolean DEBUG ) {
+    public GenotypingEngine( final boolean DEBUG, final VariantAnnotatorEngine annotationEngine ) {
         this.DEBUG = DEBUG;
+        this.annotationEngine = annotationEngine;
         noCall.add(Allele.NO_CALL);
     }
 
-    // BUGBUG: Create a class to hold this complicated return type
     @Requires({"refLoc.containsP(activeRegionWindow)", "haplotypes.size() > 0"})
-    public List<Pair<VariantContext, Map<Allele, List<Haplotype>>>> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
-                                                                                                                       final List<Haplotype> haplotypes,
-                                                                                                                       final byte[] ref,
-                                                                                                                       final GenomeLoc refLoc,
-                                                                                                                       final GenomeLoc activeRegionWindow,
-                                                                                                                       final GenomeLocParser genomeLocParser,
-                                                                                                                       final List<VariantContext> activeAllelesToGenotype ) {
+    public List<VariantContext> assignGenotypeLikelihoodsAndCallIndependentEvents( final UnifiedGenotyperEngine UG_engine,
+                                                                                   final List<Haplotype> haplotypes,
+                                                                                   final List<String> samples,
+                                                                                   final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
+                                                                                   final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
+                                                                                   final byte[] ref,
+                                                                                   final GenomeLoc refLoc,
+                                                                                   final GenomeLoc activeRegionWindow,
+                                                                                   final GenomeLocParser genomeLocParser,
+                                                                                   final List<VariantContext> activeAllelesToGenotype ) {
 
-        final ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>> returnCalls = new ArrayList<Pair<VariantContext, Map<Allele, List<Haplotype>>>>();
+        final List<VariantContext> returnCalls = new ArrayList<VariantContext>();
         final boolean in_GGA_mode = !activeAllelesToGenotype.isEmpty();
 
         // Using the cigar from each called haplotype figure out what events need to be written out in a VCF file
@@ -79,8 +87,8 @@ public class GenotypingEngine {
         }
 
         cleanUpSymbolicUnassembledEvents( haplotypes );
-        if( !in_GGA_mode && haplotypes.get(0).getSampleKeySet().size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
-            mergeConsecutiveEventsBasedOnLD( haplotypes, startPosKeySet, ref, refLoc );
+        if( !in_GGA_mode && samples.size() >= 10 ) { // if not in GGA mode and have at least 10 samples try to create MNP and complex events by looking at LD structure
+            mergeConsecutiveEventsBasedOnLD( haplotypes, samples, haplotypeReadMap, startPosKeySet, ref, refLoc );
         }
         if( in_GGA_mode ) {
             for( final VariantContext compVC : activeAllelesToGenotype ) {
@@ -90,7 +98,7 @@ public class GenotypingEngine {
 
         // Walk along each position in the key set and create each event to be outputted
         for( final int loc : startPosKeySet ) {
-            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) {
+            if( loc >= activeRegionWindow.getStart() && loc <= activeRegionWindow.getStop() ) { // genotyping an event inside this active region
                 final ArrayList<VariantContext> eventsAtThisLoc = new ArrayList<VariantContext>(); // the overlapping events to merge into a common reference view
                 final ArrayList<String> priorityList = new ArrayList<String>(); // used to merge overlapping events into common reference view
 
@@ -167,12 +175,14 @@ public class GenotypingEngine {
                     //System.out.println("Event/haplotype allele mapping = " + alleleMapper);
                 }
 
+                final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = convertHaplotypeReadMapToAlleleReadMap( haplotypeReadMap, alleleMapper, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
+
                 // Grab the genotype likelihoods from the appropriate places in the haplotype likelihood matrix -- calculation performed independently per sample
-                final GenotypesContext genotypes = GenotypesContext.create(haplotypes.get(0).getSampleKeySet().size());
-                for( final String sample : haplotypes.get(0).getSampleKeySet() ) { // BUGBUG: assume all haplotypes saw the same samples
+                final GenotypesContext genotypes = GenotypesContext.create(samples.size());
+                for( final String sample : samples ) {
                     final int numHaplotypes = alleleMapper.size();
                     final double[] genotypeLikelihoods = new double[numHaplotypes * (numHaplotypes+1) / 2];
-                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleMapper, alleleOrdering);
+                    final double[][] haplotypeLikelihoodMatrix = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods(sample, alleleReadMap, alleleOrdering);
                     int glIndex = 0;
                     for( int iii = 0; iii < numHaplotypes; iii++ ) {
                         for( int jjj = 0; jjj <= iii; jjj++ ) {
@@ -183,25 +193,55 @@ public class GenotypingEngine {
                 }
                 VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
                 if( call != null ) {
-                    if( call.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
-                        final VariantContext vcCallTrim = VariantContextUtils.reverseTrimAlleles(call);
-                        // also, need to update the allele -> haplotype mapping
-                        final HashMap<Allele, List<Haplotype>> alleleHashMapTrim = new HashMap<Allele, List<Haplotype>>();
-                        for( int iii = 0; iii < vcCallTrim.getAlleles().size(); iii++ ) { // BUGBUG: this is assuming that the original and trimmed alleles maintain the same ordering in the VC
-                            alleleHashMapTrim.put(vcCallTrim.getAlleles().get(iii), alleleMapper.get(call.getAlleles().get(iii)));
-                        }
+                    final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap, perSampleFilteredReadList, call );
+                    VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
 
-                        call = vcCallTrim;
-                        alleleMapper = alleleHashMapTrim;
+                    if( annotatedCall.getAlleles().size() != mergedVC.getAlleles().size() ) { // some alleles were removed so reverseTrimming might be necessary!
+                        annotatedCall = VariantContextUtils.reverseTrimAlleles(annotatedCall);
                     }
 
-                    returnCalls.add( new Pair<VariantContext, Map<Allele,List<Haplotype>>>(call, alleleMapper) );
+                    returnCalls.add( annotatedCall );
                 }
             }
         }
         return returnCalls;
     }
 
+    private static Map<String, PerReadAlleleLikelihoodMap> filterToOnlyOverlappingReads( final GenomeLocParser parser,
+                                                                                         final Map<String, PerReadAlleleLikelihoodMap> perSampleReadMap,
+                                                                                         final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
+                                                                                         final VariantContext call ) {
+
+        final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
+        final GenomeLoc callLoc = parser.createGenomeLoc(call);
+        for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
+            final PerReadAlleleLikelihoodMap likelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
+
+            for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : likelihoodMap.getLikelihoodReadMap().entrySet() ) {
+                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
+                if( callLoc.overlapsP(parser.createGenomeLoc(mapEntry.getKey())) ) {
+                    for( final Map.Entry<Allele,Double> a : mapEntry.getValue().entrySet() ) {
+                        likelihoodMap.add(mapEntry.getKey(), a.getKey(), a.getValue());
+                    }
+                }
+            }
+
+            // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
+            for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
+                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
+                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
+                    for( final Allele a : call.getAlleles() ) {
+                        likelihoodMap.add(read, a, 0.0);
+                    }
+                }
+            }
+
+            returnMap.put(sample.getKey(), likelihoodMap);
+        }
+        return returnMap;
+    }
+
+
     protected static void cleanUpSymbolicUnassembledEvents( final List<Haplotype> haplotypes ) {
         final ArrayList<Haplotype> haplotypesToRemove = new ArrayList<Haplotype>();
         for( final Haplotype h : haplotypes ) {
@@ -221,7 +261,41 @@ public class GenotypingEngine {
         haplotypes.removeAll(haplotypesToRemove);
     }
 
-    protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes, final TreeSet<Integer> startPosKeySet, final byte[] ref, final GenomeLoc refLoc ) {
+    // BUGBUG: ugh, too complicated
+    protected Map<String, PerReadAlleleLikelihoodMap> convertHaplotypeReadMapToAlleleReadMap( final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
+                                                                                              final Map<Allele, List<Haplotype>> alleleMapper,
+                                                                                              final double downsamplingFraction,
+                                                                                              final PrintStream downsamplingLog ) {
+
+        final Map<String, PerReadAlleleLikelihoodMap> alleleReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
+        for( final Map.Entry<String, PerReadAlleleLikelihoodMap> haplotypeReadMapEntry : haplotypeReadMap.entrySet() ) { // for each sample
+            final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
+            for( final Map.Entry<Allele, List<Haplotype>> alleleMapperEntry : alleleMapper.entrySet() ) { // for each output allele
+                final List<Haplotype> mappedHaplotypes = alleleMapperEntry.getValue();
+                for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> readEntry : haplotypeReadMapEntry.getValue().getLikelihoodReadMap().entrySet() ) { // for each read
+                    double maxLikelihood = Double.NEGATIVE_INFINITY;
+                    for( final Map.Entry<Allele,Double> alleleDoubleEntry : readEntry.getValue().entrySet() ) { // for each input allele
+                        if( mappedHaplotypes.contains( new Haplotype(alleleDoubleEntry.getKey().getBases())) ) { // exact match of haplotype base string
+                            maxLikelihood = Math.max( maxLikelihood, alleleDoubleEntry.getValue() );
+                        }
+                    }
+                    perReadAlleleLikelihoodMap.add(readEntry.getKey(), alleleMapperEntry.getKey(), maxLikelihood);
+                }
+            }
+            perReadAlleleLikelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog); // perform contamination downsampling
+            alleleReadMap.put(haplotypeReadMapEntry.getKey(), perReadAlleleLikelihoodMap);
+        }
+
+        return alleleReadMap;
+    }
+
+    protected void mergeConsecutiveEventsBasedOnLD( final List<Haplotype> haplotypes,
+                                                    final List<String> samples,
+                                                    final Map<String, PerReadAlleleLikelihoodMap> haplotypeReadMap,
+                                                    final TreeSet<Integer> startPosKeySet,
+                                                    final byte[] ref,
+                                                    final GenomeLoc refLoc ) {
+
         final int MAX_SIZE_TO_COMBINE = 15;
         final double MERGE_EVENTS_R2_THRESHOLD = 0.95;
         if( startPosKeySet.size() <= 1 ) { return; }
@@ -265,12 +339,13 @@ public class GenotypingEngine {
                             }
                         }
                         // count up the co-occurrences of the events for the R^2 calculation
-                        final ArrayList<Haplotype> haplotypeList = new ArrayList<Haplotype>();
-                        haplotypeList.add(h);
-                        for( final String sample : haplotypes.get(0).getSampleKeySet() ) {
+                        for( final String sample : samples ) {
                             final HashSet<String> sampleSet = new HashSet<String>(1);
                             sampleSet.add(sample);
-                            final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet,  haplotypeList )[0][0];
+
+                            final List<Allele> alleleList = new ArrayList<Allele>();
+                            alleleList.add(Allele.create(h.getBases()));
+                            final double haplotypeLikelihood = LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods( sampleSet, haplotypeReadMap, alleleList )[0][0];
                             if( thisHapVC == null ) {
                                 if( nextHapVC == null ) { x11 = MathUtils.approximateLog10SumLog10(x11, haplotypeLikelihood); }
                                 else { x12 = MathUtils.approximateLog10SumLog10(x12, haplotypeLikelihood); }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
index 24b3309f1..2b3513bef 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java
@@ -202,9 +202,6 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
     // the genotyping engine
     private GenotypingEngine genotypingEngine = null;
 
-    // the annotation engine
-    private VariantAnnotatorEngine annotationEngine;
-
     // fasta reference reader to supplement the edges of the reference sequence
     private CachingIndexedFastaSequenceFile referenceReader;
 
@@ -249,7 +246,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
         UG_engine_simple_genotyper = new UnifiedGenotyperEngine(getToolkit(), simpleUAC, logger, null, null, samples, VariantContextUtils.DEFAULT_PLOIDY);
 
         // initialize the output VCF header
-        annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
+        final VariantAnnotatorEngine annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, annotationsToExclude, this, getToolkit());
 
         Set<VCFHeaderLine> headerInfo = new HashSet<VCFHeaderLine>();
 
@@ -282,7 +279,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
 
         assemblyEngine = new SimpleDeBruijnAssembler( DEBUG, graphWriter );
         likelihoodCalculationEngine = new LikelihoodCalculationEngine( (byte)gcpHMM, DEBUG, pairHMM );
-        genotypingEngine = new GenotypingEngine( DEBUG );
+        genotypingEngine = new GenotypingEngine( DEBUG, annotationEngine );
     }
 
     //---------------------------------------------------------------------------------------------------------------
@@ -398,21 +395,23 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
         Collections.sort( haplotypes, new Haplotype.HaplotypeBaseComparator() );
 
         // evaluate each sample's reads against all haplotypes
-        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList = splitReadsBySample( activeRegion.getReads() );
-        final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
-        likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, perSampleReadList );
+        final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = likelihoodCalculationEngine.computeReadLikelihoods( haplotypes, splitReadsBySample( activeRegion.getReads() ) );
+        final Map<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList = splitReadsBySample( filteredReads );
 
         // subset down to only the best haplotypes to be genotyped in all samples ( in GGA mode use all discovered haplotypes )
-        final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes ) : haplotypes );
+        final ArrayList<Haplotype> bestHaplotypes = ( UG_engine.getUAC().GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ? likelihoodCalculationEngine.selectBestHaplotypes( haplotypes, stratifiedReadMap ) : haplotypes );
 
-        for( final Pair<VariantContext, Map<Allele, List<Haplotype>>> callResult :
-                genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine, bestHaplotypes, fullReferenceWithPadding, getPaddedLoc(activeRegion), activeRegion.getLocation(), getToolkit().getGenomeLocParser(), activeAllelesToGenotype ) ) {
-            if( DEBUG ) { System.out.println(callResult.getFirst().toStringWithoutGenotypes()); }
-
-            final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = LikelihoodCalculationEngine.partitionReadsBasedOnLikelihoods( getToolkit().getGenomeLocParser(), perSampleReadList, perSampleFilteredReadList, callResult, UG_engine.getUAC().CONTAMINATION_FRACTION, UG_engine.getUAC().contaminationLog );
-            final VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, callResult.getFirst());
-            final Map<String, Object> myAttributes = new LinkedHashMap<String, Object>(annotatedCall.getAttributes());
-            vcfWriter.add( new VariantContextBuilder(annotatedCall).attributes(myAttributes).make() );
+        for( final VariantContext call : genotypingEngine.assignGenotypeLikelihoodsAndCallIndependentEvents( UG_engine,
+                                                                                                             bestHaplotypes,
+                                                                                                             samplesList,
+                                                                                                             stratifiedReadMap,
+                                                                                                             perSampleFilteredReadList,
+                                                                                                             fullReferenceWithPadding,
+                                                                                                             getPaddedLoc(activeRegion),
+                                                                                                             activeRegion.getLocation(),
+                                                                                                             getToolkit().getGenomeLocParser(),
+                                                                                                             activeAllelesToGenotype ) ) {
+            vcfWriter.add( call );
         }
 
         if( DEBUG ) { System.out.println("----------------------------------------------------------------------------------"); }
@@ -467,7 +466,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<Integer, Integer> implem
             if( postAdapterRead != null && !postAdapterRead.isEmpty() && postAdapterRead.getCigar().getReadLength() > 0 ) {
                 final GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualEnds( postAdapterRead, MIN_TAIL_QUALITY );
                 // protect against INTERVALS with abnormally high coverage
-                // BUGBUG: remove when positinal downsampler is hooked up to ART/HC
+                // BUGBUG: remove when positional downsampler is hooked up to ART/HC
                 if( clippedRead.getReadLength() > 0 && activeRegion.size() < samplesList.size() * DOWNSAMPLE_PER_SAMPLE_PER_REGION ) {
                     activeRegion.add(clippedRead);
                 }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 29622ca17..4a5c7fe9b 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -71,8 +71,9 @@ public class LikelihoodCalculationEngine {
         DEBUG = debug;
     }
 
-    public void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
+    public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList ) {
 
+        final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
         int X_METRIC_LENGTH = 0;
         for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
             for( final GATKSAMRecord read : sample.getValue() ) {
@@ -97,20 +98,16 @@ public class LikelihoodCalculationEngine {
         for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sampleEntry : perSampleReadList.entrySet() ) {
             //if( DEBUG ) { System.out.println("Evaluating sample " + sample + " with " + perSampleReadList.get( sample ).size() + " passing reads"); }
             // evaluate the likelihood of the reads given those haplotypes
-            computeReadLikelihoods( haplotypes, sampleEntry.getValue(), sampleEntry.getKey() );
+            stratifiedReadMap.put(sampleEntry.getKey(), computeReadLikelihoods(haplotypes, sampleEntry.getValue(), sampleEntry.getKey()));
         }
+        return stratifiedReadMap;
     }
 
-    private void computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample ) {
+    private PerReadAlleleLikelihoodMap computeReadLikelihoods( final ArrayList<Haplotype> haplotypes, final ArrayList<GATKSAMRecord> reads, final String sample ) {
 
+        final PerReadAlleleLikelihoodMap perReadAlleleLikelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
         final int numHaplotypes = haplotypes.size();
-        final int numReads = reads.size();
-        final double[][] readLikelihoods = new double[numHaplotypes][numReads];
-        final int[][] readCounts = new int[numHaplotypes][numReads];
-        for( int iii = 0; iii < numReads; iii++ ) {
-            final GATKSAMRecord read = reads.get(iii);
-            final int readCount = ReadUtils.getMeanRepresentativeReadCount(read);
-
+        for( final GATKSAMRecord read : reads ) {
             final byte[] overallGCP = new byte[read.getReadLength()];
             Arrays.fill( overallGCP, constantGCP ); // Is there a way to derive empirical estimates for this from the data?
             Haplotype previousHaplotypeSeen = null;
@@ -129,14 +126,12 @@ public class LikelihoodCalculationEngine {
                 final int haplotypeStart = ( previousHaplotypeSeen == null ? 0 : computeFirstDifferingPosition(haplotype.getBases(), previousHaplotypeSeen.getBases()) );
                 previousHaplotypeSeen = haplotype;
 
-                readLikelihoods[jjj][iii] = pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(),
-                        readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, jjj == 0);
-                readCounts[jjj][iii] = readCount;
+                perReadAlleleLikelihoodMap.add(read, Allele.create(haplotype.getBases()),
+                        pairHMM.computeReadLikelihoodGivenHaplotypeLog10(haplotype.getBases(), read.getReadBases(),
+                                readQuals, readInsQuals, readDelQuals, overallGCP, haplotypeStart, jjj == 0));
             }
         }
-        for( int jjj = 0; jjj < numHaplotypes; jjj++ ) {
-            haplotypes.get(jjj).addReadLikelihoods( sample, readLikelihoods[jjj], readCounts[jjj] );
-        }
+        return perReadAlleleLikelihoodMap;
     }
 
     private static int computeFirstDifferingPosition( final byte[] b1, final byte[] b2 ) {
@@ -148,19 +143,21 @@ public class LikelihoodCalculationEngine {
         return Math.min(b1.length, b2.length);
     }
 
-    // This function takes just a single sample and a haplotypeMapping
     @Requires({"haplotypeMapping.size() > 0"})
     @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
+    public static double[][] computeDiploidHaplotypeLikelihoods( final String sample,
+                                                                 final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap,
+                                                                 final List<Allele> alleleOrdering ) {
         final TreeSet<String> sampleSet = new TreeSet<String>();
         sampleSet.add(sample);
-        return computeDiploidHaplotypeLikelihoods(sampleSet, haplotypeMapping, alleleOrdering);
+        return computeDiploidHaplotypeLikelihoods(sampleSet, stratifiedReadMap, alleleOrdering);
     }
 
-    // This function takes a set of samples to pool over and a haplotypeMapping
     @Requires({"haplotypeMapping.size() > 0"})
     @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final Map<Allele, List<Haplotype>> haplotypeMapping, final List<Allele> alleleOrdering ) {
+    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples,
+                                                                 final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap,
+                                                                 final List<Allele> alleleOrdering ) {
 
         final int numHaplotypes = alleleOrdering.size();
         final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
@@ -170,59 +167,19 @@ public class LikelihoodCalculationEngine {
 
         // compute the diploid haplotype likelihoods
         for( int iii = 0; iii < numHaplotypes; iii++ ) {
+            final Allele iii_allele = alleleOrdering.get(iii);
             for( int jjj = 0; jjj <= iii; jjj++ ) {
-                for( final Haplotype iii_mapped : haplotypeMapping.get(alleleOrdering.get(iii)) ) {
-                    for( final Haplotype jjj_mapped : haplotypeMapping.get(alleleOrdering.get(jjj)) ) {
-                        double haplotypeLikelihood = 0.0;
-                        for( final String sample : samples ) {
-                            final double[] readLikelihoods_iii = iii_mapped.getReadLikelihoods(sample);
-                            final int[] readCounts_iii = iii_mapped.getReadCounts(sample);
-                            final double[] readLikelihoods_jjj = jjj_mapped.getReadLikelihoods(sample);
-                            for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
-                                // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
-                                // First term is approximated by Jacobian log with table lookup.
-                                haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
-                            }
-                        }
-                        haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
-                    }
-                }
-            }
-        }
-
-        // normalize the diploid likelihoods matrix
-        return normalizeDiploidLikelihoodMatrixFromLog10( haplotypeLikelihoodMatrix );
-    }
-
-    // This function takes a set of samples to pool over and a haplotypeMapping
-    @Requires({"haplotypeList.size() > 0"})
-    @Ensures({"result.length == result[0].length", "result.length == haplotypeList.size()"})
-    public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples, final List<Haplotype> haplotypeList ) {
-
-        final int numHaplotypes = haplotypeList.size();
-        final double[][] haplotypeLikelihoodMatrix = new double[numHaplotypes][numHaplotypes];
-        for( int iii = 0; iii < numHaplotypes; iii++ ) {
-            Arrays.fill(haplotypeLikelihoodMatrix[iii], Double.NEGATIVE_INFINITY);
-        }
-
-        // compute the diploid haplotype likelihoods
-        // todo - needs to be generalized to arbitrary ploidy, cleaned and merged with PairHMMIndelErrorModel code
-        for( int iii = 0; iii < numHaplotypes; iii++ ) {
-            final Haplotype iii_haplotype = haplotypeList.get(iii);
-            for( int jjj = 0; jjj <= iii; jjj++ ) {
-                final Haplotype jjj_haplotype = haplotypeList.get(jjj);
+                final Allele jjj_allele = alleleOrdering.get(jjj);
                 double haplotypeLikelihood = 0.0;
                 for( final String sample : samples ) {
-                    final double[] readLikelihoods_iii = iii_haplotype.getReadLikelihoods(sample);
-                    final int[] readCounts_iii = iii_haplotype.getReadCounts(sample);
-                    final double[] readLikelihoods_jjj = jjj_haplotype.getReadLikelihoods(sample);
-                    for( int kkk = 0; kkk < readLikelihoods_iii.length; kkk++ ) {
+                    for( final Map.Entry<GATKSAMRecord, Map<Allele,Double>> entry : stratifiedReadMap.get(sample).getLikelihoodReadMap().entrySet() ) {
                         // Compute log10(10^x1/2 + 10^x2/2) = log10(10^x1+10^x2)-log10(2)
                         // First term is approximated by Jacobian log with table lookup.
-                        haplotypeLikelihood += readCounts_iii[kkk] * ( MathUtils.approximateLog10SumLog10(readLikelihoods_iii[kkk], readLikelihoods_jjj[kkk]) + LOG_ONE_HALF );
+                        haplotypeLikelihood += ReadUtils.getMeanRepresentativeReadCount( entry.getKey() ) *
+                                ( MathUtils.approximateLog10SumLog10(entry.getValue().get(iii_allele), entry.getValue().get(jjj_allele)) + LOG_ONE_HALF );
                     }
                 }
-                haplotypeLikelihoodMatrix[iii][jjj] = Math.max(haplotypeLikelihoodMatrix[iii][jjj], haplotypeLikelihood);
+                haplotypeLikelihoodMatrix[iii][jjj] = haplotypeLikelihood;
             }
         }
 
@@ -312,13 +269,16 @@ public class LikelihoodCalculationEngine {
 
     @Requires({"haplotypes.size() > 0"})
     @Ensures({"result.size() <= haplotypes.size()"})
-    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes ) {
+    public ArrayList<Haplotype> selectBestHaplotypes( final ArrayList<Haplotype> haplotypes, final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap ) {
 
         final int numHaplotypes = haplotypes.size();
-        final Set<String> sampleKeySet = haplotypes.get(0).getSampleKeySet(); // BUGBUG: assume all haplotypes saw the same samples
+        final Set<String> sampleKeySet = stratifiedReadMap.keySet();
         final ArrayList<Integer> bestHaplotypesIndexList = new ArrayList<Integer>();
         bestHaplotypesIndexList.add( findReferenceIndex(haplotypes) ); // always start with the reference haplotype
-        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, haplotypes ); // all samples pooled together
+        final List<Allele> haplotypesAsAlleles = new ArrayList<Allele>();
+        for( final Haplotype h : haplotypes ) { haplotypesAsAlleles.add(Allele.create(h.getBases())); }
+
+        final double[][] haplotypeLikelihoodMatrix = computeDiploidHaplotypeLikelihoods( sampleKeySet, stratifiedReadMap, haplotypesAsAlleles ); // all samples pooled together
 
         int hap1 = 0;
         int hap2 = 0;
@@ -358,52 +318,4 @@ public class LikelihoodCalculationEngine {
         }
         throw new ReviewedStingException( "No reference haplotype found in the list of haplotypes!" );
     }
-
-    public static Map<String, PerReadAlleleLikelihoodMap> partitionReadsBasedOnLikelihoods( final GenomeLocParser parser,
-                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleReadList,
-                                                                                            final HashMap<String, ArrayList<GATKSAMRecord>> perSampleFilteredReadList,
-                                                                                            final Pair<VariantContext, Map<Allele,List<Haplotype>>> call,
-                                                                                            final double downsamplingFraction,
-                                                                                            final PrintStream downsamplingLog ) {
-        final Map<String, PerReadAlleleLikelihoodMap> returnMap = new HashMap<String, PerReadAlleleLikelihoodMap>();
-        final GenomeLoc callLoc = parser.createGenomeLoc(call.getFirst());
-        for( final Map.Entry<String, ArrayList<GATKSAMRecord>> sample : perSampleReadList.entrySet() ) {
-            final PerReadAlleleLikelihoodMap likelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
-
-            final ArrayList<GATKSAMRecord> readsForThisSample = sample.getValue();
-            for( int iii = 0; iii < readsForThisSample.size(); iii++ ) {
-                final GATKSAMRecord read = readsForThisSample.get(iii); // BUGBUG: assumes read order in this list and haplotype likelihood list are the same!
-                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
-                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
-                    for( final Allele a : call.getFirst().getAlleles() ) {
-                        double maxLikelihood = Double.NEGATIVE_INFINITY;
-                        for( final Haplotype h : call.getSecond().get(a) ) { // use the max likelihood from all the haplotypes which mapped to this allele (achieved via the haplotype mapper object)
-                            final double likelihood = h.getReadLikelihoods(sample.getKey())[iii];
-                            if( likelihood > maxLikelihood ) {
-                                maxLikelihood = likelihood;
-                            }
-                        }
-                        likelihoodMap.add(read, a, maxLikelihood);
-                    }
-                }
-            }
-
-            // down-sample before adding filtered reads
-            likelihoodMap.performPerAlleleDownsampling(downsamplingFraction, downsamplingLog);
-
-            // add all filtered reads to the NO_CALL list because they weren't given any likelihoods
-            for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
-                // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
-                if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
-                    for( final Allele a : call.getFirst().getAlleles() ) {
-                        likelihoodMap.add(read, a, 0.0);
-                    }
-                }
-            }
-
-            returnMap.put(sample.getKey(), likelihoodMap);
-
-        }
-        return returnMap;
-    }
 }
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
index 79962a3e4..7b797432d 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@@ -329,7 +329,6 @@ public class PairHMMIndelErrorModel {
                     getContextHomopolymerLength(readBases,hrunProfile);
                     fillGapProbabilities(hrunProfile, contextLogGapOpenProbabilities, contextLogGapContinuationProbabilities);
 
-
                     for (Allele a: haplotypeMap.keySet()) {
 
                         Haplotype haplotype = haplotypeMap.get(a);
diff --git a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
index 30fdce75d..4c708f2bf 100755
--- a/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
+++ b/public/java/src/org/broadinstitute/sting/utils/Haplotype.java
@@ -41,8 +41,6 @@ public class Haplotype {
     protected final byte[] bases;
     protected final double[] quals;
     private GenomeLoc genomeLocation = null;
-    private HashMap<String, double[]> readLikelihoodsPerSample = null;
-    private HashMap<String, int[]> readCountsPerSample = null;
     private HashMap<Integer, VariantContext> eventMap = null;
     private boolean isRef = false;
     private Cigar cigar;
@@ -94,31 +92,6 @@ public class Haplotype {
         return Arrays.hashCode(bases);
     }
 
-    public void addReadLikelihoods( final String sample, final double[] readLikelihoods, final int[] readCounts ) {
-        if( readLikelihoodsPerSample == null ) {
-            readLikelihoodsPerSample = new HashMap<String, double[]>();
-        }
-        readLikelihoodsPerSample.put(sample, readLikelihoods);
-        if( readCountsPerSample == null ) {
-            readCountsPerSample = new HashMap<String, int[]>();
-        }
-        readCountsPerSample.put(sample, readCounts);
-    }
-
-    @Ensures({"result != null"})
-    public double[] getReadLikelihoods( final String sample ) {
-        return readLikelihoodsPerSample.get(sample);
-    }
-
-    @Ensures({"result != null"})
-    public int[] getReadCounts( final String sample ) {
-        return readCountsPerSample.get(sample);
-    }
-
-    public Set<String> getSampleKeySet() {
-        return readLikelihoodsPerSample.keySet();
-    }
-
     public HashMap<Integer, VariantContext> getEventMap() {
         return eventMap;
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
index 22d249240..9bb0e646f 100644
--- a/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
+++ b/public/java/src/org/broadinstitute/sting/utils/genotyper/PerReadAlleleLikelihoodMap.java
@@ -38,10 +38,10 @@ import java.util.*;
 
 public abstract class PerReadAlleleLikelihoodMap {
 
-    public static final double INDEL_LIKELIHOOD_THRESH = 0.1;
+    public static final double INFORMATIVE_LIKELIHOOD_THRESHOLD = 0.1;
 
     protected List<Allele> alleles;
-    protected Map<GATKSAMRecord,Map<Allele,Double>> likelihoodReadMap;
+    protected Map<GATKSAMRecord, Map<Allele, Double>> likelihoodReadMap;
 
     public abstract void performPerAlleleDownsampling(final double downsamplingFraction, final PrintStream log);
     public abstract ReadBackedPileup createPerAlleleDownsampledBasePileup(final ReadBackedPileup pileup, final double downsamplingFraction, final PrintStream log);
@@ -68,7 +68,7 @@ public abstract class PerReadAlleleLikelihoodMap {
     }
 
     public void add(PileupElement p, Allele a, Double likelihood) {
-        add(p.getRead(),a,likelihood);
+        add(p.getRead(), a, likelihood);
     }
 
     public boolean containsPileupElement(PileupElement p) {
@@ -120,7 +120,7 @@ public abstract class PerReadAlleleLikelihoodMap {
                 prevMaxLike = el.getValue();
             }
         }
-        return (maxLike - prevMaxLike > INDEL_LIKELIHOOD_THRESH ? mostLikelyAllele : Allele.NO_CALL );
+        return (maxLike - prevMaxLike > INFORMATIVE_LIKELIHOOD_THRESHOLD ? mostLikelyAllele : Allele.NO_CALL );
     }
 
     public static PerReadAlleleLikelihoodMap getBestAvailablePerReadAlleleLikelihoodMap() {

From b6839b30496daab74ea2d2b08690ff9ca4100508 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 3 Dec 2012 11:18:41 -0500
Subject: [PATCH 182/236] Added checking in the GATK for mis-encoded quality
 scores.

The check is performed by a Read Transformer that samples (currently set to once
every 1000 reads so that we don't hurt overall GATK performance) from the input
reads and checks to make sure that none of the base quals is too high (> Q60). If
we encounter such a base then we fail with a User Error.

* Can be over-ridden with --allow_potentially_misencoded_quality_scores.
* Also, the user can choose to fix his quals on the fly (presumably using PrintReads
  to write out a fixed bam) with the --fix_misencoded_quality_scores argument.

Added unit tests.
---
 .../arguments/GATKArgumentCollection.java     | 16 +++++
 .../sting/gatk/iterators/ReadTransformer.java |  4 +-
 .../sting/utils/QualityUtils.java             |  2 +-
 .../broadinstitute/sting/utils/baq/BAQ.java   |  2 +-
 .../sting/utils/exceptions/UserException.java | 10 +++
 .../MisencodedBaseQualityReadTransformer.java | 68 +++++++++++++++++++
 .../sting/utils/baq/BAQUnitTest.java          |  6 +-
 .../sam/MisencodedBaseQualityUnitTest.java    | 66 ++++++++++++++++++
 8 files changed, 165 insertions(+), 9 deletions(-)
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
 create mode 100644 public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index e2b943582..d0f3e91e0 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -206,6 +206,22 @@ public class GATKArgumentCollection {
     @Argument(fullName = "baqGapOpenPenalty", shortName="baqGOP", doc="BAQ gap open penalty (Phred Scaled).  Default value is 40.  30 is perhaps better for whole genome call sets", required = false)
     public double BAQGOP = BAQ.DEFAULT_GOP;
 
+    // --------------------------------------------------------------------------------------------------------------
+    //
+    // quality encoding checking arguments
+    //
+    // --------------------------------------------------------------------------------------------------------------
+
+    /**
+     * Q0 == ASCII 33 according to the SAM specification, whereas Illumina encoding starts at Q64.  The idea here is
+     * simple: we just iterate over all reads and subtract 31 from every quality score.
+     */
+    @Argument(fullName = "fix_misencoded_quality_scores", shortName="fixMisencodedQuals", doc="Fix mis-encoded base quality scores", required = false)
+    public boolean FIX_MISENCODED_QUALS = false;
+
+    @Argument(fullName = "allow_potentially_misencoded_quality_scores", shortName="allowPotentiallyMisencodedQuals", doc="Do not fail when encountered base qualities that are too high and seemingly indicate a problem with the base quality encoding of the BAM file", required = false)
+    public boolean ALLOW_POTENTIALLY_MISENCODED_QUALS = false;
+
     // --------------------------------------------------------------------------------------------------------------
     //
     // performance log arguments
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java
index 28348ecc2..5525e33c9 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/ReadTransformer.java
@@ -41,7 +41,7 @@ abstract public class ReadTransformer {
     protected ReadTransformer() {}
 
     /**
-     * Master initialization routine.  Called to setup a ReadTransform, using it's overloaded initialialSub routine.
+     * Master initialization routine.  Called to setup a ReadTransform, using it's overloaded initializeSub routine.
      *
      * @param overrideTime if not null, we will run this ReadTransform at the time provided, regardless of the timing of this read transformer itself
      * @param engine the engine, for initializing values
@@ -59,7 +59,7 @@ abstract public class ReadTransformer {
     }
 
     /**
-     * Subclasses must override this to initialize themeselves
+     * Subclasses must override this to initialize themselves
      *
      * @param engine the engine, for initializing values
      * @param walker the walker we intend to run
diff --git a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
index 848beccb8..861f172d9 100755
--- a/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/QualityUtils.java
@@ -14,7 +14,7 @@ public class QualityUtils {
     public final static double ERROR_RATE_OF_MAX_QUAL_SCORE = qualToErrorProbRaw(MAX_QUAL_SCORE);
 
     public final static double MIN_REASONABLE_ERROR = 0.0001;
-    public final static byte MAX_REASONABLE_Q_SCORE = 40;
+    public final static byte MAX_REASONABLE_Q_SCORE = 60;  // quals above this value are extremely suspicious
     public final static byte MIN_USABLE_Q_SCORE = 6;
     public final static int MAPPING_QUALITY_UNAVAILABLE = 255;
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index 9ad1bf773..3d76096fb 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -414,7 +414,7 @@ public class BAQ {
                 throw new ReviewedStingException("BAQ tag calculation error.  BAQ value above base quality at " + read);
             // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
             if ( tag > Byte.MAX_VALUE )
-                throw new UserException.MalformedBAM(read, "we encountered an extremely high quality score (" + (bq - 64) + ") with BAQ correction factor of " + baq_i + "; the BAM file appears to be using the wrong encoding for quality scores");
+                throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score (" + ((int)read.getBaseQualities()[i] - 33) + ") with BAQ correction factor of " + baq_i);
             bqTag[i] = (byte)tag;
         }
         return new String(bqTag);
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index a2ec35ae2..cef8af8c1 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -240,6 +240,16 @@ public class UserException extends ReviewedStingException {
         }
     }
 
+    public static class MisencodedBAM extends UserException {
+        public MisencodedBAM(SAMRecord read, String message) {
+            this(read.getFileSource() != null ? read.getFileSource().getReader().toString() : "(none)", message);
+        }
+
+        public MisencodedBAM(String source, String message) {
+            super(String.format("SAM/BAM file %s appears to be using the wrong encoding for quality scores: %s; please see the GATK --help documentation for options related to this error", source, message));
+        }
+    }
+
     public static class MalformedVCF extends UserException {
         public MalformedVCF(String message, String line) {
             super(String.format("The provided VCF file is malformed at line %s: %s", line, message));
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
new file mode 100644
index 000000000..e841bc151
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
@@ -0,0 +1,68 @@
+package org.broadinstitute.sting.utils.sam;
+
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
+import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
+import org.broadinstitute.sting.gatk.walkers.Walker;
+import org.broadinstitute.sting.utils.QualityUtils;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+
+/**
+ * Checks for and errors out (or fixes if requested) when it detects reads with base qualities that are not encoded with
+ * phred-scaled quality scores.  Q0 == ASCII 33 according to the SAM specification, whereas Illumina encoding starts at
+ * Q64.  The idea here is simple: if we are asked to fix the scores then we just subtract 31 from every quality score.
+ * Otherwise, we randomly sample reads (for efficiency) and error out if we encounter a qual that's too high.
+ */
+public class MisencodedBaseQualityReadTransformer extends ReadTransformer {
+
+    private static final int samplingFrequency = 1000;  // sample 1 read for every 1000 encountered
+    private static final int encodingFixValue = 31;  // Illumina_64 - PHRED_33
+    private static final byte maxAllowedQualByte = QualityUtils.MAX_REASONABLE_Q_SCORE + 33;
+
+    private boolean disabled;
+    private boolean fixQuals;
+    private static int currentReadCounter = 0;
+
+    @Override
+    public ApplicationTime initializeSub(final GenomeAnalysisEngine engine, final Walker walker) {
+        fixQuals = engine.getArguments().FIX_MISENCODED_QUALS;
+        disabled = !fixQuals && engine.getArguments().ALLOW_POTENTIALLY_MISENCODED_QUALS;
+
+        return ReadTransformer.ApplicationTime.ON_INPUT;
+    }
+
+    @Override
+    public boolean enabled() {
+        return !disabled;
+    }
+
+    @Override
+    public GATKSAMRecord apply(final GATKSAMRecord read) {
+        if ( fixQuals )
+            return fixMisencodedQuals(read);
+
+        checkForMisencodedQuals(read);
+        return read;
+    }
+
+    protected static GATKSAMRecord fixMisencodedQuals(final GATKSAMRecord read) {
+        final byte[] quals = read.getBaseQualities();
+        for ( int i = 0; i < quals.length; i++ ) {
+            quals[i] -= encodingFixValue;
+        }
+        read.setBaseQualities(quals);
+        return read;
+    }
+
+    protected static void checkForMisencodedQuals(final GATKSAMRecord read) {
+        // sample reads randomly for checking
+        if ( ++currentReadCounter >= samplingFrequency ) {
+            currentReadCounter = 0;
+
+            final byte[] quals = read.getBaseQualities();
+            for ( final byte qual : quals ) {
+                if ( qual > maxAllowedQualByte )
+                    throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score of " + ((int)qual - 33));
+            }
+        }
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
index 67943ccb4..59b8e5ff0 100644
--- a/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/baq/BAQUnitTest.java
@@ -1,10 +1,6 @@
-// our package
 package org.broadinstitute.sting.utils.baq;
 
 
-// the imports for unit testing.
-
-
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.testng.Assert;
 import org.testng.annotations.Test;
@@ -24,7 +20,7 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
 import net.sf.samtools.*;
 
 /**
- * Basic unit test for GenomeLoc
+ * Basic unit test for BAQ calculation
  */
 public class BAQUnitTest extends BaseTest {
     private SAMFileHeader header;
diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java
new file mode 100644
index 000000000..bd244b49e
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java
@@ -0,0 +1,66 @@
+package org.broadinstitute.sting.utils.sam;
+
+
+import net.sf.samtools.SAMFileHeader;
+import org.broadinstitute.sting.BaseTest;
+import org.broadinstitute.sting.utils.exceptions.UserException;
+import org.testng.Assert;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Basic unit test for misencoded quals
+ */
+public class MisencodedBaseQualityUnitTest extends BaseTest {
+
+    private static final String readBases = "AAAAAAAAAA";
+    private static final byte[] badQuals = { 'Z', '[', 'c', 'd', 'e', 'a', 'b', 'Z', 'Y', 'X' };
+    private static final byte[] goodQuals = { '[', '[', '[', '[', '[', '[', '[', '[', '[', '[' };
+    private static final byte[] fixedQuals = { ';', '<', 'D', 'E', 'F', 'B', 'C', ';', ':', '9' };
+    private SAMFileHeader header;
+
+    @BeforeMethod
+    public void before() {
+        header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
+    }
+
+    private GATKSAMRecord createRead(final boolean useGoodBases) {
+        GATKSAMRecord read = ArtificialSAMUtils.createArtificialRead(header, "foo", 0, 10, readBases.getBytes(), useGoodBases ? goodQuals : badQuals);
+        read.setCigarString("10M");
+        return read;
+    }
+
+    @Test(enabled = true)
+    public void testGoodQuals() {
+        final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(10000);
+        for ( int i = 0; i < 10000; i++ )
+            reads.add(createRead(true));
+
+        testEncoding(reads);
+    }
+
+    @Test(enabled = true, expectedExceptions = {UserException.class})
+    public void testBadQualsThrowsError() {
+        final List<GATKSAMRecord> reads = new ArrayList<GATKSAMRecord>(10000);
+        for ( int i = 0; i < 10000; i++ )
+            reads.add(createRead(false));
+
+        testEncoding(reads);
+    }
+
+    @Test(enabled = true)
+    public void testFixBadQuals() {
+        final GATKSAMRecord read = createRead(false);
+        final GATKSAMRecord fixedRead = MisencodedBaseQualityReadTransformer.fixMisencodedQuals(read);
+        for ( int i = 0; i < fixedQuals.length; i++ )
+            Assert.assertEquals(fixedQuals[i], fixedRead.getBaseQualities()[i]);
+    }
+
+    private void testEncoding(final List<GATKSAMRecord> reads) {
+        for ( final GATKSAMRecord read : reads )
+            MisencodedBaseQualityReadTransformer.checkForMisencodedQuals(read);
+    }
+}
\ No newline at end of file

From 5fed9df2955478df22f2b5d3df6336171cd2a4ec Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 3 Dec 2012 12:18:20 -0500
Subject: [PATCH 183/236] Quick fix: base qual array in the GATKSAMRecord
 stores the actual phred values (-33) and not the original bytes (duh).

---
 public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java | 2 +-
 .../utils/sam/MisencodedBaseQualityReadTransformer.java     | 5 ++---
 .../sting/utils/sam/MisencodedBaseQualityUnitTest.java      | 6 +++---
 3 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
index 3d76096fb..3966434c0 100644
--- a/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
+++ b/public/java/src/org/broadinstitute/sting/utils/baq/BAQ.java
@@ -414,7 +414,7 @@ public class BAQ {
                 throw new ReviewedStingException("BAQ tag calculation error.  BAQ value above base quality at " + read);
             // the original quality is too high, almost certainly due to using the wrong encoding in the BAM file
             if ( tag > Byte.MAX_VALUE )
-                throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score (" + ((int)read.getBaseQualities()[i] - 33) + ") with BAQ correction factor of " + baq_i);
+                throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score (" + (int)read.getBaseQualities()[i] + ") with BAQ correction factor of " + baq_i);
             bqTag[i] = (byte)tag;
         }
         return new String(bqTag);
diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
index e841bc151..cac51239a 100644
--- a/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityReadTransformer.java
@@ -16,7 +16,6 @@ public class MisencodedBaseQualityReadTransformer extends ReadTransformer {
 
     private static final int samplingFrequency = 1000;  // sample 1 read for every 1000 encountered
     private static final int encodingFixValue = 31;  // Illumina_64 - PHRED_33
-    private static final byte maxAllowedQualByte = QualityUtils.MAX_REASONABLE_Q_SCORE + 33;
 
     private boolean disabled;
     private boolean fixQuals;
@@ -60,8 +59,8 @@ public class MisencodedBaseQualityReadTransformer extends ReadTransformer {
 
             final byte[] quals = read.getBaseQualities();
             for ( final byte qual : quals ) {
-                if ( qual > maxAllowedQualByte )
-                    throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score of " + ((int)qual - 33));
+                if ( qual > QualityUtils.MAX_REASONABLE_Q_SCORE )
+                    throw new UserException.MisencodedBAM(read, "we encountered an extremely high quality score of " + (int)qual);
             }
         }
     }
diff --git a/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java
index bd244b49e..75b7bb384 100644
--- a/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/sam/MisencodedBaseQualityUnitTest.java
@@ -17,9 +17,9 @@ import java.util.List;
 public class MisencodedBaseQualityUnitTest extends BaseTest {
 
     private static final String readBases = "AAAAAAAAAA";
-    private static final byte[] badQuals = { 'Z', '[', 'c', 'd', 'e', 'a', 'b', 'Z', 'Y', 'X' };
-    private static final byte[] goodQuals = { '[', '[', '[', '[', '[', '[', '[', '[', '[', '[' };
-    private static final byte[] fixedQuals = { ';', '<', 'D', 'E', 'F', 'B', 'C', ';', ':', '9' };
+    private static final byte[] badQuals = { 59, 60, 62, 63, 64, 61, 62, 58, 57, 56 };
+    private static final byte[] goodQuals = { 60, 60, 60, 60, 60, 60, 60, 60, 60, 60 };
+    private static final byte[] fixedQuals = { 28, 29, 31, 32, 33, 30, 31, 27, 26, 25 };
     private SAMFileHeader header;
 
     @BeforeMethod

From 156d6a5e0bbe18fc67e50ae3b03c0aa498d2cad6 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 3 Dec 2012 12:47:35 -0500
Subject: [PATCH 184/236] misc minor bug fixes to GenotypingEngine.

---
 .../haplotypecaller/GenotypingEngine.java        | 16 ++++++++--------
 .../LikelihoodCalculationEngine.java             |  8 ++++----
 .../HaplotypeCallerIntegrationTest.java          |  4 ++--
 .../LikelihoodCalculationEngineUnitTest.java     |  7 ++++---
 4 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index 6f94e2657..fee6c86f8 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -161,7 +161,7 @@ public class GenotypingEngine {
                 if( mergedVC == null ) { continue; }
 
                 // let's update the Allele keys in the mapper because they can change after merging when there are complex events
-                Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
+                final Map<Allele, List<Haplotype>> updatedAlleleMapper = new HashMap<Allele, List<Haplotype>>(alleleMapper.size());
                 for ( int i = 0; i < mergedVC.getNAlleles(); i++ ) {
                     final Allele oldAllele = alleleOrdering.get(i);
                     final Allele newAllele = mergedVC.getAlleles().get(i);
@@ -191,7 +191,7 @@ public class GenotypingEngine {
                     }
                     genotypes.add( new GenotypeBuilder(sample).alleles(noCall).PL(genotypeLikelihoods).make() );
                 }
-                VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
+                final VariantContext call = UG_engine.calculateGenotypes(new VariantContextBuilder(mergedVC).genotypes(genotypes).make(), UG_engine.getUAC().GLmodel);
                 if( call != null ) {
                     final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap = filterToOnlyOverlappingReads( genomeLocParser, alleleReadMap, perSampleFilteredReadList, call );
                     VariantContext annotatedCall = annotationEngine.annotateContext(stratifiedReadMap, call);
@@ -217,11 +217,11 @@ public class GenotypingEngine {
         for( final Map.Entry<String, PerReadAlleleLikelihoodMap> sample : perSampleReadMap.entrySet() ) {
             final PerReadAlleleLikelihoodMap likelihoodMap = PerReadAlleleLikelihoodMap.getBestAvailablePerReadAlleleLikelihoodMap();
 
-            for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : likelihoodMap.getLikelihoodReadMap().entrySet() ) {
+            for( final Map.Entry<GATKSAMRecord,Map<Allele,Double>> mapEntry : sample.getValue().getLikelihoodReadMap().entrySet() ) {
                 // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
-                if( callLoc.overlapsP(parser.createGenomeLoc(mapEntry.getKey())) ) {
-                    for( final Map.Entry<Allele,Double> a : mapEntry.getValue().entrySet() ) {
-                        likelihoodMap.add(mapEntry.getKey(), a.getKey(), a.getValue());
+                if( callLoc.overlapsP(parser.createGenomeLoc(mapEntry.getKey())) ) { // BUGBUG: This uses alignment start and stop, NOT soft start and soft end...
+                    for( final Map.Entry<Allele,Double> alleleDoubleEntry : mapEntry.getValue().entrySet() ) {
+                        likelihoodMap.add(mapEntry.getKey(), alleleDoubleEntry.getKey(), alleleDoubleEntry.getValue());
                     }
                 }
             }
@@ -230,8 +230,8 @@ public class GenotypingEngine {
             for( final GATKSAMRecord read : perSampleFilteredReadList.get(sample.getKey()) ) {
                 // only count the read if it overlaps the event, otherwise it is not added to the output read list at all
                 if( callLoc.overlapsP(parser.createGenomeLoc(read)) ) {
-                    for( final Allele a : call.getAlleles() ) {
-                        likelihoodMap.add(read, a, 0.0);
+                    for( final Allele allele : call.getAlleles() ) {
+                        likelihoodMap.add(read, allele, 0.0);
                     }
                 }
             }
diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
index 4a5c7fe9b..018102893 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java
@@ -143,8 +143,8 @@ public class LikelihoodCalculationEngine {
         return Math.min(b1.length, b2.length);
     }
 
-    @Requires({"haplotypeMapping.size() > 0"})
-    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
+    @Requires({"alleleOrdering.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == alleleOrdering.size()"})
     public static double[][] computeDiploidHaplotypeLikelihoods( final String sample,
                                                                  final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap,
                                                                  final List<Allele> alleleOrdering ) {
@@ -153,8 +153,8 @@ public class LikelihoodCalculationEngine {
         return computeDiploidHaplotypeLikelihoods(sampleSet, stratifiedReadMap, alleleOrdering);
     }
 
-    @Requires({"haplotypeMapping.size() > 0"})
-    @Ensures({"result.length == result[0].length", "result.length == haplotypeMapping.size()"})
+    @Requires({"alleleOrdering.size() > 0"})
+    @Ensures({"result.length == result[0].length", "result.length == alleleOrdering.size()"})
     public static double[][] computeDiploidHaplotypeLikelihoods( final Set<String> samples,
                                                                  final Map<String, PerReadAlleleLikelihoodMap> stratifiedReadMap,
                                                                  final List<Allele> alleleOrdering ) {
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index f8ba1f4cc..288aaebc0 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -32,7 +32,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     // TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
-        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
+        HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
                 "541aa8291f03ba33bd1ad3d731fd5657");
     }
 
@@ -48,7 +48,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     }
 
     private void HCTestSymbolicVariants(String bam, String args, String md5) {
-        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 2";
+        final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, bam) + " -L 20:5947969-5948369 -L 20:61091236-61091636 --no_cmdline_in_header -o %s -minPruning 1";
         final WalkerTestSpec spec = new WalkerTestSpec(base + " " + args, Arrays.asList(md5));
         executeTest("testHaplotypeCallerSymbolicVariants: args=" + args, spec);
     }
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
index 19ced9f42..792812c2b 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngineUnitTest.java
@@ -51,6 +51,8 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
         Assert.assertTrue(compareDoubleArrays(LikelihoodCalculationEngine.normalizeDiploidLikelihoodMatrixFromLog10(likelihoodMatrix2), normalizedMatrix2));
     }
 
+    // BUGBUG: LikelihoodCalculationEngine.computeDiploidHaplotypeLikelihoods has changed! Need to make new unit tests!
+    /*
     private class BasicLikelihoodTestProvider extends TestDataProvider {
         public Double readLikelihoodForHaplotype1;
         public Double readLikelihoodForHaplotype2;
@@ -152,10 +154,9 @@ public class LikelihoodCalculationEngineUnitTest extends BaseTest {
         logger.warn(String.format("Test: %s", cfg.toString()));
         Assert.assertTrue(compareDoubleArrays(calculatedMatrix, expectedMatrix));
     }
+    */
 
-    /**
-     * Private function to compare 2d arrays
-     */
+    //Private function to compare 2d arrays
     private boolean compareDoubleArrays(double[][] b1, double[][] b2) {
         if( b1.length != b2.length ) {
             return false; // sanity check

From d5ed184691b63c0bf8893be394b9ce6149107cd6 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Mon, 3 Dec 2012 15:38:59 -0500
Subject: [PATCH 185/236] Updating the HC integration test md5s. According to
 the NA12878 knowledge base this commit cuts down the FP rate by more than 50
 percent with no loss in sensitivity.

---
 .../HaplotypeCallerIntegrationTest.java        | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index 288aaebc0..e9c1ec605 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -21,19 +21,19 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSample() {
-        HCTest(CEUTRIO_BAM, "", "2b39732ff8e0de5bc2ae949aaf7a6f21");
+        HCTest(CEUTRIO_BAM, "", "d602d40852ad6d2d094be07e60cf95bd");
     }
 
     @Test
     public void testHaplotypeCallerSingleSample() {
-        HCTest(NA12878_BAM, "", "8b217638ff585effb9cc70e9a9aa544f");
+        HCTest(NA12878_BAM, "", "70ad9d53dda4d302b879ca2b7dd5b368");
     }
 
     // TODO -- add more tests for GGA mode, especially with input alleles that are complex variants and/or not trimmed
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
         HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
-                "541aa8291f03ba33bd1ad3d731fd5657");
+                "e2b3bf420c45c677956a2e4a56d75ea2");
     }
 
     private void HCTestComplexVariants(String bam, String args, String md5) {
@@ -44,7 +44,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerMultiSampleComplex() {
-        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "fd7170cbde7df04d4fbe1da7903c31c6");
+        HCTestComplexVariants(privateTestDir + "AFR.complex.variants.bam", "", "883871f8bb4099f69fd804f8a6181954");
     }
 
     private void HCTestSymbolicVariants(String bam, String args, String md5) {
@@ -55,7 +55,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleSymbolic() {
-        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "99456fc7207c1fe9f367a0d0afae87cd");
+        HCTestSymbolicVariants(NA12878_CHR20_BAM, "", "338ab3b7dc3d54df8af94c0811028a75");
     }
 
     private void HCTestIndelQualityScores(String bam, String args, String md5) {
@@ -66,20 +66,20 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
 
     @Test
     public void testHaplotypeCallerSingleSampleIndelQualityScores() {
-        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "6c1631785b3f832aecab1a99f0454762");
+        HCTestIndelQualityScores(NA12878_RECALIBRATED_BAM, "", "aff11b014ca42bfa301bcced5f5e54dd");
     }
 
     @Test
     public void HCTestProblematicReadsModifiedInActiveRegions() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "haplotype-problem-4.bam") + " --no_cmdline_in_header -o %s -minPruning 3 -L 4:49139026-49139965";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("ec437d2d9f3ae07d155983be0155c8ed"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("2f4ed6dc969bee041215944a9b24328f"));
         executeTest("HCTestProblematicReadsModifiedInActiveRegions: ", spec);
     }
 
     @Test
     public void HCTestStructuralIndels() {
         final String base = String.format("-T HaplotypeCaller -R %s -I %s", REF, privateTestDir + "AFR.structural.indels.bam") + " --no_cmdline_in_header -o %s -minPruning 6 -L 20:8187565-8187800 -L 20:18670537-18670730";
-        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("237601bbc39694c7413a332cbb656c8e"));
+        final WalkerTestSpec spec = new WalkerTestSpec(base, Arrays.asList("d8d6f2ebe79bca81c8a0911daa153b89"));
         executeTest("HCTestStructuralIndels: ", spec);
     }
 
@@ -93,7 +93,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     public void HCTestReducedBam() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T HaplotypeCaller -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("40bf739fb2b1743642498efe79ea6342"));
+                Arrays.asList("d01cb5f77ed5aca1d228cfbce9364c21"));
         executeTest("HC calling on a ReducedRead BAM", spec);
     }
 }

From 67932b357d4a845efe439ad49f35b08695e3edb4 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 3 Dec 2012 15:59:14 -0500
Subject: [PATCH 186/236] Bug fix for RR: don't let the softclip start position
 be less than 1

---
 .../src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java  | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 9fdb48b34..6c7a162f8 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -397,6 +397,9 @@ public class GATKSAMRecord extends BAMRecord {
                 else if (op != CigarOperator.HARD_CLIP)
                     break;
             }
+
+            if ( softStart < 1 )
+                softStart = 1;
         }
         return softStart;
     }

From bca860723a4ae6d7cfaf242065256951bcb543fe Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 3 Dec 2012 22:01:07 -0500
Subject: [PATCH 189/236] Updating tests to handle bad validation data files
 (that used the wrong qual score encoding); overrides push from stable.

---
 .../sting/gatk/walkers/bqsr/BQSRIntegrationTest.java          | 2 ++
 .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java    | 4 ++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
index de328c825..b15969fba 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/bqsr/BQSRIntegrationTest.java
@@ -37,6 +37,7 @@ public class BQSRIntegrationTest extends WalkerTest {
                     " -L " + interval +
                     args +
                     " -knownSites " + (reference.equals(b36KGReference) ? b36dbSNP129 : hg18dbSNP132) +
+                    " --allow_potentially_misencoded_quality_scores" +  // TODO -- remove me when we get new SOLiD bams
                     " -o %s";
         }
 
@@ -112,6 +113,7 @@ public class BQSRIntegrationTest extends WalkerTest {
                         " -R " + b36KGReference +
                         " -I " + privateTestDir + "NA19240.chr1.BFAST.SOLID.hasCSNoCall.bam" +
                         " -L 1:50,000-80,000" +
+                        " --allow_potentially_misencoded_quality_scores" +  // TODO -- remove me when we get new SOLiD bams
                         " -o %s",
                 1, // just one output file
                 UserException.class);
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 8ded61af8..959cdd1ce 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -436,8 +436,8 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testNsInCigar() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141799600-141814700", 1,
-                Arrays.asList("d6d40bacd540a41f305420dfea35e04a"));
+                "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
+                Arrays.asList("32f18ba50406cd8c8069ba07f2f89558"));
         executeTest("test calling on reads with Ns in CIGAR", spec);
     }
 

From 8d2d0253a27a60d2ae681aebb5820e20ab2e7cd9 Mon Sep 17 00:00:00 2001
From: Randal Moore <rdmoore@appistry.com>
Date: Mon, 3 Dec 2012 12:54:48 -0600
Subject: [PATCH 190/236] introduce a level of indirection for the forum URLs -
 this new function will allow me a place to morph the URL into something that
 is supported by Confluence

Signed-off-by: Eric Banks <ebanks@broadinstitute.org>
---
 .../walkers/variantrecalibration/VariantDataManager.java     | 2 +-
 .../broadinstitute/sting/utils/exceptions/UserException.java | 4 ++--
 .../src/org/broadinstitute/sting/utils/help/HelpUtils.java   | 5 +++--
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
index 3382a1d9b..f18db412f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@@ -81,7 +81,7 @@ public class VariantDataManager {
             final double theSTD = standardDeviation(theMean, iii);
             logger.info( annotationKeys.get(iii) + String.format(": \t mean = %.2f\t standard deviation = %.2f", theMean, theSTD) );
             if( Double.isNaN(theMean) ) {
-                throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.GATK_FORUM_URL + "discussion/49/using-variant-annotator");
+                throw new UserException.BadInput("Values for " + annotationKeys.get(iii) + " annotation not detected for ANY training variant in the input callset. VariantAnnotator may be used to add these annotations. See " + HelpUtils.forumPost("discussion/49/using-variant-annotator"));
             }
 
             foundZeroVarianceAnnotation = foundZeroVarianceAnnotation || (theSTD < 1E-6);
diff --git a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
index cef8af8c1..523fd5a97 100755
--- a/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
+++ b/public/java/src/org/broadinstitute/sting/utils/exceptions/UserException.java
@@ -278,7 +278,7 @@ public class UserException extends ReviewedStingException {
 
     public static class ReadMissingReadGroup extends MalformedBAM {
         public ReadMissingReadGroup(SAMRecord read) {
-            super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK.  Please use " + HelpUtils.GATK_FORUM_URL + "discussion/59/companion-utilities-replacereadgroups to fix this problem", read.getReadName()));
+            super(read, String.format("Read %s is either missing the read group or its read group is not defined in the BAM header, both of which are required by the GATK.  Please use " + HelpUtils.forumPost("discussion/59/companion-utilities-replacereadgroups to fix this problem"), read.getReadName()));
         }
     }
 
@@ -354,7 +354,7 @@ public class UserException extends ReviewedStingException {
             super(String.format("Lexicographically sorted human genome sequence detected in %s."
                     + "\nFor safety's sake the GATK requires human contigs in karyotypic order: 1, 2, ..., 10, 11, ..., 20, 21, 22, X, Y with M either leading or trailing these contigs."
                     + "\nThis is because all distributed GATK resources are sorted in karyotypic order, and your processing will fail when you need to use these files."
-                    + "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.GATK_FORUM_URL + "discussion/58/companion-utilities-reordersam"
+                    + "\nYou can use the ReorderSam utility to fix this problem: " + HelpUtils.forumPost("discussion/58/companion-utilities-reordersam")
                     + "\n  %s contigs = %s",
                     name, name, ReadUtils.prettyPrintSequenceRecords(dict)));
         }
diff --git a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
index 1bc20d5a0..930bbc996 100644
--- a/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/help/HelpUtils.java
@@ -38,8 +38,9 @@ public class HelpUtils {
     public final static String GATK_FORUM_URL = "http://gatkforums.broadinstitute.org/";
     public final static String GATK_FORUM_API_URL = "https://gatkforums.broadinstitute.org/api/v1/";
 
-
-
+    public static String forumPost(String post) {
+    	return GATK_FORUM_URL + post;
+    }
 
     protected static boolean assignableToClass(ProgramElementDoc classDoc, Class lhsClass, boolean requireConcrete) {
         try {

From 61bde6210b6f01e3e1caa0ff478c3ac119237ad0 Mon Sep 17 00:00:00 2001
From: kshakir <kshakir@broadinstitute.org>
Date: Tue, 4 Dec 2012 12:32:00 -0500
Subject: [PATCH 191/236] Restored RemoteFile push and pull in base QScript.

---
 .../src/org/broadinstitute/sting/queue/QScript.scala | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
index 5b67ae913..d709d1fb4 100755
--- a/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
+++ b/public/scala/src/org/broadinstitute/sting/queue/QScript.scala
@@ -127,14 +127,26 @@ trait QScript extends Logging with PrimitiveOptionConversions with StringFileCon
    * Pull all remote files to the local disk
    */
   def pullInputs() {
+    val inputs = ClassFieldCache.getFieldFiles(this, inputFields)
+    for (remoteFile <- filterRemoteFiles(inputs)) {
+      logger.info("Pulling %s from %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription))
+      remoteFile.pullToLocal()
+    }
   }
 
   /**
    * Push all remote files from the local disk
    */
   def pushOutputs() {
+    val outputs = ClassFieldCache.getFieldFiles(this, outputFields)
+    for (remoteFile <- filterRemoteFiles(outputs)) {
+      logger.info("Pushing %s to %s".format(remoteFile.getAbsolutePath, remoteFile.remoteDescription))
+      remoteFile.pushToRemote()
+    }
   }
 
+  private def filterRemoteFiles(fields: Seq[File]): Seq[RemoteFile] =
+    fields.filter(field => field != null && field.isInstanceOf[RemoteFile]).map(_.asInstanceOf[RemoteFile])
   /**
    * @return the inputs or null if there are no inputs
    */

From 726332db79354a7158fb3d7c6a6560db178ad24e Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 5 Dec 2012 00:54:00 -0500
Subject: [PATCH 192/236] Disabling the testNoCmdLineHeaderStdout test in UG
 because it keeps crashing when I run it locally

---
 .../gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 959cdd1ce..9f940dce5 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -177,7 +177,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
         executeTest("test using comp track", spec);
     }
 
-    @Test
+    @Test(enabled = false) // EB: for some reason this test crashes whenever I run it on my local machine
     public void testNoCmdLineHeaderStdout() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 baseCommandNoCmdLineHeaderStdout + " -glm INDEL -L 1:67,225,396-67,288,518", 0,

From 6feda540a4be919bb629deeeada76e8a8d476519 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 4 Dec 2012 23:55:35 -0500
Subject: [PATCH 193/236] Better error message for SimpleGATKReports

---
 .../src/org/broadinstitute/sting/gatk/report/GATKReport.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 6685ee12a..7ae2bb453 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -343,7 +343,7 @@ public class GATKReport {
 
         GATKReportTable table = tables.firstEntry().getValue();
         if ( table.getNumColumns() != values.length )
-            throw new ReviewedStingException("The number of arguments in writeRow() must match the number of columns in the table");
+            throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
 
         final int rowIndex = table.getNumRows();
         for ( int i = 0; i < values.length; i++ )

From 30f013aeb045b16a8dd15217e95bb31452acaa8f Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Tue, 4 Dec 2012 23:56:30 -0500
Subject: [PATCH 194/236] Added a copy() method for ReadBackedPileups

necessary to create new alignment contexts with hard-copies of the pileup.
---
 .../utils/pileup/AbstractReadBackedPileup.java  |  5 +++++
 .../utils/pileup/PileupElementTracker.java      | 17 +++++++++++++++++
 .../sting/utils/pileup/ReadBackedPileup.java    |  8 ++++++++
 3 files changed, 30 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
index 25f0bfa6d..ff274499b 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/AbstractReadBackedPileup.java
@@ -1054,6 +1054,11 @@ public abstract class AbstractReadBackedPileup<RBP extends AbstractReadBackedPil
     public FragmentCollection<PileupElement> toFragments() {
         return FragmentUtils.create(this);
     }
+
+    @Override
+    public ReadBackedPileup copy() {
+        return new ReadBackedPileupImpl(loc, (PileupElementTracker<PileupElement>) pileupElementTracker.copy());
+    }
 }
 
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
index 09b907e00..6eecaf402 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/PileupElementTracker.java
@@ -34,11 +34,20 @@ import java.util.*;
  */
 abstract class PileupElementTracker<PE extends PileupElement> implements Iterable<PE> {
     public abstract int size();
+    public abstract PileupElementTracker<PE> copy();
 }
 
 class UnifiedPileupElementTracker<PE extends PileupElement> extends PileupElementTracker<PE> {
     private final List<PE> pileup;
 
+    @Override
+    public UnifiedPileupElementTracker<PE> copy() {
+        UnifiedPileupElementTracker<PE> result = new UnifiedPileupElementTracker<PE>();
+        for(PE element : pileup)
+            result.add(element);
+        return result;
+    }
+
     public UnifiedPileupElementTracker() { pileup = new LinkedList<PE>(); }
     public UnifiedPileupElementTracker(List<PE> pileup) { this.pileup = pileup; }
 
@@ -65,6 +74,14 @@ class PerSamplePileupElementTracker<PE extends PileupElement> extends PileupElem
         pileup = new HashMap<String,PileupElementTracker<PE>>();
     }
 
+    public PerSamplePileupElementTracker<PE> copy() {
+        PerSamplePileupElementTracker<PE> result = new PerSamplePileupElementTracker<PE>();
+        for (Map.Entry<String, PileupElementTracker<PE>> entry : pileup.entrySet())
+            result.addElements(entry.getKey(), entry.getValue());
+
+        return result;
+    }
+
     /**
      * Gets a list of all the samples stored in this pileup.
      * @return List of samples in this pileup.
diff --git a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
index be61bad99..b9e9b9a52 100644
--- a/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
+++ b/public/java/src/org/broadinstitute/sting/utils/pileup/ReadBackedPileup.java
@@ -283,4 +283,12 @@ public interface ReadBackedPileup extends Iterable<PileupElement>, HasGenomeLoca
      * @return
      */
     public FragmentCollection<PileupElement> toFragments();
+
+    /**
+     * Creates a full copy (not shallow) of the ReadBacked Pileup
+     *
+     * @return
+     */
+    public ReadBackedPileup copy();
+
 }

From ef87b18e09d64dda2e483c77573b792af46d4f93 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 5 Dec 2012 02:00:35 -0500
Subject: [PATCH 196/236] In retrospect, it wasn't a good idea to have
 FisherStrand handle reduced reads since they are always on the forward
 strand.  For now, FS ignores reduced reads but I've added a note (and JIRA)
 to make this work once the RR het compression is enabled (since we will have
 directionality in reads then).

---
 .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java  | 2 +-
 .../sting/gatk/walkers/annotator/FisherStrand.java          | 6 ++++++
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 9f940dce5..9e9c7e37e 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -457,7 +457,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
 
     @Test
     public void testReducedBamSNPs() {
-        testReducedCalling("SNP", "f5ccbc96d0d66832dd9b3c5cb6507db4");
+        testReducedCalling("SNP", "dee6590e3b7079890bc3a9cb372c297e");
     }
 
     @Test
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
index bdf7baec9..52072d10c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/FisherStrand.java
@@ -276,6 +276,12 @@ public class FisherStrand extends InfoFieldAnnotation implements StandardAnnotat
 
         for ( Map.Entry<String, AlignmentContext> sample : stratifiedContexts.entrySet() ) {
             for (PileupElement p : sample.getValue().getBasePileup()) {
+
+                // ignore reduced reads because they are always on the forward strand!
+                // TODO -- when het compression is enabled in RR, we somehow need to allow those reads through into the Fisher test
+                if ( p.getRead().isReducedRead() )
+                    continue;
+
                 if ( ! RankSumTest.isUsableBase(p, false) ) // ignore deletions
                     continue;
 

From 2b601571e764c7ff9fc9afb9e3b11dcb21fa01e6 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Dec 2012 20:59:07 -0500
Subject: [PATCH 199/236] Better error handling in NanoScheduler

-- The previous nanoscheduler would deadlock in the case where an Error, not an Exception, was thrown.  Errors, like out of memory, would cause the whole system to die.  This bugfix resolves that issue
---
 .../utils/nanoScheduler/InputProducer.java    |  9 +++++-
 .../utils/nanoScheduler/NanoScheduler.java    | 29 +++++++++++++++++--
 .../nanoScheduler/NanoSchedulerUnitTest.java  | 26 ++++++++++++-----
 3 files changed, 52 insertions(+), 12 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
index bd99a9266..45c7c5096 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
@@ -103,6 +103,8 @@ class InputProducer<InputType> implements Runnable {
         } else {
             // get the next value, and return it
             final InputType input = inputReader.next();
+            if ( input == null )
+                throw new IllegalStateException("inputReader.next() returned a null value, breaking our contract");
             inputTimer.stop();
             nRead++;
             return input;
@@ -121,6 +123,9 @@ class InputProducer<InputType> implements Runnable {
                 final InputType value = readNextItem();
 
                 if ( value == null ) {
+                    if ( ! readLastValue )
+                        throw new IllegalStateException("value == null but readLastValue is false!");
+
                     // add the EOF object so our consumer knows we are done in all inputs
                     // note that we do not increase inputID here, so that variable indicates the ID
                     // of the last real value read from the queue
@@ -133,8 +138,10 @@ class InputProducer<InputType> implements Runnable {
             }
 
             latch.countDown();
-        } catch (Exception ex) {
+        } catch (Throwable ex) {
             errorTracker.notifyOfError(ex);
+        } finally {
+//            logger.info("Exiting input thread readLastValue = " + readLastValue);
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index d83a23c0f..6d769c2cf 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -320,6 +320,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         while ( true ) {
             // check that no errors occurred while we were waiting
             handleErrors();
+//            checkForDeadlocks();
 
             try {
                 final ReduceType result = reduceResult.get(100, TimeUnit.MILLISECONDS);
@@ -341,6 +342,26 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         }
     }
 
+//    private void checkForDeadlocks() {
+//        if ( deadLockCheckCounter++ % 100 == 0 ) {
+//            logger.info("Checking for deadlocks...");
+//            final ThreadMXBean bean = ManagementFactory.getThreadMXBean();
+//            final long[] threadIds = bean.findDeadlockedThreads(); // Returns null if no threads are deadlocked.
+//
+//            if (threadIds != null) {
+//                final ThreadInfo[] infos = bean.getThreadInfo(threadIds);
+//
+//                logger.error("!!! Deadlock detected !!!!");
+//                for (final ThreadInfo info : infos) {
+//                    logger.error("Thread " + info);
+//                    for ( final StackTraceElement elt : info.getStackTrace() ) {
+//                        logger.error("\t" + elt.toString());
+//                    }
+//                }
+//            }
+//        }
+//    }
+
     private void handleErrors() {
         if ( errorTracker.hasAnErrorOccurred() ) {
             masterExecutor.shutdownNow();
@@ -408,7 +429,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
                 // wait for all of the input and map threads to finish
                 return waitForCompletion(inputProducer, reducer);
-            } catch (Exception ex) {
+            } catch (Throwable ex) {
+//                logger.warn("Reduce job got exception " + ex);
                 errorTracker.notifyOfError(ex);
                 return initialValue;
             }
@@ -495,7 +517,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                     // enqueue the result into the mapResultQueue
                     result = new MapResult<MapType>(mapValue, jobID);
 
-                    if ( jobID % bufferSize == 0 && progressFunction != null )
+                    if ( progressFunction != null )
                         progressFunction.progress(input);
                 } else {
                     // push back the EOF marker so other waiting threads can read it
@@ -508,7 +530,8 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                 mapResultQueue.put(result);
 
                 final int nReduced = reducer.reduceAsMuchAsPossible(mapResultQueue);
-            } catch (Exception ex) {
+            } catch (Throwable ex) {
+//                logger.warn("Map job got exception " + ex);
                 errorTracker.notifyOfError(ex);
             } finally {
                 // we finished a map job, release the job queue semaphore
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index af2e18ad9..d415b8b4c 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -243,7 +243,7 @@ public class NanoSchedulerUnitTest extends BaseTest {
             for ( final int nThreads : Arrays.asList(8) ) {
                 for ( final boolean addDelays : Arrays.asList(true, false) ) {
                     final NanoSchedulerBasicTest test = new NanoSchedulerBasicTest(bufSize, nThreads, 1, 1000000, false);
-                    final int maxN = addDelays ? 10000 : 100000;
+                    final int maxN = addDelays ? 1000 : 10000;
                     for ( int nElementsBeforeError = 0; nElementsBeforeError < maxN; nElementsBeforeError += Math.max(nElementsBeforeError / 10, 1) ) {
                         tests.add(new Object[]{nElementsBeforeError, test, addDelays});
                     }
@@ -259,17 +259,22 @@ public class NanoSchedulerUnitTest extends BaseTest {
         executeTestErrorThrowingInput(10, new NullPointerException(), exampleTest, false);
     }
 
-    @Test(enabled = true, expectedExceptions = ReviewedStingException.class, timeOut = 10000)
+    @Test(enabled = true, expectedExceptions = ReviewedStingException.class, timeOut = 1000)
     public void testInputErrorIsThrown_RSE() throws InterruptedException {
         executeTestErrorThrowingInput(10, new ReviewedStingException("test"), exampleTest, false);
     }
 
-    @Test(enabled = true, expectedExceptions = NullPointerException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 10000, invocationCount = 1)
-    public void testInputErrorDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
+    @Test(enabled = true, expectedExceptions = NullPointerException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 1000, invocationCount = 1)
+    public void testInputRuntimeExceptionDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
         executeTestErrorThrowingInput(nElementsBeforeError, new NullPointerException(), test, addDelays);
     }
 
-    private void executeTestErrorThrowingInput(final int nElementsBeforeError, final RuntimeException ex, final NanoSchedulerBasicTest test, final boolean addDelays) {
+    @Test(enabled = true, expectedExceptions = ReviewedStingException.class, dataProvider = "NanoSchedulerInputExceptionTest", timeOut = 1000, invocationCount = 1)
+    public void testInputErrorDoesntDeadlock(final int nElementsBeforeError, final NanoSchedulerBasicTest test, final boolean addDelays ) throws InterruptedException {
+        executeTestErrorThrowingInput(nElementsBeforeError, new Error(), test, addDelays);
+    }
+
+    private void executeTestErrorThrowingInput(final int nElementsBeforeError, final Throwable ex, final NanoSchedulerBasicTest test, final boolean addDelays) {
         logger.warn("executeTestErrorThrowingInput " + nElementsBeforeError + " ex=" + ex + " test=" + test + " addInputDelays=" + addDelays);
         final NanoScheduler<Integer, Integer, Integer> nanoScheduler = test.makeScheduler();
         nanoScheduler.execute(new ErrorThrowingIterator(nElementsBeforeError, ex, addDelays), test.makeMap(), test.initReduce(), test.makeReduce());
@@ -279,9 +284,9 @@ public class NanoSchedulerUnitTest extends BaseTest {
         final int nElementsBeforeError;
         final boolean addDelays;
         int i = 0;
-        final RuntimeException ex;
+        final Throwable ex;
 
-        private ErrorThrowingIterator(final int nElementsBeforeError, RuntimeException ex, boolean addDelays) {
+        private ErrorThrowingIterator(final int nElementsBeforeError, Throwable ex, boolean addDelays) {
             this.nElementsBeforeError = nElementsBeforeError;
             this.ex = ex;
             this.addDelays = addDelays;
@@ -290,7 +295,12 @@ public class NanoSchedulerUnitTest extends BaseTest {
         @Override public boolean hasNext() { return true; }
         @Override public Integer next() {
             if ( i++ > nElementsBeforeError ) {
-                throw ex;
+                if ( ex instanceof Error )
+                    throw (Error)ex;
+                else if ( ex instanceof RuntimeException )
+                    throw (RuntimeException)ex;
+                else
+                    throw new RuntimeException("Bad exception " + ex);
             } else if ( addDelays ) {
                 maybeDelayMe(i);
                 return i;

From 465694078e5fdc8704fa12b588f1a66a0cc97783 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 4 Dec 2012 22:08:01 -0500
Subject: [PATCH 200/236] Major performance improvement to the GATK engine

-- The NanoSchedule timing code (in NSRuntimeProfile) was crazy expensive, but never showed up in the profilers.  Removed all of the timing code from the NanoScheduler, the NSRuntimeProfile itself, and updated the unit tests.
-- For tools that largely pass through data quickly, this change reduces runtimes by as much as 10x.  For the RealignerTargetCreator example, the runtime before this commit was 3 hours, and after is 30 minutes (6x improvement).
-- Took this opportunity to improve the GATK ProgressMeter.  NotifyOfProgress now just keeps track of the maximum position seen, and a separate daemon thread ProgressMeterDaemon periodically wakes up and prints the current progress.  This removes all inner loop calls to the GATK timers.
-- The history of the bug started here: http://gatkforums.broadinstitute.org/discussion/comment/2402#Comment_2402
---
 .../sting/gatk/executive/MicroScheduler.java  |  4 --
 .../broadinstitute/sting/utils/GenomeLoc.java | 10 +++
 .../utils/nanoScheduler/InputProducer.java    | 12 ----
 .../utils/nanoScheduler/NSRuntimeProfile.java | 67 -------------------
 .../utils/nanoScheduler/NanoScheduler.java    | 53 +--------------
 .../sting/utils/nanoScheduler/Reducer.java    |  9 ---
 .../utils/progressmeter/ProgressMeter.java    | 65 ++++++++++++------
 .../progressmeter/ProgressMeterDaemon.java    | 60 +++++++++++++++++
 .../nanoScheduler/InputProducerUnitTest.java  |  5 +-
 .../nanoScheduler/NanoSchedulerUnitTest.java  | 11 ---
 .../utils/nanoScheduler/ReducerUnitTest.java  |  5 +-
 11 files changed, 122 insertions(+), 179 deletions(-)
 delete mode 100644 public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
index 38170040a..8d0cefaa4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/MicroScheduler.java
@@ -43,7 +43,6 @@ import org.broadinstitute.sting.utils.AutoFormattingTime;
 import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.exceptions.UserException;
-import org.broadinstitute.sting.utils.nanoScheduler.NanoScheduler;
 import org.broadinstitute.sting.utils.progressmeter.ProgressMeter;
 import org.broadinstitute.sting.utils.threading.ThreadEfficiencyMonitor;
 
@@ -346,9 +345,6 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
         for ( final TraversalEngine te : allCreatedTraversalEngines)
             te.shutdown();
 
-        // horrible hack to print nano scheduling information across all nano schedulers, if any were used
-        NanoScheduler.printCombinedRuntimeProfile();
-
         allCreatedTraversalEngines.clear();
         availableTraversalEngines.clear();
     }
diff --git a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
index 4d2c26a79..ec82cdef2 100644
--- a/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
+++ b/public/java/src/org/broadinstitute/sting/utils/GenomeLoc.java
@@ -495,4 +495,14 @@ public class GenomeLoc implements Comparable<GenomeLoc>, Serializable, HasGenome
     public long sizeOfOverlap( final GenomeLoc that ) {
         return ( this.overlapsP(that) ? Math.min( getStop(), that.getStop() ) - Math.max( getStart(), that.getStart() ) + 1L : 0L );
     }
+
+    /**
+     * Returns the maximum GenomeLoc of this and other
+     * @param other another non-null genome loc
+     * @return the max of this and other
+     */
+    public GenomeLoc max(final GenomeLoc other) {
+        final int cmp = this.compareTo(other);
+        return cmp == -1 ? other : this;
+    }
 }
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
index 45c7c5096..0e0237412 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/InputProducer.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
 
 import java.util.Iterator;
 import java.util.concurrent.BlockingQueue;
@@ -19,11 +18,6 @@ class InputProducer<InputType> implements Runnable {
      */
     final Iterator<InputType> inputReader;
 
-    /**
-     * Our timer (may be null) that we use to track our input costs
-     */
-    final SimpleTimer inputTimer;
-
     /**
      * Where we put our input values for consumption
      */
@@ -51,16 +45,13 @@ class InputProducer<InputType> implements Runnable {
 
     public InputProducer(final Iterator<InputType> inputReader,
                          final MultiThreadedErrorTracker errorTracker,
-                         final SimpleTimer inputTimer,
                          final BlockingQueue<InputValue> outputQueue) {
         if ( inputReader == null ) throw new IllegalArgumentException("inputReader cannot be null");
         if ( errorTracker == null ) throw new IllegalArgumentException("errorTracker cannot be null");
-        if ( inputTimer == null ) throw new IllegalArgumentException("inputTimer cannot be null");
         if ( outputQueue == null ) throw new IllegalArgumentException("OutputQueue cannot be null");
 
         this.inputReader = inputReader;
         this.errorTracker = errorTracker;
-        this.inputTimer = inputTimer;
         this.outputQueue = outputQueue;
     }
 
@@ -94,18 +85,15 @@ class InputProducer<InputType> implements Runnable {
      * @throws InterruptedException
      */
     private synchronized InputType readNextItem() throws InterruptedException {
-        inputTimer.restart();
         if ( ! inputReader.hasNext() ) {
             // we are done, mark ourselves as such and return null
             readLastValue = true;
-            inputTimer.stop();
             return null;
         } else {
             // get the next value, and return it
             final InputType input = inputReader.next();
             if ( input == null )
                 throw new IllegalStateException("inputReader.next() returned a null value, breaking our contract");
-            inputTimer.stop();
             nRead++;
             return input;
         }
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java
deleted file mode 100644
index 0926b4c50..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NSRuntimeProfile.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package org.broadinstitute.sting.utils.nanoScheduler;
-
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.AutoFormattingTime;
-import org.broadinstitute.sting.utils.SimpleTimer;
-
-/**
- * Holds runtime profile (input, read, map) times as tracked by NanoScheduler
- *
- * User: depristo
- * Date: 9/10/12
- * Time: 8:31 PM
- */
-public class NSRuntimeProfile {
-    final SimpleTimer outsideSchedulerTimer = new SimpleTimer("outside");
-    final SimpleTimer inputTimer = new SimpleTimer("input");
-    final SimpleTimer mapTimer = new SimpleTimer("map");
-    final SimpleTimer reduceTimer = new SimpleTimer("reduce");
-
-    /**
-     * Combine the elapsed time information from other with this profile
-     *
-     * @param other a non-null profile
-     */
-    public void combine(final NSRuntimeProfile other) {
-        outsideSchedulerTimer.addElapsed(other.outsideSchedulerTimer);
-        inputTimer.addElapsed(other.inputTimer);
-        mapTimer.addElapsed(other.mapTimer);
-        reduceTimer.addElapsed(other.reduceTimer);
-    }
-
-    /**
-     * Print the runtime profiling to logger
-     *
-     * @param logger
-     */
-    public void log(final Logger logger) {
-        log1(logger, "Input   time", inputTimer);
-        log1(logger, "Map     time", mapTimer);
-        log1(logger, "Reduce  time", reduceTimer);
-        log1(logger, "Outside time", outsideSchedulerTimer);
-    }
-
-    /**
-     * @return the total runtime for all functions of this nano scheduler
-     */
-    //@Ensures("result >= 0.0")
-    public double totalRuntimeInSeconds() {
-        return inputTimer.getElapsedTime()
-                + mapTimer.getElapsedTime()
-                + reduceTimer.getElapsedTime()
-                + outsideSchedulerTimer.getElapsedTime();
-    }
-
-    /**
-     * Print to logger.info timing information from timer, with name label
-     *
-     * @param label the name of the timer to display.  Should be human readable
-     * @param timer the timer whose elapsed time we will display
-     */
-    //@Requires({"label != null", "timer != null"})
-    private void log1(final Logger logger, final String label, final SimpleTimer timer) {
-        final double myTimeInSec = timer.getElapsedTime();
-        final double myTimePercent = myTimeInSec / totalRuntimeInSeconds() * 100;
-        logger.info(String.format("%s: %s (%5.2f%%)", label, new AutoFormattingTime(myTimeInSec), myTimePercent));
-    }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
index 6d769c2cf..4cc91faa4 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/NanoScheduler.java
@@ -57,16 +57,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
     boolean debug = false;
     private NSProgressFunction<InputType> progressFunction = null;
 
-    /**
-     * Tracks the combined runtime profiles across all created nano schedulers
-     */
-    final static private NSRuntimeProfile combinedNSRuntimeProfiler = new NSRuntimeProfile();
-
-    /**
-     * The profile specific to this nano scheduler
-     */
-    final private NSRuntimeProfile myNSRuntimeProfile = new NSRuntimeProfile();
-
     /**
      * Create a new nanoscheduler with the desire characteristics requested by the argument
      *
@@ -94,9 +84,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
             this.inputExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-input-thread-%d"));
             this.masterExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory("NS-master-thread-%d"));
         }
-
-        // start timing the time spent outside of the nanoScheduler
-        myNSRuntimeProfile.outsideSchedulerTimer.start();
     }
 
     /**
@@ -123,11 +110,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
      * After this call, execute cannot be invoked without throwing an error
      */
     public void shutdown() {
-        myNSRuntimeProfile.outsideSchedulerTimer.stop();
-
-        // add my timing information to the combined NS runtime profile
-        combinedNSRuntimeProfiler.combine(myNSRuntimeProfile);
-
         if ( nThreads > 1 ) {
             shutdownExecutor("inputExecutor", inputExecutor);
             shutdownExecutor("mapExecutor", mapExecutor);
@@ -137,19 +119,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         shutdown = true;
     }
 
-    public void printRuntimeProfile() {
-        myNSRuntimeProfile.log(logger);
-    }
-
-    public static void printCombinedRuntimeProfile() {
-        if ( combinedNSRuntimeProfiler.totalRuntimeInSeconds() > 0.1 )
-            combinedNSRuntimeProfiler.log(logger);
-    }
-
-    protected double getTotalRuntime() {
-        return myNSRuntimeProfile.totalRuntimeInSeconds();
-    }
-
     /**
      * Helper function to cleanly shutdown an execution service, checking that the execution
      * state is clean when it's done.
@@ -245,8 +214,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
         if ( map == null ) throw new IllegalArgumentException("map function cannot be null");
         if ( reduce == null ) throw new IllegalArgumentException("reduce function cannot be null");
 
-        myNSRuntimeProfile.outsideSchedulerTimer.stop();
-
         ReduceType result;
         if ( ALLOW_SINGLE_THREAD_FASTPATH && getnThreads() == 1 ) {
             result = executeSingleThreaded(inputReader, map, initialValue, reduce);
@@ -254,7 +221,6 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
             result = executeMultiThreaded(inputReader, map, initialValue, reduce);
         }
 
-        myNSRuntimeProfile.outsideSchedulerTimer.restart();
         return result;
     }
 
@@ -273,28 +239,19 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
         while ( true ) {
             // start timer to ensure that both hasNext and next are caught by the timer
-            myNSRuntimeProfile.inputTimer.restart();
             if ( ! inputReader.hasNext() ) {
-                myNSRuntimeProfile.inputTimer.stop();
                 break;
             } else {
                 final InputType input = inputReader.next();
-                myNSRuntimeProfile.inputTimer.stop();
 
                 // map
-                myNSRuntimeProfile.mapTimer.restart();
-                final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
                 final MapType mapValue = map.apply(input);
-                if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
-                myNSRuntimeProfile.mapTimer.stop();
 
-                if ( i++ % this.bufferSize == 0 && progressFunction != null )
+                if ( progressFunction != null )
                     progressFunction.progress(input);
 
                 // reduce
-                myNSRuntimeProfile.reduceTimer.restart();
                 sum = reduce.apply(mapValue, sum);
-                myNSRuntimeProfile.reduceTimer.stop();
             }
         }
 
@@ -401,7 +358,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
 
             // Create the input producer and start it running
             final InputProducer<InputType> inputProducer =
-                    new InputProducer<InputType>(inputReader, errorTracker, myNSRuntimeProfile.inputTimer, inputQueue);
+                    new InputProducer<InputType>(inputReader, errorTracker, inputQueue);
             inputExecutor.submit(inputProducer);
 
             // a priority queue that stores up to bufferSize elements
@@ -410,7 +367,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                     new PriorityBlockingQueue<MapResult<MapType>>();
 
             final Reducer<MapType, ReduceType> reducer
-                    = new Reducer<MapType, ReduceType>(reduce, errorTracker, myNSRuntimeProfile.reduceTimer, initialValue);
+                    = new Reducer<MapType, ReduceType>(reduce, errorTracker, initialValue);
 
             try {
                 int nSubmittedJobs = 0;
@@ -508,11 +465,7 @@ public class NanoScheduler<InputType, MapType, ReduceType> {
                     final InputType input = inputWrapper.getValue();
 
                     // map
-                    myNSRuntimeProfile.mapTimer.restart();
-                    final long preMapTime = LOG_MAP_TIMES ? 0 : myNSRuntimeProfile.mapTimer.currentTimeNano();
                     final MapType mapValue = map.apply(input);
-                    if ( LOG_MAP_TIMES ) logger.info("MAP TIME " + (myNSRuntimeProfile.mapTimer.currentTimeNano() - preMapTime));
-                    myNSRuntimeProfile.mapTimer.stop();
 
                     // enqueue the result into the mapResultQueue
                     result = new MapResult<MapType>(mapValue, jobID);
diff --git a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
index 92c1018eb..5cae28187 100644
--- a/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
+++ b/public/java/src/org/broadinstitute/sting/utils/nanoScheduler/Reducer.java
@@ -4,7 +4,6 @@ import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
 
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.PriorityBlockingQueue;
@@ -34,7 +33,6 @@ class Reducer<MapType, ReduceType> {
 
     final CountDownLatch countDownLatch = new CountDownLatch(1);
     final NSReduceFunction<MapType, ReduceType> reduce;
-    final SimpleTimer reduceTimer;
     final MultiThreadedErrorTracker errorTracker;
 
     /**
@@ -61,20 +59,16 @@ class Reducer<MapType, ReduceType> {
      * reduceTimer
      *
      * @param reduce the reduce function to apply
-     * @param reduceTimer the timer to time the reduce function call
      * @param initialSum the initial reduce sum
      */
     public Reducer(final NSReduceFunction<MapType, ReduceType> reduce,
                    final MultiThreadedErrorTracker errorTracker,
-                   final SimpleTimer reduceTimer,
                    final ReduceType initialSum) {
         if ( errorTracker == null ) throw new IllegalArgumentException("Error tracker cannot be null");
         if ( reduce == null ) throw new IllegalArgumentException("Reduce function cannot be null");
-        if ( reduceTimer == null ) throw new IllegalArgumentException("reduceTimer cannot be null");
 
         this.errorTracker = errorTracker;
         this.reduce = reduce;
-        this.reduceTimer = reduceTimer;
         this.sum = initialSum;
     }
 
@@ -125,10 +119,7 @@ class Reducer<MapType, ReduceType> {
                     nReducesNow++;
 
                     // apply reduce, keeping track of sum
-                    reduceTimer.restart();
                     sum = reduce.apply(result.getValue(), sum);
-                    reduceTimer.stop();
-
                 }
 
                 numJobsReduced++;
diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
index a8715e242..b69283b9d 100755
--- a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeter.java
@@ -26,6 +26,7 @@ package org.broadinstitute.sting.utils.progressmeter;
 
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Invariant;
+import com.google.java.contract.Requires;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.*;
 import org.broadinstitute.sting.utils.exceptions.UserException;
@@ -143,6 +144,12 @@ public class ProgressMeter {
     /** We use the SimpleTimer to time our run */
     private final SimpleTimer timer = new SimpleTimer();
 
+    private GenomeLoc maxGenomeLoc = null;
+    private String positionMessage = "starting";
+    private long nTotalRecordsProcessed = 0;
+
+    final ProgressMeterDaemon progressMeterDaemon;
+
     /**
      * Create a new ProgressMeter
      *
@@ -177,21 +184,15 @@ public class ProgressMeter {
         targetSizeInBP = processingIntervals.coveredSize();
 
         // start up the timer
+        progressMeterDaemon = new ProgressMeterDaemon(this);
         start();
     }
 
     /**
-     * Forward request to notifyOfProgress
-     *
-     * Assumes that one cycle has been completed
-     *
-     * @param loc  our current location.  Null means "in unmapped reads"
-     * @param nTotalRecordsProcessed the total number of records we've processed
+     * Start up the progress meter, printing initialization message and starting up the
+     * daemon thread for periodic printing.
      */
-    public void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
-        notifyOfProgress(loc, false, nTotalRecordsProcessed);
-    }
-
+    @Requires("progressMeterDaemon != null")
     private synchronized void start() {
         timer.start();
         lastProgressPrintTime = timer.currentTime();
@@ -199,6 +200,8 @@ public class ProgressMeter {
         logger.info("[INITIALIZATION COMPLETE; STARTING PROCESSING]");
         logger.info(String.format("%15s processed.%s  runtime per.1M.%s completed total.runtime remaining",
                 "Location", processingUnitName, processingUnitName));
+
+        progressMeterDaemon.start();
     }
 
     /**
@@ -216,19 +219,41 @@ public class ProgressMeter {
      * Synchronized to ensure that even with multiple threads calling notifyOfProgress we still
      * get one clean stream of meter logs.
      *
+     * Note this thread doesn't actually print progress, unless must print is true, but just registers
+     * the progress itself.  A separate printing daemon periodically polls the meter to print out
+     * progress
+     *
      * @param loc       Current location, can be null if you are at the end of the processing unit
-     * @param mustPrint If true, will print out info, regardless of time interval
      * @param nTotalRecordsProcessed the total number of records we've processed
      */
-    private synchronized void notifyOfProgress(final GenomeLoc loc, boolean mustPrint, final long nTotalRecordsProcessed) {
+    public synchronized void notifyOfProgress(final GenomeLoc loc, final long nTotalRecordsProcessed) {
         if ( nTotalRecordsProcessed < 0 ) throw new IllegalArgumentException("nTotalRecordsProcessed must be >= 0");
 
+        // weird comparison to ensure that loc == null (in unmapped reads) is keep before maxGenomeLoc == null (on startup)
+        this.maxGenomeLoc = loc == null ? loc : (maxGenomeLoc == null ? loc : loc.max(maxGenomeLoc));
+        this.nTotalRecordsProcessed = Math.max(this.nTotalRecordsProcessed, nTotalRecordsProcessed);
+
+        // a pretty name for our position
+        this.positionMessage = maxGenomeLoc == null
+                ? "unmapped reads"
+                : String.format("%s:%d", maxGenomeLoc.getContig(), maxGenomeLoc.getStart());
+    }
+
+    /**
+     * Actually try to print out progress
+     *
+     * This function may print out if the progress print is due, but if not enough time has elapsed
+     * since the last print we will not print out information.
+     *
+     * @param mustPrint if true, progress will be printed regardless of the last time we printed progress
+     */
+    protected synchronized void printProgress(final boolean mustPrint) {
         final long curTime = timer.currentTime();
         final boolean printProgress = mustPrint || maxElapsedIntervalForPrinting(curTime, lastProgressPrintTime, progressPrintFrequency);
         final boolean printLog = performanceLog != null && maxElapsedIntervalForPrinting(curTime, lastPerformanceLogPrintTime, PERFORMANCE_LOG_PRINT_FREQUENCY);
 
         if ( printProgress || printLog ) {
-            final ProgressMeterData progressData = takeProgressSnapshot(loc, nTotalRecordsProcessed);
+            final ProgressMeterData progressData = takeProgressSnapshot(maxGenomeLoc, nTotalRecordsProcessed);
 
             final AutoFormattingTime elapsed = new AutoFormattingTime(progressData.getElapsedSeconds());
             final AutoFormattingTime bpRate = new AutoFormattingTime(progressData.secondsPerMillionBP());
@@ -241,13 +266,8 @@ public class ProgressMeter {
                 lastProgressPrintTime = curTime;
                 updateLoggerPrintFrequency(estTotalRuntime.getTimeInSeconds());
 
-                // a pretty name for our position
-                final String posName = loc == null
-                        ? (mustPrint ? "done" : "unmapped reads")
-                        : String.format("%s:%d", loc.getContig(), loc.getStart());
-
                 logger.info(String.format("%15s        %5.2e %s     %s    %5.1f%%      %s  %s",
-                        posName, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
+                        positionMessage, progressData.getUnitsProcessed()*1.0, elapsed, unitRate,
                         100*fractionGenomeTargetCompleted, estTotalRuntime, timeToCompletion));
 
             }
@@ -296,13 +316,18 @@ public class ProgressMeter {
      */
     public void notifyDone(final long nTotalRecordsProcessed) {
         // print out the progress meter
-        notifyOfProgress(null, true, nTotalRecordsProcessed);
+        this.nTotalRecordsProcessed = nTotalRecordsProcessed;
+        this.positionMessage = "done";
+        printProgress(true);
 
         logger.info(String.format("Total runtime %.2f secs, %.2f min, %.2f hours",
                 timer.getElapsedTime(), timer.getElapsedTime() / 60, timer.getElapsedTime() / 3600));
 
         if ( performanceLog != null )
             performanceLog.close();
+
+        // shutdown our daemon thread
+        progressMeterDaemon.done();
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java
new file mode 100644
index 000000000..16887400a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/progressmeter/ProgressMeterDaemon.java
@@ -0,0 +1,60 @@
+package org.broadinstitute.sting.utils.progressmeter;
+
+/**
+ * Daemon thread that periodically prints the progress of the progress meter
+ *
+ * User: depristo
+ * Date: 12/4/12
+ * Time: 9:16 PM
+ */
+public final class ProgressMeterDaemon extends Thread {
+    /**
+     * How frequently should we poll and print progress?
+     */
+    private final static long POLL_FREQUENCY_MILLISECONDS = 10 * 1000;
+
+    /**
+     * Are we to continue periodically printing status, or should we shut down?
+     */
+    boolean done = false;
+
+    /**
+     * The meter we will call print on
+     */
+    final ProgressMeter meter;
+
+    /**
+     * Create a new ProgressMeterDaemon printing progress for meter
+     * @param meter the progress meter to print progress of
+     */
+    public ProgressMeterDaemon(final ProgressMeter meter) {
+        if ( meter == null ) throw new IllegalArgumentException("meter cannot be null");
+        this.meter = meter;
+        setDaemon(true);
+        setName("ProgressMeterDaemon");
+    }
+
+    /**
+     * Tells this daemon thread to shutdown at the next opportunity, as the progress
+     * metering is complete.
+     */
+    public final void done() {
+        this.done = true;
+    }
+
+    /**
+     * Start up the ProgressMeterDaemon, polling every tens of seconds to print, if
+     * necessary, the provided progress meter.  Never exits until the JVM is complete,
+     * or done() is called, as the thread is a daemon thread
+     */
+    public void run() {
+        while (! done) {
+            meter.printProgress(false);
+            try {
+                Thread.sleep(POLL_FREQUENCY_MILLISECONDS);
+            } catch (InterruptedException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+}
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java
index 6c59f1585..489adab6b 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/InputProducerUnitTest.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
@@ -46,7 +45,7 @@ public class InputProducerUnitTest extends BaseTest {
         final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
                 new LinkedBlockingDeque<InputProducer<Integer>.InputValue>(queueSize);
 
-        final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), new SimpleTimer(), readQueue);
+        final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), readQueue);
 
         final ExecutorService es = Executors.newSingleThreadExecutor();
 
@@ -94,7 +93,7 @@ public class InputProducerUnitTest extends BaseTest {
         final LinkedBlockingDeque<InputProducer<Integer>.InputValue> readQueue =
                 new LinkedBlockingDeque<InputProducer<Integer>.InputValue>();
 
-        final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), new SimpleTimer(), readQueue);
+        final InputProducer<Integer> ip = new InputProducer<Integer>(elements.iterator(), new MultiThreadedErrorTracker(), readQueue);
 
         final ExecutorService es = Executors.newSingleThreadExecutor();
         es.submit(ip);
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
index d415b8b4c..61e8ec0a1 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/NanoSchedulerUnitTest.java
@@ -188,17 +188,6 @@ public class NanoSchedulerUnitTest extends BaseTest {
 
         Assert.assertTrue(callback.callBacks >= test.nExpectedCallbacks(), "Not enough callbacks detected.  Expected at least " + test.nExpectedCallbacks() + " but saw only " + callback.callBacks);
         nanoScheduler.shutdown();
-
-        // TODO -- need to enable only in the case where there's serious time spend in
-        // TODO -- read /map / reduce, otherwise the "outside" timer doesn't add up
-        final double myTimeEstimate = timer.getElapsedTime();
-        final double tolerance = 0.1;
-        if ( false && myTimeEstimate > 0.1 ) {
-            Assert.assertTrue(nanoScheduler.getTotalRuntime() > myTimeEstimate * tolerance,
-                    "NanoScheduler said that the total runtime was " + nanoScheduler.getTotalRuntime()
-                            + " but the overall test time was " + myTimeEstimate + ", beyond our tolerance factor of "
-                            + tolerance);
-        }
     }
 
     @Test(enabled = true && ! DEBUG, dataProvider = "NanoSchedulerBasicTest", dependsOnMethods = "testMultiThreadedNanoScheduler", timeOut = NANO_SCHEDULE_MAX_RUNTIME)
diff --git a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java
index 39133d1ed..6c17aa78d 100644
--- a/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/nanoScheduler/ReducerUnitTest.java
@@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils.nanoScheduler;
 
 import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.utils.MultiThreadedErrorTracker;
-import org.broadinstitute.sting.utils.SimpleTimer;
 import org.broadinstitute.sting.utils.Utils;
 import org.testng.Assert;
 import org.testng.annotations.DataProvider;
@@ -93,7 +92,7 @@ public class ReducerUnitTest extends BaseTest {
 
         final List<List<MapResult<Integer>>> jobGroups = Utils.groupList(allJobs, groupSize);
         final ReduceSumTest reduce = new ReduceSumTest();
-        final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(reduce, new MultiThreadedErrorTracker(), new SimpleTimer(), 0);
+        final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(reduce, new MultiThreadedErrorTracker(), 0);
 
         final TestWaitingForFinalReduce waitingThread = new TestWaitingForFinalReduce(reducer, expectedSum(allJobs));
         final ExecutorService es = Executors.newSingleThreadExecutor();
@@ -155,7 +154,7 @@ public class ReducerUnitTest extends BaseTest {
     private void runSettingJobIDTwice() throws Exception {
         final PriorityBlockingQueue<MapResult<Integer>> mapResultsQueue = new PriorityBlockingQueue<MapResult<Integer>>();
 
-        final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(new ReduceSumTest(), new MultiThreadedErrorTracker(), new SimpleTimer(), 0);
+        final Reducer<Integer, Integer> reducer = new Reducer<Integer, Integer>(new ReduceSumTest(), new MultiThreadedErrorTracker(), 0);
 
         reducer.setTotalJobCount(10);
         reducer.setTotalJobCount(15);

From d0cab795b7784bca88e5543d24b85e760ef60549 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Dec 2012 14:49:01 -0500
Subject: [PATCH 201/236] Got caught in the middle of a bad integration test,
 that was fixed in independent push.  Moved test bam into testdata.

---
 .../gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 9e9c7e37e..7459d131b 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -436,7 +436,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     @Test
     public void testNsInCigar() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
-                "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + validationDataLocation + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
+                "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
                 Arrays.asList("32f18ba50406cd8c8069ba07f2f89558"));
         executeTest("test calling on reads with Ns in CIGAR", spec);
     }

From 234ff645566cce18307dc4a8d2588bb4ce9ea249 Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Wed, 5 Dec 2012 15:52:57 -0500
Subject: [PATCH 202/236] Changes to AssessNA12878 to allow for 100s of input
 callsets to assess against the database.

---
 .../src/org/broadinstitute/sting/gatk/report/GATKReport.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 7ae2bb453..605a6680f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -343,7 +343,7 @@ public class GATKReport {
 
         GATKReportTable table = tables.firstEntry().getValue();
         if ( table.getNumColumns() != values.length )
-            throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
+            throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
 
         final int rowIndex = table.getNumRows();
         for ( int i = 0; i < values.length; i++ )

From dbf721968d8196f4ea425c27b74fb013db64b199 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 5 Dec 2012 21:35:27 -0500
Subject: [PATCH 203/236] PrintReads large-scale test to protect against
 another major low-level performance issue

---
 .../walkers/PrintReadsLargeScaleTest.java     | 20 +++++++++++++++++++
 1 file changed, 20 insertions(+)
 create mode 100755 public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsLargeScaleTest.java

diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsLargeScaleTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsLargeScaleTest.java
new file mode 100755
index 000000000..ad7ac56f9
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/PrintReadsLargeScaleTest.java
@@ -0,0 +1,20 @@
+package org.broadinstitute.sting.gatk.walkers;
+
+import org.broadinstitute.sting.WalkerTest;
+import org.testng.annotations.Test;
+
+import java.util.ArrayList;
+
+public class PrintReadsLargeScaleTest extends WalkerTest {
+    @Test( timeOut = 1000 * 60 * 60 * 20 ) // 60 seconds * 60 seconds / minute * 20 minutes
+    public void testRealignerTargetCreator() {
+        WalkerTestSpec spec = new WalkerTestSpec(
+                "-R " + b37KGReference +
+                        " -T PrintReads" +
+                        " -I " + evaluationDataLocation + "CEUTrio.HiSeq.WEx.b37.NA12892.clean.dedup.recal.1.bam" +
+                        " -o /dev/null",
+                 0,
+                new ArrayList<String>(0));
+        executeTest("testPrintReadsWholeExomeChr1", spec);
+    }
+}

From 406adb8d44079b6f2928f0432eaf792a764b3eb6 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 5 Dec 2012 23:15:36 -0500
Subject: [PATCH 204/236] The allele biased downsampling should not abort if
 there's a reduced read.  Rather it should always keep the RR and downsample
 only original reads in the pileup.

---
 .../gatk/downsampling/AlleleBiasedDownsamplingUtils.java  | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
index a61614481..94f6ff649 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/downsampling/AlleleBiasedDownsamplingUtils.java
@@ -56,11 +56,14 @@ public class AlleleBiasedDownsamplingUtils {
         for ( int i = 0; i < 4; i++ )
             alleleStratifiedElements[i] = new ArrayList<PileupElement>();
 
+        // keep all of the reduced reads
+        final ArrayList<PileupElement> reducedReadPileups = new ArrayList<PileupElement>();
+
         // start by stratifying the reads by the alleles they represent at this position
         for( final PileupElement pe : pileup ) {
-            // abort if we have a reduced read - we do not want to remove it!
+            // we do not want to remove a reduced read
             if ( pe.getRead().isReducedRead() )
-                return pileup;
+                reducedReadPileups.add(pe);
 
             final int baseIndex = BaseUtils.simpleBaseToBaseIndex(pe.getBase());
             if ( baseIndex != -1 )
@@ -76,6 +79,7 @@ public class AlleleBiasedDownsamplingUtils {
                 return difference != 0 ? difference : element1.getRead().getReadName().compareTo(element2.getRead().getReadName());
             }
         });
+        elementsToKeep.addAll(reducedReadPileups);
 
         // make a listing of allele counts
         final int[] alleleCounts = new int[4];

From cc4e3ec58927014f95758ed28d20b1715ec23276 Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Wed, 5 Dec 2012 12:45:02 -0500
Subject: [PATCH 206/236] Update TODO list

---
 .../traversals/TraverseActiveRegionsTest.java     | 15 ++++++---------
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index a65b0cb45..b08210ea6 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -109,6 +109,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         dictionary = reference.getSequenceDictionary();
         genomeLocParser = new GenomeLocParser(dictionary);
 
+        // TODO: test shard boundaries
+
         intervals = new ArrayList<GenomeLoc>();
         intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
         intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
@@ -139,8 +141,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
         // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
         verifyEqualIntervals(intervals, activeIntervals);
-
-        // TODO: more tests and edge cases
     }
 
     private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
@@ -171,8 +171,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
         verifyActiveRegionCoverage(intervals, activeRegions);
-
-        // TODO: more tests and edge cases
     }
 
     private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
@@ -279,8 +277,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
-
-        // TODO: more tests and edge cases
     }
 
     @Test
@@ -348,8 +344,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
-
-        // TODO: more tests and edge cases
     }
 
     @Test
@@ -418,8 +412,11 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
         getRead(region, "simple20");
+    }
 
-        // TODO: more tests and edge cases
+    @Test
+    public void testUnmappedReads() {
+        // TODO
     }
 
     private void verifyReadNotPlaced(ActiveRegion region, String readName) {

From 3b0e3767bfbe4de06139f5de5dbd37519775983d Mon Sep 17 00:00:00 2001
From: Joel Thibault <thibault@broadinstitute.org>
Date: Fri, 7 Dec 2012 13:46:41 -0500
Subject: [PATCH 208/236] Add a test for a read that extends off the end of
 chr1

---
 .../traversals/TraverseActiveRegionsTest.java | 54 +++++++++++++++++++
 1 file changed, 54 insertions(+)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index b08210ea6..69907d485 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -117,6 +117,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+        intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
         intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
         intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
 
@@ -128,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
         reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
+        reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
         reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
 
         // required by LocusIteratorByState, and I prefer to list them in test case order above
@@ -229,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -243,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -254,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -265,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -276,6 +295,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
     }
 
@@ -296,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -310,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -321,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -332,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -343,6 +379,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
     }
 
@@ -364,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -378,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -389,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -400,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -411,6 +464,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
     }
 

From bf8421eeb72c3c52f4fe595b90bd725c4d20a08b Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Fri, 7 Dec 2012 16:02:58 -0500
Subject: [PATCH 210/236] Fixes GSA-671 / AFCalcResult.log10pNonRefByAllele
 should really be log10pRefByAllele

-- The current implementation of AFCalcResult contains a map from allele -> log10pNonRef. The only use of this field is to support the isPolymorphic function per allele. The call to this function looks like isPolymorphic(allele, QUAL). The QUAL is a phred-scaled threshold where you want to include alleles where the log10pNonRef >= QUAL (appropriately transformed). The problem is that when log10pNonRef is large, it quickly gets set to 0, while it's complementary log10pRef value has a meaningful log10 value. For example, if log10pRef = -100 (not an uncommonly large value), log10pNonRef = 0.0.
-- In order to preserve precision and allow us to more finally differentiate high QUAL from low QUAL (but still poly) sites we should store log10pRef values instead, and test that log10pRef <= threshold.
-- See https://jira.broadinstitute.org/browse/GSA-671 for more information.
---
 .../afcalc/AFCalcPerformanceTest.java         |  4 +-
 .../afcalc/AFCalcResultUnitTest.java          |  6 +--
 .../genotyper/afcalc/AFCalcUnitTest.java      |  2 +-
 ...dentAllelesDiploidExactAFCalcUnitTest.java |  2 +-
 .../genotyper/afcalc/AFCalcResult.java        | 38 +++++++++++--------
 .../genotyper/afcalc/ExactCallLogger.java     | 10 ++---
 .../IndependentAllelesDiploidExactAFCalc.java | 10 ++---
 .../afcalc/OriginalDiploidExactAFCalc.java    |  6 +--
 .../genotyper/afcalc/StateTracker.java        |  8 ++--
 9 files changed, 47 insertions(+), 39 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
index e9ed6b153..0a3512aa6 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
@@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
                 if ( a.isNonReference() ) {
                     final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
                     logger.info("\t\t   MLE       " + a + ":            " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
-                    final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
-                    logger.info("\t\t   Posterior " + a + ":            " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
+                    final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
+                    logger.info("\t\t   Posterior " + a + ":            " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
                 }
             }
         }
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
index 96e055e92..016926e12 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
@@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
         List<Object[]> tests = new ArrayList<Object[]>();
 
         final List<Double> pValues = new LinkedList<Double>();
-        for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
-            for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
+        for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
+            for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
                 pValues.add(p + espilon);
 
         for ( final double pNonRef : pValues  ) {
@@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest {
                 alleles,
                 MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
                 log10Even,
-                Collections.singletonMap(C, Math.log10(pNonRef)));
+                Collections.singletonMap(C, Math.log10(1 - pNonRef)));
     }
 
     @Test(enabled = true, dataProvider = "TestIsPolymorphic")
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
index 2d346e548..7ee909fe0 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
@@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
 
             // must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
             Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
-            Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
+            Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
         }
     }
 }
\ No newline at end of file
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
index 391c99990..663471106 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
@@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
         for ( int i = 0; i < log10LAlleles.size(); i++ ) {
             final double log10LAllele1 = log10LAlleles.get(i);
             final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
-            final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
+            final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
             originalPriors.add(result1);
             pNonRefN.add(log10pNonRef*(i+1));
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
index dbb0e8cdd..d6a5cb16d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
@@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 
@@ -52,7 +51,7 @@ public class AFCalcResult {
     private final double[] log10PriorsOfAC;
     private final double[] log10PosteriorsOfAC;
 
-    private final Map<Allele, Double> log10pNonRefByAllele;
+    private final Map<Allele, Double> log10pRefByAllele;
 
     /**
      * The AC values for all ALT alleles at the MLE
@@ -74,16 +73,16 @@ public class AFCalcResult {
                         final List<Allele> allelesUsedInGenotyping,
                         final double[] log10LikelihoodsOfAC,
                         final double[] log10PriorsOfAC,
-                        final Map<Allele, Double> log10pNonRefByAllele) {
+                        final Map<Allele, Double> log10pRefByAllele) {
         if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
         if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
         if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
         if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
         if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
         if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
-        if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
-        if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
-        if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
+        if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
+        if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
+        if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
         if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
         if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
 
@@ -94,7 +93,7 @@ public class AFCalcResult {
         this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
         this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
         this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
-        this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
+        this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
     }
 
     /**
@@ -104,7 +103,7 @@ public class AFCalcResult {
      * @return
      */
     public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
-        return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
+        return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
     }
 
     /**
@@ -219,7 +218,7 @@ public class AFCalcResult {
     public String toString() {
         final List<String> byAllele = new LinkedList<String>();
         for ( final Allele a : getAllelesUsedInGenotyping() )
-            if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
+            if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
         return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
     }
 
@@ -237,7 +236,7 @@ public class AFCalcResult {
      */
     @Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
     public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
-        return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
+        return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
     }
 
     /**
@@ -245,7 +244,7 @@ public class AFCalcResult {
      */
     public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
         if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
-        final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
+        final double log10Threshold = minPNonRefPhredScaledQual / -10;
         return isPolymorphic(allele, log10Threshold);
     }
 
@@ -263,7 +262,16 @@ public class AFCalcResult {
     }
 
     /**
-     * Returns the log10 probability that allele is segregating
+     * Returns the log10 probability that allele is not segregating
+     *
+     * Note that this function is p not segregating so that we can store
+     * internally the log10 value of AF == 0, which grows very quickly
+     * negative and yet has sufficient resolution for high confidence tests.
+     * For example, if log10pRef == -100, not an unreasonably high number,
+     * if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
+     * quickly underflows to 1.  So the logic here is backward from what
+     * you really want (the p of segregating) but we do that for numerical
+     * reasons
      *
      * Unlike the sites-level annotation, this calculation is specific to allele, and can be
      * used to separately determine how much evidence there is that allele is independently
@@ -272,11 +280,11 @@ public class AFCalcResult {
      * evidence for one allele but not so much for any other allele
      *
      * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
-     * @return the log10 probability that allele is segregating at this site
+     * @return the log10 probability that allele is not segregating at this site
      */
     @Ensures("MathUtils.goodLog10Probability(result)")
-    public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
-        final Double log10pNonRef = log10pNonRefByAllele.get(allele);
+    public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
+        final Double log10pNonRef = log10pRefByAllele.get(allele);
         if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
         return log10pNonRef;
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
index f13fe4429..b138ddf70 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
@@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
         for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
             if ( allele.isNonReference() ) {
                 printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
-                printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
+                printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
             }
         }
 
@@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
             final double[] posteriors = new double[2];
             final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
             final List<Integer> mle = new ArrayList<Integer>();
-            final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
+            final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
             long runtimeNano = -1;
 
             GenomeLoc currentLoc = null;
@@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
                         builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
                         builder.genotypes(genotypes);
                         final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
-                        final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
+                        final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
                         calls.add(new ExactCall(builder.make(), runtimeNano, result));
                     }
                     break;
@@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
                     posteriors[1] = Double.valueOf(value);
                 } else if (variable.equals("MLE")) {
                     mle.add(Integer.valueOf(value));
-                } else if (variable.equals("pNonRefByAllele")) {
+                } else if (variable.equals("pRefByAllele")) {
                     final Allele a = Allele.create(key);
-                    log10pNonRefByAllele.put(a, Double.valueOf(value));
+                    log10pRefByAllele.put(a, Double.valueOf(value));
                 } else if (variable.equals("runtime.nano")) {
                     runtimeNano = Long.valueOf(value);
                 } else {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
index d0b801a20..937ef2ffc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
@@ -125,8 +125,8 @@ import java.util.*;
          */
         final List<AFCalcResult> supporting;
 
-        private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
-            super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
+        private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
+            super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
             this.supporting = supporting;
         }
     }
@@ -323,7 +323,7 @@ import java.util.*;
         final int nAltAlleles = sortedResultsWithThetaNPriors.size();
         final int[] alleleCountsOfMLE = new int[nAltAlleles];
         final double[] log10PriorsOfAC = new double[2];
-        final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
+        final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
 
         // the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
         double log10PosteriorOfACEq0Sum = 0.0;
@@ -348,7 +348,7 @@ import java.util.*;
             log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
 
             // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
-            log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
+            log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
 
             // trivial -- update the number of evaluations
             nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
@@ -384,6 +384,6 @@ import java.util.*;
                 MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
                 // priors incorporate multiple alt alleles, must be normalized
                 MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
-                log10pNonRefByAllele, sortedResultsWithThetaNPriors);
+                log10pRefByAllele, sortedResultsWithThetaNPriors);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
index fc26111e0..67cc79646 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
@@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
         final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
         final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
 
-        final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
-        final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
+        final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
+        final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
 
         return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
                 MathUtils.normalizeFromLog10(log10Likelihoods, true),
                 MathUtils.normalizeFromLog10(log10Priors, true),
-                log10pNonRefByAllele);
+                log10pRefByAllele);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
index b82ec1d29..ad6361a3f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
@@ -165,14 +165,14 @@ final class StateTracker {
         final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
         final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
 
-        final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
+        final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
         for ( int i = 0; i < subACOfMLE.length; i++ ) {
             final Allele allele = allelesUsedInGenotyping.get(i+1);
-            final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
-            log10pNonRefByAllele.put(allele, log10PNonRef);
+            final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
+            log10pRefByAllele.put(allele, log10PRef);
         }
 
-        return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
+        return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
     }
 
     // --------------------------------------------------------------------------------

From 574d5b467fccaf48faedf8e1108afc5f930750f8 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Sun, 9 Dec 2012 02:09:34 -0500
Subject: [PATCH 212/236] Bug fix for indel HMM: protect against situation
 where long reads (e.g. Sanger) in a pileup can lead to a read starting after
 the haplotype end for a given haplotype.

---
 .../afcalc/AFCalcPerformanceTest.java         |  4 +-
 .../afcalc/AFCalcResultUnitTest.java          |  6 +-
 .../genotyper/afcalc/AFCalcUnitTest.java      |  2 +-
 ...dentAllelesDiploidExactAFCalcUnitTest.java |  2 +-
 .../sting/gatk/report/GATKReport.java         |  2 +-
 .../genotyper/afcalc/AFCalcResult.java        | 38 ++++++----
 .../genotyper/afcalc/ExactCallLogger.java     | 10 +--
 .../IndependentAllelesDiploidExactAFCalc.java | 10 +--
 .../afcalc/OriginalDiploidExactAFCalc.java    |  6 +-
 .../genotyper/afcalc/StateTracker.java        |  8 +--
 .../indels/PairHMMIndelErrorModel.java        |  7 +-
 .../traversals/TraverseActiveRegionsTest.java | 69 ++++++++++++++++---
 12 files changed, 113 insertions(+), 51 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
index e9ed6b153..0a3512aa6 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcPerformanceTest.java
@@ -240,8 +240,8 @@ public class AFCalcPerformanceTest {
                 if ( a.isNonReference() ) {
                     final String warningmeMLE = call.originalCall.getAlleleCountAtMLE(a) != result.getAlleleCountAtMLE(a) ? " DANGER-MLE-DIFFERENT" : "";
                     logger.info("\t\t   MLE       " + a + ":            " + call.originalCall.getAlleleCountAtMLE(a) + " vs " + result.getAlleleCountAtMLE(a) + warningmeMLE);
-                    final String warningmePost = call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) == 0 && result.getLog10PosteriorOfAFGt0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
-                    logger.info("\t\t   Posterior " + a + ":            " + call.originalCall.getLog10PosteriorOfAFGt0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFGt0ForAllele(a) + warningmePost);
+                    final String warningmePost = call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) == 0 && result.getLog10PosteriorOfAFEq0ForAllele(a) < -10 ? " DANGER-POSTERIORS-DIFFERENT" : "";
+                    logger.info("\t\t   Posterior " + a + ":            " + call.originalCall.getLog10PosteriorOfAFEq0ForAllele(a) + " vs " + result.getLog10PosteriorOfAFEq0ForAllele(a) + warningmePost);
                 }
             }
         }
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
index 96e055e92..016926e12 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
@@ -83,8 +83,8 @@ public class AFCalcResultUnitTest extends BaseTest {
         List<Object[]> tests = new ArrayList<Object[]>();
 
         final List<Double> pValues = new LinkedList<Double>();
-        for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999) )
-            for ( final double espilon : Arrays.asList(-1e-5, 0.0, 1e-5) )
+        for ( final double p : Arrays.asList(0.01, 0.1, 0.9, 0.99, 0.999, 1 - 1e-4, 1 - 1e-5, 1 - 1e-6) )
+            for ( final double espilon : Arrays.asList(-1e-7, 0.0, 1e-7) )
                 pValues.add(p + espilon);
 
         for ( final double pNonRef : pValues  ) {
@@ -106,7 +106,7 @@ public class AFCalcResultUnitTest extends BaseTest {
                 alleles,
                 MathUtils.normalizeFromLog10(new double[]{1 - pNonRef, pNonRef}, true, false),
                 log10Even,
-                Collections.singletonMap(C, Math.log10(pNonRef)));
+                Collections.singletonMap(C, Math.log10(1 - pNonRef)));
     }
 
     @Test(enabled = true, dataProvider = "TestIsPolymorphic")
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
index 2d346e548..7ee909fe0 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcUnitTest.java
@@ -681,7 +681,7 @@ public class AFCalcUnitTest extends BaseTest {
 
             // must be getCalledChrCount because we cannot ensure that the VC made has our desired ACs
             Assert.assertEquals(result.getAlleleCountAtMLE(alt), vc.getCalledChrCount(alt));
-            Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFGt0ForAllele(alt));
+            Assert.assertEquals(result.isPolymorphic(alt, -1), (boolean)expectedPoly.get(i), "isPolymorphic for allele " + alt + " " + result.getLog10PosteriorOfAFEq0ForAllele(alt));
         }
     }
 }
\ No newline at end of file
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
index 391c99990..663471106 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalcUnitTest.java
@@ -148,7 +148,7 @@ public class IndependentAllelesDiploidExactAFCalcUnitTest extends BaseTest {
         for ( int i = 0; i < log10LAlleles.size(); i++ ) {
             final double log10LAllele1 = log10LAlleles.get(i);
             final double[] L1 = MathUtils.normalizeFromLog10(new double[]{log10LAllele1, 0.0}, true);
-            final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, 0.0));
+            final AFCalcResult result1 = new AFCalcResult(new int[]{1}, 1, Arrays.asList(A, C), L1, rawPriors, Collections.singletonMap(C, -10000.0));
             originalPriors.add(result1);
             pNonRefN.add(log10pNonRef*(i+1));
         }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
index 7ae2bb453..605a6680f 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/report/GATKReport.java
@@ -343,7 +343,7 @@ public class GATKReport {
 
         GATKReportTable table = tables.firstEntry().getValue();
         if ( table.getNumColumns() != values.length )
-            throw new ReviewedStingException("The number of arguments in writeRow() " + values.length + " must match the number of columns in the table" + table.getNumColumns());
+            throw new ReviewedStingException("The number of arguments in writeRow (" + values.length + ") must match the number of columns in the table (" + table.getNumColumns() + ")" );
 
         final int rowIndex = table.getNumRows();
         for ( int i = 0; i < values.length; i++ )
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
index dbb0e8cdd..d6a5cb16d 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
@@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper.afcalc;
 import com.google.java.contract.Ensures;
 import com.google.java.contract.Requires;
 import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.QualityUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.variantcontext.Allele;
 
@@ -52,7 +51,7 @@ public class AFCalcResult {
     private final double[] log10PriorsOfAC;
     private final double[] log10PosteriorsOfAC;
 
-    private final Map<Allele, Double> log10pNonRefByAllele;
+    private final Map<Allele, Double> log10pRefByAllele;
 
     /**
      * The AC values for all ALT alleles at the MLE
@@ -74,16 +73,16 @@ public class AFCalcResult {
                         final List<Allele> allelesUsedInGenotyping,
                         final double[] log10LikelihoodsOfAC,
                         final double[] log10PriorsOfAC,
-                        final Map<Allele, Double> log10pNonRefByAllele) {
+                        final Map<Allele, Double> log10pRefByAllele) {
         if ( allelesUsedInGenotyping == null || allelesUsedInGenotyping.size() < 1 ) throw new IllegalArgumentException("allelesUsedInGenotyping must be non-null list of at least 1 value " + allelesUsedInGenotyping);
         if ( alleleCountsOfMLE == null ) throw new IllegalArgumentException("alleleCountsOfMLE cannot be null");
         if ( alleleCountsOfMLE.length != allelesUsedInGenotyping.size() - 1) throw new IllegalArgumentException("alleleCountsOfMLE.length " + alleleCountsOfMLE.length + " != allelesUsedInGenotyping.size() " + allelesUsedInGenotyping.size());
         if ( nEvaluations < 0 ) throw new IllegalArgumentException("nEvaluations must be >= 0 but saw " + nEvaluations);
         if ( log10LikelihoodsOfAC.length != 2 ) throw new IllegalArgumentException("log10LikelihoodsOfAC must have length equal 2");
         if ( log10PriorsOfAC.length != 2 ) throw new IllegalArgumentException("log10PriorsOfAC must have length equal 2");
-        if ( log10pNonRefByAllele == null ) throw new IllegalArgumentException("log10pNonRefByAllele cannot be null");
-        if ( log10pNonRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pNonRefByAllele has the wrong number of elements: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
-        if ( ! allelesUsedInGenotyping.containsAll(log10pNonRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pNonRefByAllele doesn't contain all of the alleles used in genotyping: log10pNonRefByAllele " + log10pNonRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
+        if ( log10pRefByAllele == null ) throw new IllegalArgumentException("log10pRefByAllele cannot be null");
+        if ( log10pRefByAllele.size() != allelesUsedInGenotyping.size() - 1 ) throw new IllegalArgumentException("log10pRefByAllele has the wrong number of elements: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
+        if ( ! allelesUsedInGenotyping.containsAll(log10pRefByAllele.keySet()) ) throw new IllegalArgumentException("log10pRefByAllele doesn't contain all of the alleles used in genotyping: log10pRefByAllele " + log10pRefByAllele + " but allelesUsedInGenotyping " + allelesUsedInGenotyping);
         if ( ! MathUtils.goodLog10ProbVector(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES, false) ) throw new IllegalArgumentException("log10LikelihoodsOfAC are bad " + Utils.join(",", log10LikelihoodsOfAC));
         if ( ! MathUtils.goodLog10ProbVector(log10PriorsOfAC, LOG_10_ARRAY_SIZES, true) ) throw new IllegalArgumentException("log10priors are bad " + Utils.join(",", log10PriorsOfAC));
 
@@ -94,7 +93,7 @@ public class AFCalcResult {
         this.log10LikelihoodsOfAC = Arrays.copyOf(log10LikelihoodsOfAC, LOG_10_ARRAY_SIZES);
         this.log10PriorsOfAC = Arrays.copyOf(log10PriorsOfAC, LOG_10_ARRAY_SIZES);
         this.log10PosteriorsOfAC = computePosteriors(log10LikelihoodsOfAC, log10PriorsOfAC);
-        this.log10pNonRefByAllele = new HashMap<Allele, Double>(log10pNonRefByAllele);
+        this.log10pRefByAllele = new HashMap<Allele, Double>(log10pRefByAllele);
     }
 
     /**
@@ -104,7 +103,7 @@ public class AFCalcResult {
      * @return
      */
     public AFCalcResult withNewPriors(final double[] log10PriorsOfAC) {
-        return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
+        return new AFCalcResult(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
     }
 
     /**
@@ -219,7 +218,7 @@ public class AFCalcResult {
     public String toString() {
         final List<String> byAllele = new LinkedList<String>();
         for ( final Allele a : getAllelesUsedInGenotyping() )
-            if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFGt0ForAllele(a)));
+            if ( a.isNonReference() ) byAllele.add(String.format("%s => MLE %d / posterior %.2f", a, getAlleleCountAtMLE(a), getLog10PosteriorOfAFEq0ForAllele(a)));
         return String.format("AFCalc%n\t\tlog10PosteriorOfAFGT0=%.2f%n\t\t%s", getLog10LikelihoodOfAFGT0(), Utils.join("\n\t\t", byAllele));
     }
 
@@ -237,7 +236,7 @@ public class AFCalcResult {
      */
     @Requires("MathUtils.goodLog10Probability(log10minPNonRef)")
     public boolean isPolymorphic(final Allele allele, final double log10minPNonRef) {
-        return getLog10PosteriorOfAFGt0ForAllele(allele) >= log10minPNonRef;
+        return getLog10PosteriorOfAFEq0ForAllele(allele) < log10minPNonRef;
     }
 
     /**
@@ -245,7 +244,7 @@ public class AFCalcResult {
      */
     public boolean isPolymorphicPhredScaledQual(final Allele allele, final double minPNonRefPhredScaledQual) {
         if ( minPNonRefPhredScaledQual < 0 ) throw new IllegalArgumentException("phredScaledQual " + minPNonRefPhredScaledQual + " < 0 ");
-        final double log10Threshold = Math.log10(QualityUtils.qualToProb(minPNonRefPhredScaledQual));
+        final double log10Threshold = minPNonRefPhredScaledQual / -10;
         return isPolymorphic(allele, log10Threshold);
     }
 
@@ -263,7 +262,16 @@ public class AFCalcResult {
     }
 
     /**
-     * Returns the log10 probability that allele is segregating
+     * Returns the log10 probability that allele is not segregating
+     *
+     * Note that this function is p not segregating so that we can store
+     * internally the log10 value of AF == 0, which grows very quickly
+     * negative and yet has sufficient resolution for high confidence tests.
+     * For example, if log10pRef == -100, not an unreasonably high number,
+     * if we tried to store log10pNonRef we'd be looking at 1 - 10^-100, which
+     * quickly underflows to 1.  So the logic here is backward from what
+     * you really want (the p of segregating) but we do that for numerical
+     * reasons
      *
      * Unlike the sites-level annotation, this calculation is specific to allele, and can be
      * used to separately determine how much evidence there is that allele is independently
@@ -272,11 +280,11 @@ public class AFCalcResult {
      * evidence for one allele but not so much for any other allele
      *
      * @param allele the allele we're interested in, must be in getAllelesUsedInGenotyping
-     * @return the log10 probability that allele is segregating at this site
+     * @return the log10 probability that allele is not segregating at this site
      */
     @Ensures("MathUtils.goodLog10Probability(result)")
-    public double getLog10PosteriorOfAFGt0ForAllele(final Allele allele) {
-        final Double log10pNonRef = log10pNonRefByAllele.get(allele);
+    public double getLog10PosteriorOfAFEq0ForAllele(final Allele allele) {
+        final Double log10pNonRef = log10pRefByAllele.get(allele);
         if ( log10pNonRef == null ) throw new IllegalArgumentException("Unknown allele " + allele);
         return log10pNonRef;
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
index f13fe4429..b138ddf70 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/ExactCallLogger.java
@@ -77,7 +77,7 @@ public class ExactCallLogger implements Cloneable {
         for ( final Allele allele : result.getAllelesUsedInGenotyping() ) {
             if ( allele.isNonReference() ) {
                 printCallElement(vc, "MLE", allele, result.getAlleleCountAtMLE(allele));
-                printCallElement(vc, "pNonRefByAllele", allele, result.getLog10PosteriorOfAFGt0ForAllele(allele));
+                printCallElement(vc, "pRefByAllele", allele, result.getLog10PosteriorOfAFEq0ForAllele(allele));
             }
         }
 
@@ -123,7 +123,7 @@ public class ExactCallLogger implements Cloneable {
             final double[] posteriors = new double[2];
             final double[] priors = MathUtils.normalizeFromLog10(new double[]{0.5, 0.5}, true);
             final List<Integer> mle = new ArrayList<Integer>();
-            final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>();
+            final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>();
             long runtimeNano = -1;
 
             GenomeLoc currentLoc = null;
@@ -148,7 +148,7 @@ public class ExactCallLogger implements Cloneable {
                         builder.chr(currentLoc.getContig()).start(currentLoc.getStart()).stop(stop);
                         builder.genotypes(genotypes);
                         final int[] mleInts = ArrayUtils.toPrimitive(mle.toArray(new Integer[]{}));
-                        final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pNonRefByAllele);
+                        final AFCalcResult result = new AFCalcResult(mleInts, 1, alleles, posteriors, priors, log10pRefByAllele);
                         calls.add(new ExactCall(builder.make(), runtimeNano, result));
                     }
                     break;
@@ -165,9 +165,9 @@ public class ExactCallLogger implements Cloneable {
                     posteriors[1] = Double.valueOf(value);
                 } else if (variable.equals("MLE")) {
                     mle.add(Integer.valueOf(value));
-                } else if (variable.equals("pNonRefByAllele")) {
+                } else if (variable.equals("pRefByAllele")) {
                     final Allele a = Allele.create(key);
-                    log10pNonRefByAllele.put(a, Double.valueOf(value));
+                    log10pRefByAllele.put(a, Double.valueOf(value));
                 } else if (variable.equals("runtime.nano")) {
                     runtimeNano = Long.valueOf(value);
                 } else {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
index d0b801a20..937ef2ffc 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/IndependentAllelesDiploidExactAFCalc.java
@@ -125,8 +125,8 @@ import java.util.*;
          */
         final List<AFCalcResult> supporting;
 
-        private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pNonRefByAllele, List<AFCalcResult> supporting) {
-            super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pNonRefByAllele);
+        private MyAFCalcResult(int[] alleleCountsOfMLE, int nEvaluations, List<Allele> allelesUsedInGenotyping, double[] log10LikelihoodsOfAC, double[] log10PriorsOfAC, Map<Allele, Double> log10pRefByAllele, List<AFCalcResult> supporting) {
+            super(alleleCountsOfMLE, nEvaluations, allelesUsedInGenotyping, log10LikelihoodsOfAC, log10PriorsOfAC, log10pRefByAllele);
             this.supporting = supporting;
         }
     }
@@ -323,7 +323,7 @@ import java.util.*;
         final int nAltAlleles = sortedResultsWithThetaNPriors.size();
         final int[] alleleCountsOfMLE = new int[nAltAlleles];
         final double[] log10PriorsOfAC = new double[2];
-        final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
+        final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
 
         // the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
         double log10PosteriorOfACEq0Sum = 0.0;
@@ -348,7 +348,7 @@ import java.util.*;
             log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
 
             // bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
-            log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
+            log10pRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0());
 
             // trivial -- update the number of evaluations
             nEvaluations += sortedResultWithThetaNPriors.nEvaluations;
@@ -384,6 +384,6 @@ import java.util.*;
                 MathUtils.normalizeFromLog10(log10LikelihoodsOfAC, true),
                 // priors incorporate multiple alt alleles, must be normalized
                 MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
-                log10pNonRefByAllele, sortedResultsWithThetaNPriors);
+                log10pRefByAllele, sortedResultsWithThetaNPriors);
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
index fc26111e0..67cc79646 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/OriginalDiploidExactAFCalc.java
@@ -30,13 +30,13 @@ class OriginalDiploidExactAFCalc extends DiploidExactAFCalc {
         final double[] log10Priors = new double[]{log10AlleleFrequencyPriors[0], MathUtils.log10sumLog10(log10AlleleFrequencyPriors, 1)};
         final double[] log10Posteriors = MathUtils.vectorSum(log10Likelihoods, log10Priors);
 
-        final double log10PNonRef = log10Posteriors[1] > log10Posteriors[0] ? 0.0 : MathUtils.LOG10_P_OF_ZERO;
-        final Map<Allele, Double> log10pNonRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PNonRef);
+        final double log10PRef = log10Posteriors[1] > log10Posteriors[0] ? MathUtils.LOG10_P_OF_ZERO : 0.0;
+        final Map<Allele, Double> log10pRefByAllele = Collections.singletonMap(vc.getAlternateAllele(0), log10PRef);
 
         return new AFCalcResult(new int[]{mleK}, 0, vc.getAlleles(),
                 MathUtils.normalizeFromLog10(log10Likelihoods, true),
                 MathUtils.normalizeFromLog10(log10Priors, true),
-                log10pNonRefByAllele);
+                log10pRefByAllele);
     }
 
     /**
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
index b82ec1d29..ad6361a3f 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/StateTracker.java
@@ -165,14 +165,14 @@ final class StateTracker {
         final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
         final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
 
-        final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
+        final Map<Allele, Double> log10pRefByAllele = new HashMap<Allele, Double>(allelesUsedInGenotyping.size());
         for ( int i = 0; i < subACOfMLE.length; i++ ) {
             final Allele allele = allelesUsedInGenotyping.get(i+1);
-            final double log10PNonRef = alleleCountsOfMAP[i] > 0 ? 0 : -10000; // TODO -- a total hack but in effect what the old behavior was
-            log10pNonRefByAllele.put(allele, log10PNonRef);
+            final double log10PRef = alleleCountsOfMAP[i] > 0 ? -10000 : 0; // TODO -- a total hack but in effect what the old behavior was
+            log10pRefByAllele.put(allele, log10PRef);
         }
 
-        return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pNonRefByAllele);
+        return new AFCalcResult(subACOfMLE, nEvaluations, allelesUsedInGenotyping, log10Likelihoods, log10Priors, log10pRefByAllele);
     }
 
     // --------------------------------------------------------------------------------
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
index 7b797432d..848aaf8a3 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/indels/PairHMMIndelErrorModel.java
@@ -287,6 +287,9 @@ public class PairHMMIndelErrorModel {
                 if (startLocationInRefForHaplotypes < ref.getWindow().getStart()) {
                     startLocationInRefForHaplotypes = ref.getWindow().getStart();                                       // read starts before haplotype: read will have to be cut numStartSoftClippedBases += ref.getWindow().getStart() - startLocationInRefForHaplotypes;
                 }
+                else if (startLocationInRefForHaplotypes > ref.getWindow().getStop()) {
+                    startLocationInRefForHaplotypes = ref.getWindow().getStop();                                        // read starts after haplotype: read will have to be clipped completely;
+                }
 
                 if (stopLocationInRefForHaplotypes > ref.getWindow().getStop()) {
                     stopLocationInRefForHaplotypes = ref.getWindow().getStop();                                         // check also if end of read will go beyond reference context
@@ -338,6 +341,8 @@ public class PairHMMIndelErrorModel {
 
                         if (startLocationInRefForHaplotypes < haplotype.getStartPosition())
                             startLocationInRefForHaplotypes = haplotype.getStartPosition();
+                        else if (startLocationInRefForHaplotypes > haplotype.getStopPosition())
+                            startLocationInRefForHaplotypes = haplotype.getStopPosition();
 
                         final long indStart = startLocationInRefForHaplotypes - haplotype.getStartPosition();
                         final long indStop =  stopLocationInRefForHaplotypes - haplotype.getStartPosition();
@@ -347,8 +352,6 @@ public class PairHMMIndelErrorModel {
                             System.out.format("indStart: %d indStop: %d WinStart:%d WinStop:%d start: %d stop: %d readLength: %d C:%s\n",
                                     indStart, indStop, ref.getWindow().getStart(), ref.getWindow().getStop(), startLocationInRefForHaplotypes, stopLocationInRefForHaplotypes, read.getReadLength(), read.getCigar().toString());
 
-
-
                         final byte[] haplotypeBases = Arrays.copyOfRange(haplotype.getBases(),
                                 (int)indStart, (int)indStop);
 
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
index a65b0cb45..69907d485 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseActiveRegionsTest.java
@@ -109,12 +109,16 @@ public class TraverseActiveRegionsTest extends BaseTest {
         dictionary = reference.getSequenceDictionary();
         genomeLocParser = new GenomeLocParser(dictionary);
 
+        // TODO: test shard boundaries
+
         intervals = new ArrayList<GenomeLoc>();
         intervals.add(genomeLocParser.createGenomeLoc("1", 10, 20));
         intervals.add(genomeLocParser.createGenomeLoc("1", 1, 999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 1000, 1999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 2000, 2999));
         intervals.add(genomeLocParser.createGenomeLoc("1", 10000, 20000));
+        intervals.add(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+        intervals.add(genomeLocParser.createGenomeLoc("2", 1, 100));
         intervals.add(genomeLocParser.createGenomeLoc("20", 10000, 10100));
         intervals = IntervalUtils.sortAndMergeIntervals(genomeLocParser, intervals, IntervalMergingRule.OVERLAPPING_ONLY).toList();
 
@@ -126,6 +130,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         reads.add(buildSAMRecord("boundary_unequal", "1", 1990, 2008));
         reads.add(buildSAMRecord("extended_and_np", "1", 990, 1990));
         reads.add(buildSAMRecord("outside_intervals", "1", 5000, 6000));
+        reads.add(buildSAMRecord("end_of_chr1", "1", 249250600, 249250700));
         reads.add(buildSAMRecord("simple20", "20", 10025, 10075));
 
         // required by LocusIteratorByState, and I prefer to list them in test case order above
@@ -139,8 +144,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
         List<GenomeLoc> activeIntervals = getIsActiveIntervals(walker, intervals);
         // Contract: Every genome position in the analysis interval(s) is processed by the walker's isActive() call
         verifyEqualIntervals(intervals, activeIntervals);
-
-        // TODO: more tests and edge cases
     }
 
     private List<GenomeLoc> getIsActiveIntervals(DummyActiveRegionWalker walker, List<GenomeLoc> intervals) {
@@ -171,8 +174,6 @@ public class TraverseActiveRegionsTest extends BaseTest {
 
         Collection<ActiveRegion> activeRegions = getActiveRegions(walker, intervals).values();
         verifyActiveRegionCoverage(intervals, activeRegions);
-
-        // TODO: more tests and edge cases
     }
 
     private void verifyActiveRegionCoverage(List<GenomeLoc> intervals, Collection<ActiveRegion> activeRegions) {
@@ -231,6 +232,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -245,6 +247,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -256,6 +259,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -267,6 +271,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -278,9 +295,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
-
-        // TODO: more tests and edge cases
     }
 
     @Test
@@ -300,6 +316,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -314,6 +331,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -325,6 +343,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -336,6 +355,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -347,9 +379,8 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
-
-        // TODO: more tests and edge cases
     }
 
     @Test
@@ -370,6 +401,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         // boundary_unequal: Primary in 1:1000-1999, Non-Primary in 1:2000-2999
         // extended_and_np: Non-Primary in 1:1-999, Primary in 1:1000-1999, Extended in 1:2000-2999
         // outside_intervals: none
+        // end_of_chr1: Primary in 1:249250600-249250621
         // simple20: Primary in 20:10000-10100
 
         Map<GenomeLoc, ActiveRegion> activeRegions = getActiveRegions(walker, intervals);
@@ -384,6 +416,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 1000, 1999));
@@ -395,6 +428,7 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 2000, 2999));
@@ -406,6 +440,19 @@ public class TraverseActiveRegionsTest extends BaseTest {
         getRead(region, "boundary_unequal");
         getRead(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
+        verifyReadNotPlaced(region, "simple20");
+
+        region = activeRegions.get(genomeLocParser.createGenomeLoc("1", 249250600, 249250621));
+
+        verifyReadNotPlaced(region, "simple");
+        verifyReadNotPlaced(region, "overlap_equal");
+        verifyReadNotPlaced(region, "overlap_unequal");
+        verifyReadNotPlaced(region, "boundary_equal");
+        verifyReadNotPlaced(region, "boundary_unequal");
+        verifyReadNotPlaced(region, "extended_and_np");
+        verifyReadNotPlaced(region, "outside_intervals");
+        getRead(region, "end_of_chr1");
         verifyReadNotPlaced(region, "simple20");
 
         region = activeRegions.get(genomeLocParser.createGenomeLoc("20", 10000, 10100));
@@ -417,9 +464,13 @@ public class TraverseActiveRegionsTest extends BaseTest {
         verifyReadNotPlaced(region, "boundary_unequal");
         verifyReadNotPlaced(region, "extended_and_np");
         verifyReadNotPlaced(region, "outside_intervals");
+        verifyReadNotPlaced(region, "end_of_chr1");
         getRead(region, "simple20");
+    }
 
-        // TODO: more tests and edge cases
+    @Test
+    public void testUnmappedReads() {
+        // TODO
     }
 
     private void verifyReadNotPlaced(ActiveRegion region, String readName) {

From 3a420d163e0e07b23ca29acedd83c572534877de Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Sun, 9 Dec 2012 23:40:03 -0500
Subject: [PATCH 213/236] (1) changes in catVariants (work still under
 development) (2) changes to CV to throw an error when GenotypeMergeType is
 PRIORITIZE but no priority (rod_priority_list) is not given. Reported by
 TechnicalVault on the forum on Nov 14 2012

---
 .../walkers/variantutils/CombineVariants.java |  2 +-
 .../variantcontext/VariantContextUtils.java   | 45 ++-----------------
 2 files changed, 5 insertions(+), 42 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index e710befc9..1d4913769 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -228,7 +228,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
         if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
             throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
 
-        if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ){
+        if ( PRIORITY_STRING != null || genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ){
             priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
             if ( rodNames.size() != priority.size() )
                 throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 14fe0a184..81959c998 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -451,7 +451,7 @@ public class VariantContextUtils {
         if ( unsortedVCs == null || unsortedVCs.size() == 0 )
             return null;
 
-        if ( annotateOrigin && priorityListOfVCs == null && genotypeMergeOptions == GenotypeMergeType.PRIORITIZE)
+        if ( annotateOrigin && priorityListOfVCs == null )
             throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");
 
         if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
@@ -597,7 +597,7 @@ public class VariantContextUtils {
 
         if ( annotateOrigin ) { // we care about where the call came from
             String setValue;
-            if ( nFiltered == 0 && variantSources.size() == preFilteredVCs.size() ) // nothing was unfiltered
+            if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
                 setValue = MERGE_INTERSECTION;
             else if ( nFiltered == VCs.size() )     // everything was filtered out
                 setValue = MERGE_FILTER_IN_ALL;
@@ -840,11 +840,8 @@ public class VariantContextUtils {
         if ( mergeOption == GenotypeMergeType.PRIORITIZE && priorityListOfVCs == null )
             throw new IllegalArgumentException("Cannot merge calls by priority with a null priority list");
 
-        if ( mergeOption != GenotypeMergeType.PRIORITIZE ){
-            if (priorityListOfVCs != null )
-                logger.info("Priority string was provided but is not used since GenotypeMergeType is not PRIORITIZE");
+        if ( priorityListOfVCs == null || mergeOption == GenotypeMergeType.UNSORTED )
             return new ArrayList<VariantContext>(unsortedVCs);
-        }
         else {
             ArrayList<VariantContext> sorted = new ArrayList<VariantContext>(unsortedVCs);
             Collections.sort(sorted, new CompareByPriority(priorityListOfVCs));
@@ -982,40 +979,6 @@ public class VariantContextUtils {
     private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
     public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
 
-    /**
-     * Split variant context into its biallelic components if there are more than 2 alleles
-     *
-     * For VC has A/B/C alleles, returns A/B and A/C contexts.
-     * Genotypes are all no-calls now (it's not possible to fix them easily)
-     * Alleles are right trimmed to satisfy VCF conventions
-     *
-     * If vc is biallelic or non-variant it is just returned
-     *
-     * Chromosome counts are updated (but they are by definition 0)
-     *
-     * @param vc a potentially multi-allelic variant context
-     * @return a list of bi-allelic (or monomorphic) variant context
-     */
-    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
-        if ( ! vc.isVariant() || vc.isBiallelic() )
-            // non variant or biallelics already satisfy the contract
-            return Collections.singletonList(vc);
-        else {
-            final List<VariantContext> biallelics = new LinkedList<VariantContext>();
-
-            for ( final Allele alt : vc.getAlternateAlleles() ) {
-                VariantContextBuilder builder = new VariantContextBuilder(vc);
-                final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
-                builder.alleles(alleles);
-                builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
-                calculateChromosomeCounts(builder, true);
-                biallelics.add(reverseTrimAlleles(builder.make()));
-            }
-
-            return biallelics;
-        }
-    }
-
     /**
      * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
      *
@@ -1270,7 +1233,7 @@ public class VariantContextUtils {
      * @param testString             String to test
      * @return                       Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
      */
-    public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
+    protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
         int numRepeats = 0;
         for (int start = 0; start < testString.length; start += repeatUnit.length) {
             int end = start + repeatUnit.length;

From 46edab6d6adc0e787a9f939a9f3b5e1b9e1bc212 Mon Sep 17 00:00:00 2001
From: David Roazen <droazen@broadinstitute.org>
Date: Mon, 26 Nov 2012 12:44:48 -0500
Subject: [PATCH 214/236] Use the new downsampling implementation by default

-Switch back to the old implementation, if needed, with --use_legacy_downsampler

-LocusIteratorByStateExperimental becomes the new LocusIteratorByState, and
the original LocusIteratorByState becomes LegacyLocusIteratorByState

-Similarly, the ExperimentalReadShardBalancer becomes the new ReadShardBalancer,
with the old one renamed to LegacyReadShardBalancer

-Performance improvements: locus traversals used to be 20% slower in the new
downsampling implementation, now they are roughly the same speed.

-Tests show a very high level of concordance with UG calls from the previous
implementation, with some new calls and edge cases that still require more examination.

-With the new implementation, can now use -dcov with ReadWalkers to set a limit
on the max # of reads per alignment start position per sample. Appropriate value
for ReadWalker dcov may be in the single digits for some tools, but this too
requires more investigation.
---
 ...GenotyperGeneralPloidyIntegrationTest.java |   4 +-
 .../UnifiedGenotyperIntegrationTest.java      |  10 +-
 .../sting/gatk/GenomeAnalysisEngine.java      |  18 +-
 .../sting/gatk/WalkerManager.java             |  32 +-
 .../arguments/GATKArgumentCollection.java     |  11 +-
 .../gatk/datasources/providers/LocusView.java |   9 +-
 .../gatk/datasources/reads/BAMScheduler.java  |   7 +-
 .../reads/ExperimentalReadShardBalancer.java  | 228 --------
 .../reads/LegacyReadShardBalancer.java        | 129 +++++
 .../datasources/reads/ReadShardBalancer.java  | 187 ++++--
 .../gatk/datasources/reads/SAMDataSource.java |  71 ++-
 .../gatk/downsampling/DownsamplingMethod.java |  48 +-
 .../downsampling/PassThroughDownsampler.java  | 106 ++++
 .../sting/gatk/executive/WindowMaker.java     |  13 +-
 ...l.java => LegacyLocusIteratorByState.java} | 531 +++++++++++++----
 .../gatk/iterators/LocusIteratorByState.java  | 533 ++++--------------
 ...r.java => LegacyReservoirDownsampler.java} |   6 +-
 .../reads/DownsamplerBenchmark.java           |   5 +-
 ...st.java => ReadShardBalancerUnitTest.java} |   8 +-
 ...> LegacyLocusIteratorByStateUnitTest.java} | 438 +++++---------
 .../LocusIteratorByStateUnitTest.java         | 404 +++++++++----
 .../traversals/TraverseReadsUnitTest.java     |   4 +-
 .../LegacyReservoirDownsamplerUnitTest.java   |  16 +-
 23 files changed, 1490 insertions(+), 1328 deletions(-)
 delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancer.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java
 rename public/java/src/org/broadinstitute/sting/gatk/iterators/{LocusIteratorByStateExperimental.java => LegacyLocusIteratorByState.java} (61%)
 rename public/java/src/org/broadinstitute/sting/utils/{ReservoirDownsampler.java => LegacyReservoirDownsampler.java} (94%)
 rename public/java/test/org/broadinstitute/sting/gatk/datasources/reads/{ExperimentalReadShardBalancerUnitTest.java => ReadShardBalancerUnitTest.java} (97%)
 rename public/java/test/org/broadinstitute/sting/gatk/iterators/{LocusIteratorByStateExperimentalUnitTest.java => LegacyLocusIteratorByStateUnitTest.java} (50%)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
index 73bc8fba6..f26194e00 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperGeneralPloidyIntegrationTest.java
@@ -80,11 +80,11 @@ public class UnifiedGenotyperGeneralPloidyIntegrationTest extends WalkerTest {
 
     @Test(enabled = true)
     public void testMT_SNP_DISCOVERY_sp4() {
-         PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","dd568dc30be90135a3a8957a45a7321c");
+         PC_MT_Test(CEUTRIO_BAM, " -maxAltAlleles 1 -ploidy 8", "MT_SNP_DISCOVERY_sp4","3fc6f4d458313616727c60e49c0e852b");
     }
 
     @Test(enabled = true)
     public void testMT_SNP_GGA_sp10() {
-        PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "bf793c43b635a931207170be8035b288");
+        PC_MT_Test(CEUTRIO_BAM, String.format(" -maxAltAlleles 1 -ploidy 20 -gt_mode GENOTYPE_GIVEN_ALLELES  -out_mode EMIT_ALL_SITES -alleles %s",NA12891_CALLS), "MT_SNP_GGA_sp10", "1bebbc0f28bff6fd64736ccca8839df8");
     }
 }
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index 7459d131b..f2b2dfb7d 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -62,7 +62,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMultipleSNPAlleles() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + privateTestDir + "multiallelic.snps.bam -o %s -L " + privateTestDir + "multiallelic.snps.intervals", 1,
-                Arrays.asList("d20c7a143b899f0239bf64b652ad3edb"));
+                Arrays.asList("97df6c2a8d390d43b9bdf56c979d9b09"));
         executeTest("test Multiple SNP alleles", spec);
     }
 
@@ -86,7 +86,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testMismatchedPLs() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -glm INDEL -I " + privateTestDir + "mismatchedPLs.bam -o %s -L 1:24020341", 1,
-                Arrays.asList("fb204e821a24d03bd3a671b6e01c449a"));
+                Arrays.asList("935ee705ffe8cc6bf1d9efcceea271c8"));
         executeTest("test mismatched PLs", spec);
     }
 
@@ -437,7 +437,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testNsInCigar() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "testWithNs.bam -o %s -L 8:141813600-141813700 -out_mode EMIT_ALL_SITES", 1,
-                Arrays.asList("32f18ba50406cd8c8069ba07f2f89558"));
+                Arrays.asList("4d36969d4f8f1094f1fb6e7e085c19f6"));
         executeTest("test calling on reads with Ns in CIGAR", spec);
     }
 
@@ -451,13 +451,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     public void testReducedBam() {
         WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
                 "-T UnifiedGenotyper -R " + b37KGReference + " --no_cmdline_in_header -I " + privateTestDir + "bamExample.ReducedRead.ADAnnotation.bam -o %s -L 1:67,225,396-67,288,518", 1,
-                Arrays.asList("c1077662411164182c5f75478344f83d"));
+                Arrays.asList("092e42a712afb660ec79ff11c55933e2"));
         executeTest("test calling on a ReducedRead BAM", spec);
     }
 
     @Test
     public void testReducedBamSNPs() {
-        testReducedCalling("SNP", "dee6590e3b7079890bc3a9cb372c297e");
+        testReducedCalling("SNP", "c0de74ab8f4f14eb3a2c5d55c200ac5f");
     }
 
     @Test
diff --git a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
index b7000e0ee..1187039bb 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java
@@ -445,13 +445,17 @@ public class GenomeAnalysisEngine {
 
     protected DownsamplingMethod getDownsamplingMethod() {
         GATKArgumentCollection argCollection = this.getArguments();
-        boolean useExperimentalDownsampling = argCollection.enableExperimentalDownsampling;
+
+        // Legacy downsampler can only be selected via the command line, not via walker annotations
+        boolean useLegacyDownsampler = argCollection.useLegacyDownsampler;
 
         DownsamplingMethod commandLineMethod = argCollection.getDownsamplingMethod();
-        DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useExperimentalDownsampling);
-        DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useExperimentalDownsampling);
+        DownsamplingMethod walkerMethod = WalkerManager.getDownsamplingMethod(walker, useLegacyDownsampler);
+        DownsamplingMethod defaultMethod = DownsamplingMethod.getDefaultDownsamplingMethod(walker, useLegacyDownsampler);
 
-        return commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
+        DownsamplingMethod method = commandLineMethod != null ? commandLineMethod : (walkerMethod != null ? walkerMethod : defaultMethod);
+        method.checkCompatibilityWithWalker(walker);
+        return method;
     }
 
     protected void setDownsamplingMethod(DownsamplingMethod method) {
@@ -580,9 +584,9 @@ public class GenomeAnalysisEngine {
                         throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
                 }
 
-                // Use the experimental ReadShardBalancer if experimental downsampling is enabled
-                ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useExperimentalDownsampling ?
-                                                  new ExperimentalReadShardBalancer() :
+                // Use the legacy ReadShardBalancer if legacy downsampling is enabled
+                ShardBalancer readShardBalancer = downsamplingMethod != null && downsamplingMethod.useLegacyDownsampler ?
+                                                  new LegacyReadShardBalancer() :
                                                   new ReadShardBalancer();
 
                 if(intervals == null)
diff --git a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
index 28b5f918d..1a9162e51 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/WalkerManager.java
@@ -305,11 +305,23 @@ public class WalkerManager extends PluginManager<Walker> {
      * Gets the type of downsampling method requested by the walker.  If an alternative
      * downsampling method is specified on the command-line, the command-line version will
      * be used instead.
-     * @param walkerClass The class of the walker to interrogate.
-     * @param useExperimentalDownsampling If true, use the experimental downsampling implementation
+     * @param walker The walker to interrogate.
+     * @param useLegacyDownsampler If true, use the legacy downsampling implementation
      * @return The downsampling method, as specified by the walker.  Null if none exists.
      */
-    public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useExperimentalDownsampling) {
+    public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useLegacyDownsampler) {
+        return getDownsamplingMethod(walker.getClass(), useLegacyDownsampler);
+    }
+
+    /**
+     * Gets the type of downsampling method requested by the walker.  If an alternative
+     * downsampling method is specified on the command-line, the command-line version will
+     * be used instead.
+     * @param walkerClass The class of the walker to interrogate.
+     * @param useLegacyDownsampler If true, use the legacy downsampling implementation
+     * @return The downsampling method, as specified by the walker.  Null if none exists.
+     */
+    public static DownsamplingMethod getDownsamplingMethod(Class<? extends Walker> walkerClass, boolean useLegacyDownsampler) {
         DownsamplingMethod downsamplingMethod = null;
 
         if( walkerClass.isAnnotationPresent(Downsample.class) ) {
@@ -317,7 +329,7 @@ public class WalkerManager extends PluginManager<Walker> {
             DownsampleType type = downsampleParameters.by();
             Integer toCoverage = downsampleParameters.toCoverage() >= 0 ? downsampleParameters.toCoverage() : null;
             Double toFraction = downsampleParameters.toFraction() >= 0.0d ? downsampleParameters.toFraction() : null;
-            downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useExperimentalDownsampling);
+            downsamplingMethod = new DownsamplingMethod(type,toCoverage,toFraction,useLegacyDownsampler);
         }
 
         return downsamplingMethod;
@@ -331,18 +343,6 @@ public class WalkerManager extends PluginManager<Walker> {
         return walker.getClass().getAnnotation(BAQMode.class).ApplicationTime();
     }    
 
-    /**
-     * Gets the type of downsampling method requested by the walker.  If an alternative
-     * downsampling method is specified on the command-line, the command-line version will
-     * be used instead.
-     * @param walker The walker to interrogate.
-     * @param useExperimentalDownsampling If true, use the experimental downsampling implementation
-     * @return The downsampling method, as specified by the walker.  Null if none exists.
-     */
-    public static DownsamplingMethod getDownsamplingMethod(Walker walker, boolean useExperimentalDownsampling) {
-        return getDownsamplingMethod(walker.getClass(), useExperimentalDownsampling);
-    }
-
     /**
      * Create a name for this type of walker.
      *
diff --git a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
index d0f3e91e0..d9c7c9008 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/arguments/GATKArgumentCollection.java
@@ -162,12 +162,11 @@ public class GATKArgumentCollection {
     @Argument(fullName = "downsample_to_fraction", shortName = "dfrac", doc = "Fraction [0.0-1.0] of reads to downsample to", required = false)
     public Double downsampleFraction = null;
 
-    @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus", required = false)
+    @Argument(fullName = "downsample_to_coverage", shortName = "dcov", doc = "Coverage [integer] to downsample to at any given locus; note that downsampled reads are randomly selected from all possible reads at a locus. For non-locus-based traversals (eg., ReadWalkers), this sets the maximum number of reads at each alignment start position.", required = false)
     public Integer downsampleCoverage = null;
 
-    @Argument(fullName = "enable_experimental_downsampling", shortName = "enable_experimental_downsampling", doc = "Enable experimental engine-level downsampling", required = false)
-    @Hidden
-    public boolean enableExperimentalDownsampling = false;
+    @Argument(fullName = "use_legacy_downsampler", shortName = "use_legacy_downsampler", doc = "Use the legacy downsampling implementation instead of the newer, less-tested implementation", required = false)
+    public boolean useLegacyDownsampler = false;
 
     /**
      * Gets the downsampling method explicitly specified by the user.  If the user didn't specify
@@ -178,7 +177,7 @@ public class GATKArgumentCollection {
         if ( downsamplingType == null && downsampleFraction == null && downsampleCoverage == null )
             return null;
 
-        return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, enableExperimentalDownsampling);
+        return new DownsamplingMethod(downsamplingType, downsampleCoverage, downsampleFraction, useLegacyDownsampler);
     }
 
     /**
@@ -192,7 +191,7 @@ public class GATKArgumentCollection {
         downsamplingType = method.type;
         downsampleCoverage = method.toCoverage;
         downsampleFraction = method.toFraction;
-        enableExperimentalDownsampling = method.useExperimentalDownsampling;
+        useLegacyDownsampler = method.useLegacyDownsampler;
     }
 
     // --------------------------------------------------------------------------------------------------------------
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
index cd3403f2f..c12bce208 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/providers/LocusView.java
@@ -136,11 +136,12 @@ public abstract class LocusView extends LocusIterator implements View {
         // Cache the current and apply filtering.
         AlignmentContext current = nextLocus;
 
-        // The old ALL_READS downsampling implementation -- only use if we're not using the new experimental downsampling:
-        if( ! sourceInfo.getDownsamplingMethod().useExperimentalDownsampling &&
-            sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS && sourceInfo.getDownsamplingMethod().toCoverage != null ) {
+        // The old ALL_READS downsampling implementation -- use only if legacy downsampling was requested:
+        if ( sourceInfo.getDownsamplingMethod().useLegacyDownsampler &&
+             sourceInfo.getDownsamplingMethod().type == DownsampleType.ALL_READS &&
+             sourceInfo.getDownsamplingMethod().toCoverage != null ) {
 
-            current.downsampleToCoverage( sourceInfo.getDownsamplingMethod().toCoverage );
+            current.downsampleToCoverage(sourceInfo.getDownsamplingMethod().toCoverage);
         }
 
         // Indicate that the next operation will need to advance.
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
index 8ee7e0439..cb33c5ab8 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
@@ -134,12 +134,11 @@ public class BAMScheduler implements Iterator<FilePointer> {
 
         // Only use the deprecated SAMDataSource.getCurrentPosition() if we're not using experimental downsampling
         // TODO: clean this up once the experimental downsampling engine fork collapses
-        if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useExperimentalDownsampling ) {
-            currentPosition = dataSource.getInitialReaderPositions();
+        if ( dataSource.getReadsInfo().getDownsamplingMethod() != null && dataSource.getReadsInfo().getDownsamplingMethod().useLegacyDownsampler ) {
+            currentPosition = dataSource.getCurrentPosition();
         }
         else {
-            currentPosition = dataSource.getCurrentPosition();
-
+            currentPosition = dataSource.getInitialReaderPositions();
         }
 
         for(SAMReaderID reader: dataSource.getReaderIDs())
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancer.java
deleted file mode 100644
index 0440c7eae..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancer.java
+++ /dev/null
@@ -1,228 +0,0 @@
-/*
- * Copyright (c) 2012, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.datasources.reads;
-
-import net.sf.picard.util.PeekableIterator;
-import net.sf.samtools.SAMRecord;
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
-
-import java.util.*;
-
-/**
- * Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
- *
- * When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
- * together into one monolithic FilePointer, create one persistent set of read iterators over
- * that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
- * fill read shards with reads.
- *
- * This strategy has several important advantages:
- *
- * 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
- *    contig will have regions that overlap with other FilePointers on the same contig, due
- *    to the limited granularity of BAM index data. By creating only one FilePointer per contig,
- *    we avoid having to track how much of each file region we've visited (as we did in the
- *    former implementation), we avoid expensive non-sequential access patterns in the files,
- *    and we avoid having to repeatedly re-create our iterator chain for every small region
- *    of interest.
- *
- * 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
- *    persistent set of read iterators (which include the downsampling iterator(s)) per contig,
- *    the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
- *    loses crucial state information while downsampling within a contig.
- *
- * TODO: There is also at least one important disadvantage:
- *
- * 1. We load more BAM index data into memory at once, and this work is done upfront before processing
- *    the next contig, creating a delay before traversal of each contig. This delay may be
- *    compensated for by the gains listed in #1 above, and we may be no worse off overall in
- *    terms of total runtime, but we need to verify this empirically.
- *
- * @author David Roazen
- */
-public class ExperimentalReadShardBalancer extends ShardBalancer {
-
-    private static Logger logger = Logger.getLogger(ExperimentalReadShardBalancer.class);
-
-    /**
-     * Convert iterators of file pointers into balanced iterators of shards.
-     * @return An iterator over balanced shards.
-     */
-    public Iterator<Shard> iterator() {
-        return new Iterator<Shard>() {
-            /**
-             * The cached shard to be returned next.  Prefetched in the peekable iterator style.
-             */
-            private Shard nextShard = null;
-
-            /**
-             * The file pointer currently being processed.
-             */
-            private FilePointer currentContigFilePointer = null;
-
-            /**
-             * Iterator over the reads from the current contig's file pointer. The same iterator will be
-             * used to fill all shards associated with a given file pointer
-             */
-            private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
-
-            /**
-             * How many FilePointers have we pulled from the filePointers iterator?
-             */
-            private int totalFilePointersConsumed = 0;
-
-            /**
-             * Have we encountered a monolithic FilePointer?
-             */
-            private boolean encounteredMonolithicFilePointer = false;
-
-
-            {
-                createNextContigFilePointer();
-                advance();
-            }
-
-            public boolean hasNext() {
-                return nextShard != null;
-            }
-
-            public Shard next() {
-                if ( ! hasNext() )
-                    throw new NoSuchElementException("No next read shard available");
-                Shard currentShard = nextShard;
-                advance();
-                return currentShard;
-            }
-
-            private void advance() {
-                nextShard = null;
-
-                // May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
-                while ( nextShard == null && currentContigFilePointer != null ) {
-
-                    // If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
-                    if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
-
-                        // Close the old, exhausted chain of iterators to release resources
-                        currentContigReadsIterator.close();
-
-                        // Advance to the FilePointer for the next contig
-                        createNextContigFilePointer();
-
-                        // We'll need to create a fresh iterator for this file pointer when we create the first
-                        // shard for it below.
-                        currentContigReadsIterator = null;
-                    }
-
-                    // At this point our currentContigReadsIterator may be null or non-null depending on whether or not
-                    // this is our first shard for this file pointer.
-                    if ( currentContigFilePointer != null ) {
-                        Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
-
-                        // Create a new reads iterator only when we've just advanced to the file pointer for the next
-                        // contig. It's essential that the iterators persist across all shards that share the same contig
-                        // to allow the downsampling to work properly.
-                        if ( currentContigReadsIterator == null ) {
-                            currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
-                        }
-
-                        if ( currentContigReadsIterator.hasNext() ) {
-                            shard.fill(currentContigReadsIterator);
-                            nextShard = shard;
-                        }
-                    }
-                }
-            }
-
-            /**
-             * Aggregate all FilePointers for the next contig together into one monolithic FilePointer
-             * to avoid boundary issues with visiting the same file regions more than once (since more
-             * granular FilePointers will have regions that overlap with other nearby FilePointers due
-             * to the nature of BAM indices).
-             *
-             * By creating one persistent set of iterators per contig we also avoid boundary artifacts
-             * in the engine-level downsampling.
-             *
-             * TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
-             * TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
-             * TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
-             * TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
-             * TODO: locus traversals).
-             */
-            private void createNextContigFilePointer() {
-                currentContigFilePointer = null;
-                List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
-
-                logger.info("Loading BAM index data for next contig");
-
-                while ( filePointers.hasNext() ) {
-
-                    // Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
-                    // it is the ONLY FilePointer we ever encounter
-                    if ( encounteredMonolithicFilePointer ) {
-                        throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
-                    }
-                    if ( filePointers.peek().isMonolithic() ) {
-                        if ( totalFilePointersConsumed > 0 ) {
-                            throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
-                        }
-                        encounteredMonolithicFilePointer = true;
-                        logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
-                    }
-
-                    // If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
-                    // same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
-                    if ( nextContigFilePointers.isEmpty() ||
-                             (! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
-                             nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
-                                 (nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
-
-                        nextContigFilePointers.add(filePointers.next());
-                        totalFilePointersConsumed++;
-                    }
-                    else {
-                        break; // next FilePointer is on a different contig or has different mapped/unmapped status,
-                               // save it for next time
-                    }
-                }
-
-                if ( ! nextContigFilePointers.isEmpty() ) {
-                    currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
-                }
-
-                if ( currentContigFilePointer != null ) {
-                    logger.info("Done loading BAM index data for next contig");
-                    logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
-                }
-            }
-
-            public void remove() {
-                throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
-            }
-        };
-    }
-
-}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java
new file mode 100644
index 000000000..f5b4fba8e
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/LegacyReadShardBalancer.java
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads;
+
+import net.sf.samtools.GATKBAMFileSpan;
+import net.sf.samtools.SAMFileSpan;
+
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+/**
+ * Divide up large file pointers containing reads into more manageable subcomponents.
+ *
+ * TODO: delete this class once the experimental downsampling engine fork collapses
+ */
+public class LegacyReadShardBalancer extends ShardBalancer {
+    /**
+     * Convert iterators of file pointers into balanced iterators of shards.
+     * @return An iterator over balanced shards.
+     */
+    public Iterator<Shard> iterator() {
+        return new Iterator<Shard>() {
+            /**
+             * The cached shard to be returned next.  Prefetched in the peekable iterator style.
+             */
+            private Shard nextShard = null;
+
+            /**
+             * The file pointer currently being processed.
+             */
+            private FilePointer currentFilePointer;
+
+            /**
+             * Ending position of the last shard in the file.
+             */
+            private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
+
+            {
+                if(filePointers.hasNext())
+                    currentFilePointer = filePointers.next();
+                advance();
+            }
+
+            public boolean hasNext() {
+                return nextShard != null;
+            }
+
+            public Shard next() {
+                if(!hasNext())
+                    throw new NoSuchElementException("No next read shard available");
+                Shard currentShard = nextShard;
+                advance();
+                return currentShard;
+            }
+
+            public void remove() {
+                throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
+            }
+
+            private void advance() {
+                Map<SAMReaderID,SAMFileSpan> shardPosition;
+                nextShard = null;
+
+                Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
+                while(selectedReaders.size() == 0 && currentFilePointer != null) {
+                    shardPosition = currentFilePointer.fileSpans;
+
+                    for(SAMReaderID id: shardPosition.keySet()) {
+                        SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
+                        selectedReaders.put(id,fileSpan);
+                    }
+
+                    if(!isEmpty(selectedReaders)) {
+                        Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
+                        readsDataSource.fillShard(shard);
+
+                        if(!shard.isBufferEmpty()) {
+                            nextShard = shard;
+                            break;
+                        }
+                    }
+
+                    selectedReaders.clear();
+                    currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
+                }
+
+                position = readsDataSource.getCurrentPosition();
+            }
+
+            /**
+             * Detects whether the list of file spans contain any read data.
+             * @param selectedSpans Mapping of readers to file spans.
+             * @return True if file spans are completely empty; false otherwise.
+             */
+            private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
+                for(SAMFileSpan fileSpan: selectedSpans.values()) {
+                    if(!fileSpan.isEmpty())
+                        return false;
+                }
+                return true;
+            }
+        };
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java
index 18fafb95d..8cee535b0 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancer.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, The Broad Institute
+ * Copyright (c) 2012, The Broad Institute
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -24,20 +24,49 @@
 
 package org.broadinstitute.sting.gatk.datasources.reads;
 
-import net.sf.samtools.GATKBAMFileSpan;
-import net.sf.samtools.SAMFileSpan;
+import net.sf.picard.util.PeekableIterator;
+import net.sf.samtools.SAMRecord;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.Map;
-import java.util.NoSuchElementException;
+import java.util.*;
 
 /**
- * Divide up large file pointers containing reads into more manageable subcomponents.
+ * Convert from an unbalanced iterator over FilePointers to a balanced iterator over Shards.
  *
- * TODO: delete this class once the experimental downsampling engine fork collapses
+ * When processing FilePointers, our strategy is to aggregate all FilePointers for each contig
+ * together into one monolithic FilePointer, create one persistent set of read iterators over
+ * that monolithic FilePointer, and repeatedly use that persistent set of read iterators to
+ * fill read shards with reads.
+ *
+ * This strategy has several important advantages:
+ *
+ * 1. We avoid issues with file span overlap. FilePointers that are more granular than a whole
+ *    contig will have regions that overlap with other FilePointers on the same contig, due
+ *    to the limited granularity of BAM index data. By creating only one FilePointer per contig,
+ *    we avoid having to track how much of each file region we've visited (as we did in the
+ *    former implementation), we avoid expensive non-sequential access patterns in the files,
+ *    and we avoid having to repeatedly re-create our iterator chain for every small region
+ *    of interest.
+ *
+ * 2. We avoid boundary issues with the engine-level downsampling. Since we create a single
+ *    persistent set of read iterators (which include the downsampling iterator(s)) per contig,
+ *    the downsampling process is never interrupted by FilePointer or Shard boundaries, and never
+ *    loses crucial state information while downsampling within a contig.
+ *
+ * TODO: There is also at least one important disadvantage:
+ *
+ * 1. We load more BAM index data into memory at once, and this work is done upfront before processing
+ *    the next contig, creating a delay before traversal of each contig. This delay may be
+ *    compensated for by the gains listed in #1 above, and we may be no worse off overall in
+ *    terms of total runtime, but we need to verify this empirically.
+ *
+ * @author David Roazen
  */
 public class ReadShardBalancer extends ShardBalancer {
+
+    private static Logger logger = Logger.getLogger(ReadShardBalancer.class);
+
     /**
      * Convert iterators of file pointers into balanced iterators of shards.
      * @return An iterator over balanced shards.
@@ -52,16 +81,27 @@ public class ReadShardBalancer extends ShardBalancer {
             /**
              * The file pointer currently being processed.
              */
-            private FilePointer currentFilePointer;
+            private FilePointer currentContigFilePointer = null;
 
             /**
-             * Ending position of the last shard in the file.
+             * Iterator over the reads from the current contig's file pointer. The same iterator will be
+             * used to fill all shards associated with a given file pointer
              */
-            private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
+            private PeekableIterator<SAMRecord> currentContigReadsIterator = null;
+
+            /**
+             * How many FilePointers have we pulled from the filePointers iterator?
+             */
+            private int totalFilePointersConsumed = 0;
+
+            /**
+             * Have we encountered a monolithic FilePointer?
+             */
+            private boolean encounteredMonolithicFilePointer = false;
+
 
             {
-                if(filePointers.hasNext())
-                    currentFilePointer = filePointers.next();
+                createNextContigFilePointer();
                 advance();
             }
 
@@ -70,58 +110,117 @@ public class ReadShardBalancer extends ShardBalancer {
             }
 
             public Shard next() {
-                if(!hasNext())
+                if ( ! hasNext() )
                     throw new NoSuchElementException("No next read shard available");
                 Shard currentShard = nextShard;
                 advance();
                 return currentShard;
             }
 
-            public void remove() {
-                throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
-            }
-
             private void advance() {
-                Map<SAMReaderID,SAMFileSpan> shardPosition;
                 nextShard = null;
 
-                Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
-                while(selectedReaders.size() == 0 && currentFilePointer != null) {
-                    shardPosition = currentFilePointer.fileSpans;
+                // May need multiple iterations to fill the next shard if all reads in current file spans get filtered/downsampled away
+                while ( nextShard == null && currentContigFilePointer != null ) {
 
-                    for(SAMReaderID id: shardPosition.keySet()) {
-                        SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
-                        selectedReaders.put(id,fileSpan);
+                    // If we've exhausted the current file pointer of reads, move to the next file pointer (if there is one):
+                    if ( currentContigReadsIterator != null && ! currentContigReadsIterator.hasNext() ) {
+
+                        // Close the old, exhausted chain of iterators to release resources
+                        currentContigReadsIterator.close();
+
+                        // Advance to the FilePointer for the next contig
+                        createNextContigFilePointer();
+
+                        // We'll need to create a fresh iterator for this file pointer when we create the first
+                        // shard for it below.
+                        currentContigReadsIterator = null;
                     }
 
-                    if(!isEmpty(selectedReaders)) {
-                        Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
-                        readsDataSource.fillShard(shard);
+                    // At this point our currentContigReadsIterator may be null or non-null depending on whether or not
+                    // this is our first shard for this file pointer.
+                    if ( currentContigFilePointer != null ) {
+                        Shard shard = new ReadShard(parser,readsDataSource, currentContigFilePointer.fileSpans, currentContigFilePointer.locations, currentContigFilePointer.isRegionUnmapped);
 
-                        if(!shard.isBufferEmpty()) {
+                        // Create a new reads iterator only when we've just advanced to the file pointer for the next
+                        // contig. It's essential that the iterators persist across all shards that share the same contig
+                        // to allow the downsampling to work properly.
+                        if ( currentContigReadsIterator == null ) {
+                            currentContigReadsIterator = new PeekableIterator<SAMRecord>(readsDataSource.getIterator(shard));
+                        }
+
+                        if ( currentContigReadsIterator.hasNext() ) {
+                            shard.fill(currentContigReadsIterator);
                             nextShard = shard;
-                            break;
                         }
                     }
-
-                    selectedReaders.clear();
-                    currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
                 }
-
-                position = readsDataSource.getCurrentPosition();
             }
 
             /**
-             * Detects whether the list of file spans contain any read data.
-             * @param selectedSpans Mapping of readers to file spans.
-             * @return True if file spans are completely empty; false otherwise.
+             * Aggregate all FilePointers for the next contig together into one monolithic FilePointer
+             * to avoid boundary issues with visiting the same file regions more than once (since more
+             * granular FilePointers will have regions that overlap with other nearby FilePointers due
+             * to the nature of BAM indices).
+             *
+             * By creating one persistent set of iterators per contig we also avoid boundary artifacts
+             * in the engine-level downsampling.
+             *
+             * TODO: This FilePointer aggregation should ideally be done at the BAMSchedule level for
+             * TODO: read traversals, as there's little point in the BAMSchedule emitting extremely
+             * TODO: granular FilePointers if we're just going to union them. The BAMSchedule should
+             * TODO: emit one FilePointer per contig for read traversals (but, crucially, NOT for
+             * TODO: locus traversals).
              */
-            private boolean isEmpty(Map<SAMReaderID,SAMFileSpan> selectedSpans) {
-                for(SAMFileSpan fileSpan: selectedSpans.values()) {
-                    if(!fileSpan.isEmpty())
-                        return false;
+            private void createNextContigFilePointer() {
+                currentContigFilePointer = null;
+                List<FilePointer> nextContigFilePointers = new ArrayList<FilePointer>();
+
+                logger.info("Loading BAM index data for next contig");
+
+                while ( filePointers.hasNext() ) {
+
+                    // Make sure that if we see a monolithic FilePointer (representing all regions in all files) that
+                    // it is the ONLY FilePointer we ever encounter
+                    if ( encounteredMonolithicFilePointer ) {
+                        throw new ReviewedStingException("Bug: encountered additional FilePointers after encountering a monolithic FilePointer");
+                    }
+                    if ( filePointers.peek().isMonolithic() ) {
+                        if ( totalFilePointersConsumed > 0 ) {
+                            throw new ReviewedStingException("Bug: encountered additional FilePointers before encountering a monolithic FilePointer");
+                        }
+                        encounteredMonolithicFilePointer = true;
+                        logger.debug(String.format("Encountered monolithic FilePointer: %s", filePointers.peek()));
+                    }
+
+                    // If this is the first FP we've seen, or we're dealing with mapped regions and the next FP is on the
+                    // same contig as previous FPs, or all our FPs are unmapped, add the next FP to the list of FPs to merge
+                    if ( nextContigFilePointers.isEmpty() ||
+                             (! nextContigFilePointers.get(0).isRegionUnmapped && ! filePointers.peek().isRegionUnmapped &&
+                             nextContigFilePointers.get(0).getContigIndex() == filePointers.peek().getContigIndex()) ||
+                                 (nextContigFilePointers.get(0).isRegionUnmapped && filePointers.peek().isRegionUnmapped) ) {
+
+                        nextContigFilePointers.add(filePointers.next());
+                        totalFilePointersConsumed++;
+                    }
+                    else {
+                        break; // next FilePointer is on a different contig or has different mapped/unmapped status,
+                               // save it for next time
+                    }
                 }
-                return true;
+
+                if ( ! nextContigFilePointers.isEmpty() ) {
+                    currentContigFilePointer = FilePointer.union(nextContigFilePointers, parser);
+                }
+
+                if ( currentContigFilePointer != null ) {
+                    logger.info("Done loading BAM index data for next contig");
+                    logger.debug(String.format("Next contig FilePointer: %s", currentContigFilePointer));
+                }
+            }
+
+            public void remove() {
+                throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
             }
         };
     }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
index 88de3ac9b..e99814278 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/SAMDataSource.java
@@ -466,7 +466,7 @@ public class SAMDataSource {
     /**
      * Legacy method to fill the given buffering shard with reads.
      *
-     * Shard.fill() is used instead of this method when experimental downsampling is enabled
+     * Shard.fill() is used instead of this method unless legacy downsampling is enabled
      *
      * TODO: delete this method once the experimental downsampling engine fork collapses
      *
@@ -638,7 +638,8 @@ public class SAMDataSource {
                 readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
                 readProperties.getSupplementalFilters(),
                 readProperties.getReadTransformers(),
-                readProperties.defaultBaseQualities());
+                readProperties.defaultBaseQualities(),
+                shard instanceof LocusShard);
     }
 
     private class BAMCodecIterator implements CloseableIterator<SAMRecord> {
@@ -695,6 +696,7 @@ public class SAMDataSource {
      * @param noValidationOfReadOrder Another trigger for the verifying iterator?  TODO: look into this.
      * @param supplementalFilters additional filters to apply to the reads.
      * @param defaultBaseQualities if the reads have incomplete quality scores, set them all to defaultBaseQuality.
+     * @param isLocusBasedTraversal true if we're dealing with a read stream from a LocusShard
      * @return An iterator wrapped with filters reflecting the passed-in parameters.  Will not be null.
      */
     protected StingSAMIterator applyDecoratingIterators(ReadMetrics readMetrics,
@@ -705,7 +707,8 @@ public class SAMDataSource {
                                                         Boolean noValidationOfReadOrder,
                                                         Collection<ReadFilter> supplementalFilters,
                                                         List<ReadTransformer> readTransformers,
-                                                        byte defaultBaseQualities) {
+                                                        byte defaultBaseQualities,
+                                                        boolean isLocusBasedTraversal ) {
 
         // ************************************************************************************************ //
         // *  NOTE: ALL FILTERING/DOWNSAMPLING SHOULD BE DONE BEFORE ANY ITERATORS THAT MODIFY THE READS! * //
@@ -714,12 +717,26 @@ public class SAMDataSource {
 
         wrappedIterator = StingSAMIteratorAdapter.adapt(new CountingFilteringIterator(readMetrics,wrappedIterator,supplementalFilters));
 
-        if ( readProperties.getDownsamplingMethod().useExperimentalDownsampling ) {
-            wrappedIterator = applyDownsamplingIterator(wrappedIterator);
+        // If we're using the new downsampling implementation, apply downsampling iterators at this
+        // point in the read stream for most (but not all) cases
+        if ( ! readProperties.getDownsamplingMethod().useLegacyDownsampler ) {
+
+            // For locus traversals where we're downsampling to coverage by sample, assume that the downsamplers
+            // will be invoked downstream from us in LocusIteratorByState. This improves performance by avoiding
+            // splitting/re-assembly of the read stream at this stage, and also allows for partial downsampling
+            // of individual reads.
+            boolean assumeDownstreamLIBSDownsampling = isLocusBasedTraversal &&
+                                                       readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
+                                                       readProperties.getDownsamplingMethod().toCoverage != null;
+
+            if ( ! assumeDownstreamLIBSDownsampling ) {
+                wrappedIterator = applyDownsamplingIterator(wrappedIterator);
+            }
         }
 
-        // Use the old fractional downsampler only if we're not using experimental downsampling:
-        if ( ! readProperties.getDownsamplingMethod().useExperimentalDownsampling && downsamplingFraction != null )
+        // Use the old fractional downsampler only if we're using legacy downsampling:
+        // TODO: remove this statement (and associated classes) once the downsampling engine fork collapses
+        if ( readProperties.getDownsamplingMethod().useLegacyDownsampler && downsamplingFraction != null )
             wrappedIterator = new LegacyDownsampleIterator(wrappedIterator, downsamplingFraction);
 
         // unless they've said not to validate read ordering (!noValidationOfReadOrder) and we've enabled verification,
@@ -741,19 +758,37 @@ public class SAMDataSource {
     }
 
     protected StingSAMIterator applyDownsamplingIterator( StingSAMIterator wrappedIterator ) {
-        if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
-            ReadsDownsamplerFactory<SAMRecord> downsamplerFactory = readProperties.getDownsamplingMethod().toCoverage != null ?
-                                                                    new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
-                                                                    new FractionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
-
-            return new PerSampleDownsamplingReadsIterator(wrappedIterator, downsamplerFactory);
+        if ( readProperties.getDownsamplingMethod() == null ||
+             readProperties.getDownsamplingMethod().type == DownsampleType.NONE ) {
+            return wrappedIterator;
         }
-        else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
-            ReadsDownsampler<SAMRecord> downsampler = readProperties.getDownsamplingMethod().toCoverage != null ?
-                                                      new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage) :
-                                                      new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction);
 
-            return new DownsamplingReadsIterator(wrappedIterator, downsampler);
+        if ( readProperties.getDownsamplingMethod().toFraction != null ) {
+
+            // If we're downsampling to a fraction of reads, there's no point in paying the cost of
+            // splitting/re-assembling the read stream by sample to run the FractionalDownsampler on
+            // reads from each sample separately, since the result would be the same as running the
+            // FractionalDownsampler on the entire stream. So, ALWAYS use the DownsamplingReadsIterator
+            // rather than the PerSampleDownsamplingReadsIterator, even if BY_SAMPLE downsampling
+            // was requested.
+
+            return new DownsamplingReadsIterator(wrappedIterator,
+                                                 new FractionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toFraction));
+        }
+        else if ( readProperties.getDownsamplingMethod().toCoverage != null ) {
+
+            // If we're downsampling to coverage, we DO need to pay the cost of splitting/re-assembling
+            // the read stream to run the downsampler on the reads for each individual sample separately if
+            // BY_SAMPLE downsampling was requested.
+
+            if ( readProperties.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE ) {
+                return new PerSampleDownsamplingReadsIterator(wrappedIterator,
+                                                              new SimplePositionalDownsamplerFactory<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
+            }
+            else if ( readProperties.getDownsamplingMethod().type == DownsampleType.ALL_READS ) {
+                return new DownsamplingReadsIterator(wrappedIterator,
+                                                     new SimplePositionalDownsampler<SAMRecord>(readProperties.getDownsamplingMethod().toCoverage));
+            }
         }
 
         return wrappedIterator;
diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java
index ae1d98ce0..b3f636fd6 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/DownsamplingMethod.java
@@ -50,9 +50,9 @@ public class DownsamplingMethod {
     public final Double toFraction;
 
     /**
-     * Use the new experimental downsampling?
+     * Use the legacy downsampling implementation instead of the newer implementation?
      */
-    public final boolean useExperimentalDownsampling;
+    public final boolean useLegacyDownsampler;
 
     /**
      * Expresses no downsampling applied at all.
@@ -69,11 +69,11 @@ public class DownsamplingMethod {
      */
     public static int DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE = 1000;
 
-    public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useExperimentalDownsampling ) {
+    public DownsamplingMethod( DownsampleType type, Integer toCoverage, Double toFraction, boolean useLegacyDownsampler ) {
         this.type = type != null ? type : DEFAULT_DOWNSAMPLING_TYPE;
         this.toCoverage = toCoverage;
         this.toFraction = toFraction;
-        this.useExperimentalDownsampling = useExperimentalDownsampling;
+        this.useLegacyDownsampler = useLegacyDownsampler;
 
         if ( type == DownsampleType.NONE ) {
             toCoverage = null;
@@ -101,19 +101,19 @@ public class DownsamplingMethod {
         if ( toFraction != null && (toFraction < 0.0 || toFraction > 1.0) ) {
             throw new UserException.CommandLineException("toFraction must be >= 0.0 and <= 1.0 when downsampling to a fraction of reads");
         }
+    }
 
-        // Some restrictions only exist for the old downsampling implementation:
-        if ( ! useExperimentalDownsampling ) {
-            // By sample downsampling does not work with a fraction of reads in the old downsampling implementation
-            if( type == DownsampleType.BY_SAMPLE && toFraction != null )
-                throw new UserException.CommandLineException("Cannot downsample to fraction with the BY_SAMPLE method");
+    public void checkCompatibilityWithWalker( Walker walker ) {
+        boolean isLocusTraversal = walker instanceof LocusWalker || walker instanceof ActiveRegionWalker;
+
+        if ( ! isLocusTraversal && useLegacyDownsampler && toCoverage != null ) {
+            throw new UserException.CommandLineException("Downsampling to coverage for read-based traversals (eg., ReadWalkers) is not supported in the legacy downsampling implementation. " +
+                                                         "The newer downsampling implementation does not have this limitation.");
         }
 
-        // Some restrictions only exist for the new downsampling implementation:
-        if ( useExperimentalDownsampling ) {
-            if ( type == DownsampleType.ALL_READS && toCoverage != null ) {
-                throw new UserException.CommandLineException("Cannot downsample to coverage with the ALL_READS method in the experimental downsampling implementation");
-            }
+        if ( isLocusTraversal && ! useLegacyDownsampler && type == DownsampleType.ALL_READS && toCoverage != null ) {
+            throw new UserException.CommandLineException("Downsampling to coverage with the ALL_READS method for locus-based traversals (eg., LocusWalkers) is not yet supported in the new downsampling implementation (though it is supported for ReadWalkers). " +
+                                                         "You can run with --use_legacy_downsampler for a broken and poorly-maintained implementation of ALL_READS to-coverage downsampling, but this is not recommended.");
         }
     }
 
@@ -124,30 +124,34 @@ public class DownsamplingMethod {
             builder.append("No downsampling");
         }
         else {
-            builder.append(String.format("Method: %s ", type));
+            builder.append(String.format("Method: %s, ", type));
 
             if ( toCoverage != null ) {
-                builder.append(String.format("Target Coverage: %d ", toCoverage));
+                builder.append(String.format("Target Coverage: %d, ", toCoverage));
             }
             else {
-                builder.append(String.format("Target Fraction: %.2f ", toFraction));
+                builder.append(String.format("Target Fraction: %.2f, ", toFraction));
             }
 
-            if ( useExperimentalDownsampling ) {
-                builder.append("Using Experimental Downsampling");
+            if ( useLegacyDownsampler ) {
+                builder.append("Using the legacy downsampling implementation");
+            }
+            else {
+                builder.append("Using the new downsampling implementation");
             }
         }
 
         return builder.toString();
     }
 
-    public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useExperimentalDownsampling ) {
+    public static DownsamplingMethod getDefaultDownsamplingMethod( Walker walker, boolean useLegacyDownsampler ) {
         if ( walker instanceof LocusWalker || walker instanceof ActiveRegionWalker ) {
             return new DownsamplingMethod(DEFAULT_DOWNSAMPLING_TYPE, DEFAULT_LOCUS_BASED_TRAVERSAL_DOWNSAMPLING_COVERAGE,
-                                          null, useExperimentalDownsampling);
+                                          null, useLegacyDownsampler);
         }
         else {
-            return new DownsamplingMethod(DownsampleType.NONE, null, null, useExperimentalDownsampling);
+            // Downsampling is off by default for non-locus-based traversals
+            return new DownsamplingMethod(DownsampleType.NONE, null, null, useLegacyDownsampler);
         }
     }
 }
diff --git a/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java
new file mode 100644
index 000000000..008ffde3b
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/downsampling/PassThroughDownsampler.java
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2012, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.downsampling;
+
+import net.sf.samtools.SAMRecord;
+
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+
+/**
+ * Pass-Through Downsampler: Implementation of the ReadsDownsampler interface that does no
+ * downsampling whatsoever, and instead simply "passes-through" all the reads it's given.
+ * Useful for situations where you want to disable downsampling, but still need to use
+ * the downsampler interface.
+ *
+ * @author David Roazen
+ */
+public class PassThroughDownsampler<T extends SAMRecord> implements ReadsDownsampler<T> {
+
+    private ArrayList<T> selectedReads;
+
+    public PassThroughDownsampler() {
+        clear();
+    }
+
+    public void submit( T newRead ) {
+        // All reads pass-through, no reads get downsampled
+        selectedReads.add(newRead);
+    }
+
+    public void submit( Collection<T> newReads ) {
+        for ( T read : newReads ) {
+            submit(read);
+        }
+    }
+
+    public boolean hasFinalizedItems() {
+        return selectedReads.size() > 0;
+    }
+
+    public List<T> consumeFinalizedItems() {
+        // pass by reference rather than make a copy, for speed
+        List<T> downsampledItems = selectedReads;
+        clear();
+        return downsampledItems;
+    }
+
+    public boolean hasPendingItems() {
+        return false;
+    }
+
+    public T peekFinalized() {
+        return selectedReads.isEmpty() ? null : selectedReads.get(0);
+    }
+
+    public T peekPending() {
+        return null;
+    }
+
+    public int getNumberOfDiscardedItems() {
+        return 0;
+    }
+
+    public void signalEndOfInput() {
+        // NO-OP
+    }
+
+    public void clear() {
+        selectedReads = new ArrayList<T>();
+    }
+
+    public void reset() {
+        // NO-OP
+    }
+
+    public boolean requiresCoordinateSortOrder() {
+        return false;
+    }
+
+    public void signalNoMoreReadsBefore( T read ) {
+        // NO-OP
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
index 6c0dc9769..735a4dce5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/executive/WindowMaker.java
@@ -4,9 +4,9 @@ import net.sf.picard.util.PeekableIterator;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.datasources.reads.Shard;
+import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
 import org.broadinstitute.sting.gatk.iterators.LocusIterator;
 import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
-import org.broadinstitute.sting.gatk.iterators.LocusIteratorByStateExperimental;
 import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@@ -83,17 +83,18 @@ public class WindowMaker implements Iterable<WindowMaker.WindowMakerIterator>, I
         this.sourceInfo = shard.getReadProperties();
         this.readIterator = iterator;
 
-        // Temporary: use the experimental version of LocusIteratorByState if experimental downsampling was requested:
-        this.sourceIterator = sourceInfo.getDownsamplingMethod().useExperimentalDownsampling ?
-                              new PeekableIterator<AlignmentContext>(new LocusIteratorByStateExperimental(iterator,sourceInfo,genomeLocParser, sampleNames))
+        // Use the legacy version of LocusIteratorByState if legacy downsampling was requested:
+        this.sourceIterator = sourceInfo.getDownsamplingMethod().useLegacyDownsampler ?
+                              new PeekableIterator<AlignmentContext>(new LegacyLocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames))
                               :
-                              new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser, sampleNames));
+                              new PeekableIterator<AlignmentContext>(new LocusIteratorByState(iterator,sourceInfo,genomeLocParser,sampleNames));
+
 
         this.intervalIterator = intervals.size()>0 ? new PeekableIterator<GenomeLoc>(intervals.iterator()) : null;
     }
 
     public WindowMaker(Shard shard, GenomeLocParser genomeLocParser, StingSAMIterator iterator, List<GenomeLoc> intervals ) {
-        this(shard, genomeLocParser, iterator, intervals, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
+        this(shard, genomeLocParser, iterator, intervals, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
     }
 
     public Iterator<WindowMakerIterator> iterator() {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByState.java
similarity index 61%
rename from public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java
rename to public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByState.java
index 557cbd009..5c833de4c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimental.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByState.java
@@ -31,14 +31,14 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
-import org.broadinstitute.sting.gatk.downsampling.Downsampler;
-import org.broadinstitute.sting.gatk.downsampling.LevelingDownsampler;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
+import org.broadinstitute.sting.utils.MathUtils;
+import org.broadinstitute.sting.utils.LegacyReservoirDownsampler;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@@ -50,11 +50,11 @@ import java.util.*;
 /**
  * Iterator that traverses a SAM File, accumulating information on a per-locus basis
  */
-public class LocusIteratorByStateExperimental extends LocusIterator {
+public class LegacyLocusIteratorByState extends LocusIterator {
     /**
      * our log, which we want to capture anything from this class
      */
-    private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
+    private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
 
     // -----------------------------------------------------------------------------------------------------------------
     //
@@ -69,7 +69,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
     private final ArrayList<String> samples;
     private final ReadStateManager readStates;
 
-    protected static class SAMRecordState {
+    static private class SAMRecordState {
         SAMRecord read;
         int readOffset = -1;                    // how far are we offset from the start of the read bases?
         int genomeOffset = -1;                  // how far are we offset from the alignment start on the genome?
@@ -213,7 +213,6 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
     //final boolean DEBUG2 = false && DEBUG;
     private ReadProperties readInfo;
     private AlignmentContext nextAlignmentContext;
-    private boolean performLevelingDownsampling;
 
     // -----------------------------------------------------------------------------------------------------------------
     //
@@ -221,15 +220,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
     //
     // -----------------------------------------------------------------------------------------------------------------
 
-    public LocusIteratorByStateExperimental(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
+    public LegacyLocusIteratorByState(final Iterator<SAMRecord> samIterator, ReadProperties readInformation, GenomeLocParser genomeLocParser, Collection<String> samples) {
         this.readInfo = readInformation;
         this.genomeLocParser = genomeLocParser;
         this.samples = new ArrayList<String>(samples);
-        this.readStates = new ReadStateManager(samIterator);
-
-        this.performLevelingDownsampling = readInfo.getDownsamplingMethod() != null &&
-                                           readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
-                                           readInfo.getDownsamplingMethod().toCoverage != null;
+        this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
 
         // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
         // there's no read data.  So we need to throw this error only when samIterator.hasNext() is true
@@ -290,13 +285,11 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
 
             final GenomeLoc location = getLocation();
             final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
-
-            // TODO: How can you determine here whether the current pileup has been downsampled?
             boolean hasBeenSampled = false;
-
             for (final String sample : samples) {
                 final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
                 final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
+                hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
 
                 int size = 0;                                                           // number of elements in this sample's pileup
                 int nDeletions = 0;                                                     // number of deletions in this sample's pileup
@@ -405,20 +398,34 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
         throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
     }
 
-    protected class ReadStateManager {
+    private class ReadStateManager {
         private final PeekableIterator<SAMRecord> iterator;
+        private final DownsamplingMethod downsamplingMethod;
         private final SamplePartitioner samplePartitioner;
         private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
+        private final int targetCoverage;
         private int totalReadStates = 0;
 
-        public ReadStateManager(Iterator<SAMRecord> source) {
+        public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
             this.iterator = new PeekableIterator<SAMRecord>(source);
-
-            for (final String sample : samples) {
-                readStatesBySample.put(sample, new PerSampleReadStateManager());
+            this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
+            switch (this.downsamplingMethod.type) {
+                case BY_SAMPLE:
+                    if (downsamplingMethod.toCoverage == null)
+                        throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
+                    this.targetCoverage = downsamplingMethod.toCoverage;
+                    break;
+                default:
+                    this.targetCoverage = Integer.MAX_VALUE;
             }
 
-            samplePartitioner = new SamplePartitioner();
+            Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
+            for (final String sample : samples) {
+                readStatesBySample.put(sample, new PerSampleReadStateManager());
+                readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
+            }
+
+            samplePartitioner = new SamplePartitioner(readSelectors);
         }
 
         /**
@@ -442,6 +449,7 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
 
                 public void remove() {
                     wrappedIterator.remove();
+                    totalReadStates--;
                 }
             };
         }
@@ -469,6 +477,17 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
             return readStatesBySample.get(sample).size();
         }
 
+        /**
+         * The extent of downsampling; basically, the furthest base out which has 'fallen
+         * victim' to the downsampler.
+         *
+         * @param sample Sample, downsampled independently.
+         * @return Integer stop of the furthest undownsampled region.
+         */
+        public int getDownsamplingExtent(final String sample) {
+            return readStatesBySample.get(sample).getDownsamplingExtent();
+        }
+
         public SAMRecordState getFirst() {
             for (final String sample : samples) {
                 PerSampleReadStateManager reads = readStatesBySample.get(sample);
@@ -501,13 +520,61 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
                     samplePartitioner.submitRead(iterator.next());
                 }
             }
+            samplePartitioner.complete();
 
             for (final String sample : samples) {
-                Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
-                PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
-                addReadsToSample(statesBySample, newReads);
-            }
+                ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
 
+                Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
+
+                PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
+                int numReads = statesBySample.size();
+                int downsamplingExtent = aggregator.getDownsamplingExtent();
+
+                if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
+                    long readLimit = aggregator.getNumReadsSeen();
+                    addReadsToSample(statesBySample, newReads, readLimit);
+                    statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
+                } else {
+                    int[] counts = statesBySample.getCountsPerAlignmentStart();
+                    int[] updatedCounts = new int[counts.length];
+                    System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
+
+                    boolean readPruned = true;
+                    while (numReads + newReads.size() > targetCoverage && readPruned) {
+                        readPruned = false;
+                        for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
+                            if (updatedCounts[alignmentStart] > 1) {
+                                updatedCounts[alignmentStart]--;
+                                numReads--;
+                                readPruned = true;
+                            }
+                        }
+                    }
+
+                    if (numReads == targetCoverage) {
+                        updatedCounts[0]--;
+                        numReads--;
+                    }
+
+                    BitSet toPurge = new BitSet(readStates.size());
+                    int readOffset = 0;
+
+                    for (int i = 0; i < updatedCounts.length; i++) {
+                        int n = counts[i];
+                        int k = updatedCounts[i];
+
+                        for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
+                            toPurge.set(readOffset + purgedElement);
+
+                        readOffset += counts[i];
+                    }
+                    downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
+
+                    addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
+                    statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
+                }
+            }
             samplePartitioner.reset();
         }
 
@@ -516,134 +583,380 @@ public class LocusIteratorByStateExperimental extends LocusIterator {
          *
          * @param readStates The list of read states to add this collection of reads.
          * @param reads      Reads to add.  Selected reads will be pulled from this source.
+         * @param maxReads   Maximum number of reads to add.
          */
-        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
+        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
             if (reads.isEmpty())
                 return;
 
             Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
-
+            int readCount = 0;
             for (SAMRecord read : reads) {
-                SAMRecordState state = new SAMRecordState(read);
-                state.stepForwardOnGenome();
-                newReadStates.add(state);
+                if (readCount < maxReads) {
+                    SAMRecordState state = new SAMRecordState(read);
+                    state.stepForwardOnGenome();
+                    newReadStates.add(state);
+                    readCount++;
+                }
             }
-
             readStates.addStatesAtNextAlignmentStart(newReadStates);
         }
 
-        protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
-            private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
-            private int thisSampleReadStates = 0;
-            private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
-                      performLevelingDownsampling ?
-                      new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
-                      null;
+        private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
+            private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
+            private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
+            private int downsamplingExtent = 0;
 
             public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
-                if ( states.isEmpty() ) {
-                    return;
-                }
-
-                readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
-                thisSampleReadStates += states.size();
+                readStates.addAll(states);
+                readStateCounter.add(new Counter(states.size()));
                 totalReadStates += states.size();
-
-                if ( levelingDownsampler != null ) {
-                    levelingDownsampler.submit(readStatesByAlignmentStart);
-                    levelingDownsampler.signalEndOfInput();
-
-                    thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
-                    totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
-
-                    // use returned List directly rather than make a copy, for efficiency's sake
-                    readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
-                    levelingDownsampler.reset();
-                }
             }
 
             public boolean isEmpty() {
-                return readStatesByAlignmentStart.isEmpty();
+                return readStates.isEmpty();
             }
 
             public SAMRecordState peek() {
-                return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
+                return readStates.peek();
             }
 
             public int size() {
-                return thisSampleReadStates;
+                return readStates.size();
+            }
+
+            public void specifyNewDownsamplingExtent(int downsamplingExtent) {
+                this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
+            }
+
+            public int getDownsamplingExtent() {
+                return downsamplingExtent;
+            }
+
+            public int[] getCountsPerAlignmentStart() {
+                int[] counts = new int[readStateCounter.size()];
+                int index = 0;
+                for (Counter counter : readStateCounter)
+                    counts[index++] = counter.getCount();
+                return counts;
             }
 
             public Iterator<SAMRecordState> iterator() {
                 return new Iterator<SAMRecordState>() {
-                    private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
-                    private LinkedList<SAMRecordState> currentPositionReadStates = null;
-                    private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
+                    private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
 
                     public boolean hasNext() {
-                        return  alignmentStartIterator.hasNext() ||
-                                (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
+                        return wrappedIterator.hasNext();
                     }
 
                     public SAMRecordState next() {
-                        if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
-                            currentPositionReadStates = alignmentStartIterator.next();
-                            currentPositionReadStatesIterator = currentPositionReadStates.iterator();
-                        }
-
-                        return currentPositionReadStatesIterator.next();
+                        return wrappedIterator.next();
                     }
 
                     public void remove() {
-                        currentPositionReadStatesIterator.remove();
-                        thisSampleReadStates--;
-                        totalReadStates--;
-
-                        if ( currentPositionReadStates.isEmpty() ) {
-                            alignmentStartIterator.remove();
-                        }
+                        wrappedIterator.remove();
+                        Counter counter = readStateCounter.peek();
+                        counter.decrement();
+                        if (counter.getCount() == 0)
+                            readStateCounter.remove();
                     }
                 };
             }
+
+            /**
+             * Purge the given elements from the bitset.  If an element in the bitset is true, purge
+             * the corresponding read state.
+             *
+             * @param elements bits from the set to purge.
+             * @return the extent of the final downsampled read.
+             */
+            public int purge(final BitSet elements) {
+                int downsamplingExtent = 0;
+
+                if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
+
+                Iterator<SAMRecordState> readStateIterator = readStates.iterator();
+
+                Iterator<Counter> counterIterator = readStateCounter.iterator();
+                Counter currentCounter = counterIterator.next();
+
+                int readIndex = 0;
+                long alignmentStartCounter = currentCounter.getCount();
+
+                int toPurge = elements.nextSetBit(0);
+                int removedCount = 0;
+
+                while (readStateIterator.hasNext() && toPurge >= 0) {
+                    SAMRecordState state = readStateIterator.next();
+                    downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
+
+                    if (readIndex == toPurge) {
+                        readStateIterator.remove();
+                        currentCounter.decrement();
+                        if (currentCounter.getCount() == 0)
+                            counterIterator.remove();
+                        removedCount++;
+                        toPurge = elements.nextSetBit(toPurge + 1);
+                    }
+
+                    readIndex++;
+                    alignmentStartCounter--;
+                    if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
+                        currentCounter = counterIterator.next();
+                        alignmentStartCounter = currentCounter.getCount();
+                    }
+                }
+
+                totalReadStates -= removedCount;
+
+                return downsamplingExtent;
+            }
         }
     }
 
     /**
-     * Note: stores reads by sample ID string, not by sample object
+     * Note: assuming that, whenever we downsample, we downsample to an integer capacity.
      */
-    private class SamplePartitioner {
-        private Map<String, Collection<SAMRecord>> readsBySample;
-        private long readsSeen = 0;
+    static private class Counter {
+        private int count;
 
-        public SamplePartitioner() {
-            readsBySample = new HashMap<String, Collection<SAMRecord>>();
-
-            for ( String sample : samples ) {
-                readsBySample.put(sample, new ArrayList<SAMRecord>());
-            }
+        public Counter(int count) {
+            this.count = count;
         }
 
-        public void submitRead(SAMRecord read) {
-            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
-            if (readsBySample.containsKey(sampleName))
-                readsBySample.get(sampleName).add(read);
-            readsSeen++;
+        public int getCount() {
+            return count;
         }
 
-        public long getNumReadsSeen() {
-            return readsSeen;
-        }
-
-        public Collection<SAMRecord> getReadsForSample(String sampleName) {
-            if ( ! readsBySample.containsKey(sampleName) )
-                throw new NoSuchElementException("Sample name not found");
-            return readsBySample.get(sampleName);
-        }
-
-        public void reset() {
-            for ( Collection<SAMRecord> perSampleReads : readsBySample.values() )
-                perSampleReads.clear();
-            readsSeen = 0;
+        public void decrement() {
+            count--;
         }
     }
-}
\ No newline at end of file
+}
+
+/**
+ * Selects reads passed to it based on a criteria decided through inheritance.
+ * TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option.  Get rid of this.
+ */
+interface ReadSelector {
+    /**
+     * All previous selectors in the chain have allowed this read.  Submit it to this selector for consideration.
+     *
+     * @param read the read to evaluate.
+     */
+    public void submitRead(SAMRecord read);
+
+    /**
+     * A previous selector has deemed this read unfit.  Notify this selector so that this selector's counts are valid.
+     *
+     * @param read the read previously rejected.
+     */
+    public void notifyReadRejected(SAMRecord read);
+
+    /**
+     * Signal the selector that read additions are complete.
+     */
+    public void complete();
+
+    /**
+     * Retrieve the number of reads seen by this selector so far.
+     *
+     * @return number of reads seen.
+     */
+    public long getNumReadsSeen();
+
+    /**
+     * Return the number of reads accepted by this selector so far.
+     *
+     * @return number of reads selected.
+     */
+    public long getNumReadsSelected();
+
+    /**
+     * Gets the locus at which the last of the downsampled reads selected by this selector ends.  The value returned will be the
+     * last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
+     * position 3 whose cigar string is 76M, the value of this parameter will be 78.
+     *
+     * @return If any read has been downsampled, this will return the last aligned base of the longest alignment.  Else, 0.
+     */
+    public int getDownsamplingExtent();
+
+    /**
+     * Get the reads selected by this selector.
+     *
+     * @return collection of reads selected by this selector.
+     */
+    public Collection<SAMRecord> getSelectedReads();
+
+    /**
+     * Reset this collection to its pre-gathered state.
+     */
+    public void reset();
+}
+
+/**
+ * Select every read passed in.
+ */
+class AllReadsSelector implements ReadSelector {
+    private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
+    private long readsSeen = 0;
+    private int downsamplingExtent = 0;
+
+    public void submitRead(SAMRecord read) {
+        reads.add(read);
+        readsSeen++;
+    }
+
+    public void notifyReadRejected(SAMRecord read) {
+        readsSeen++;
+        downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
+    }
+
+    public void complete() {
+        // NO-OP.
+    }
+
+    public long getNumReadsSeen() {
+        return readsSeen;
+    }
+
+    public long getNumReadsSelected() {
+        return readsSeen;
+    }
+
+    public int getDownsamplingExtent() {
+        return downsamplingExtent;
+    }
+
+    public Collection<SAMRecord> getSelectedReads() {
+        return reads;
+    }
+
+    public void reset() {
+        reads.clear();
+        readsSeen = 0;
+        downsamplingExtent = 0;
+    }
+}
+
+
+/**
+ * Select N reads randomly from the input stream.
+ */
+class NRandomReadSelector implements ReadSelector {
+    private final LegacyReservoirDownsampler<SAMRecord> reservoir;
+    private final ReadSelector chainedSelector;
+    private long readsSeen = 0;
+    private int downsamplingExtent = 0;
+
+    public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
+        this.reservoir = new LegacyReservoirDownsampler<SAMRecord>((int) readLimit);
+        this.chainedSelector = chainedSelector;
+    }
+
+    public void submitRead(SAMRecord read) {
+        SAMRecord displaced = reservoir.add(read);
+        if (displaced != null && chainedSelector != null) {
+            chainedSelector.notifyReadRejected(read);
+            downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
+        }
+        readsSeen++;
+    }
+
+    public void notifyReadRejected(SAMRecord read) {
+        readsSeen++;
+    }
+
+    public void complete() {
+        for (SAMRecord read : reservoir.getDownsampledContents())
+            chainedSelector.submitRead(read);
+        if (chainedSelector != null)
+            chainedSelector.complete();
+    }
+
+
+    public long getNumReadsSeen() {
+        return readsSeen;
+    }
+
+    public long getNumReadsSelected() {
+        return reservoir.size();
+    }
+
+    public int getDownsamplingExtent() {
+        return downsamplingExtent;
+    }
+
+    public Collection<SAMRecord> getSelectedReads() {
+        return reservoir.getDownsampledContents();
+    }
+
+    public void reset() {
+        reservoir.clear();
+        downsamplingExtent = 0;
+        if (chainedSelector != null)
+            chainedSelector.reset();
+    }
+}
+
+/**
+ * Note: stores reads by sample ID string, not by sample object
+ */
+class SamplePartitioner implements ReadSelector {
+    private final Map<String, ReadSelector> readsBySample;
+    private long readsSeen = 0;
+
+    public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
+        readsBySample = readSelectors;
+    }
+
+    public void submitRead(SAMRecord read) {
+        String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+        if (readsBySample.containsKey(sampleName))
+            readsBySample.get(sampleName).submitRead(read);
+        readsSeen++;
+    }
+
+    public void notifyReadRejected(SAMRecord read) {
+        String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+        if (readsBySample.containsKey(sampleName))
+            readsBySample.get(sampleName).notifyReadRejected(read);
+        readsSeen++;
+    }
+
+    public void complete() {
+        // NO-OP.
+    }
+
+    public long getNumReadsSeen() {
+        return readsSeen;
+    }
+
+    public long getNumReadsSelected() {
+        return readsSeen;
+    }
+
+    public int getDownsamplingExtent() {
+        int downsamplingExtent = 0;
+        for (ReadSelector storage : readsBySample.values())
+            downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
+        return downsamplingExtent;
+    }
+
+    public Collection<SAMRecord> getSelectedReads() {
+        throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
+    }
+
+    public ReadSelector getSelectedReads(String sampleName) {
+        if (!readsBySample.containsKey(sampleName))
+            throw new NoSuchElementException("Sample name not found");
+        return readsBySample.get(sampleName);
+    }
+
+    public void reset() {
+        for (ReadSelector storage : readsBySample.values())
+            storage.reset();
+        readsSeen = 0;
+    }
+
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
index 46e84798a..4f8d7d3f2 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/iterators/LocusIteratorByState.java
@@ -31,14 +31,11 @@ import net.sf.samtools.CigarElement;
 import net.sf.samtools.CigarOperator;
 import net.sf.samtools.SAMRecord;
 import org.apache.log4j.Logger;
-import org.broadinstitute.sting.gatk.downsampling.DownsampleType;
-import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.downsampling.*;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.ReservoirDownsampler;
 import org.broadinstitute.sting.utils.exceptions.UserException;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileupImpl;
@@ -54,7 +51,7 @@ public class LocusIteratorByState extends LocusIterator {
     /**
      * our log, which we want to capture anything from this class
      */
-    private static Logger logger = Logger.getLogger(LocusIteratorByState.class);
+    private static Logger logger = Logger.getLogger(LegacyLocusIteratorByState.class);
 
     // -----------------------------------------------------------------------------------------------------------------
     //
@@ -69,7 +66,7 @@ public class LocusIteratorByState extends LocusIterator {
     private final ArrayList<String> samples;
     private final ReadStateManager readStates;
 
-    static private class SAMRecordState {
+    protected static class SAMRecordState {
         SAMRecord read;
         int readOffset = -1;                    // how far are we offset from the start of the read bases?
         int genomeOffset = -1;                  // how far are we offset from the alignment start on the genome?
@@ -213,6 +210,7 @@ public class LocusIteratorByState extends LocusIterator {
     //final boolean DEBUG2 = false && DEBUG;
     private ReadProperties readInfo;
     private AlignmentContext nextAlignmentContext;
+    private boolean performDownsampling;
 
     // -----------------------------------------------------------------------------------------------------------------
     //
@@ -224,7 +222,18 @@ public class LocusIteratorByState extends LocusIterator {
         this.readInfo = readInformation;
         this.genomeLocParser = genomeLocParser;
         this.samples = new ArrayList<String>(samples);
-        this.readStates = new ReadStateManager(samIterator, readInformation.getDownsamplingMethod());
+
+        // LIBS will invoke the Reservoir and Leveling downsamplers on the read stream if we're
+        // downsampling to coverage by sample. SAMDataSource will have refrained from applying
+        // any downsamplers to the read stream in this case, in the expectation that LIBS will
+        // manage the downsampling. The reason for this is twofold: performance (don't have to
+        // split/re-assemble the read stream in SAMDataSource), and to enable partial downsampling
+        // of reads (eg., using half of a read, and throwing the rest away).
+        this.performDownsampling = readInfo.getDownsamplingMethod() != null &&
+                                   readInfo.getDownsamplingMethod().type == DownsampleType.BY_SAMPLE &&
+                                   readInfo.getDownsamplingMethod().toCoverage != null;
+
+        this.readStates = new ReadStateManager(samIterator);
 
         // currently the GATK expects this LocusIteratorByState to accept empty sample lists, when
         // there's no read data.  So we need to throw this error only when samIterator.hasNext() is true
@@ -285,11 +294,13 @@ public class LocusIteratorByState extends LocusIterator {
 
             final GenomeLoc location = getLocation();
             final Map<String, ReadBackedPileupImpl> fullPileup = new HashMap<String, ReadBackedPileupImpl>();
+
+            // TODO: How can you determine here whether the current pileup has been downsampled?
             boolean hasBeenSampled = false;
+
             for (final String sample : samples) {
                 final Iterator<SAMRecordState> iterator = readStates.iterator(sample);
                 final List<PileupElement> pile = new ArrayList<PileupElement>(readStates.size(sample));
-                hasBeenSampled |= location.getStart() <= readStates.getDownsamplingExtent(sample);
 
                 int size = 0;                                                           // number of elements in this sample's pileup
                 int nDeletions = 0;                                                     // number of deletions in this sample's pileup
@@ -398,34 +409,20 @@ public class LocusIteratorByState extends LocusIterator {
         throw new UnsupportedOperationException("Can not remove records from a SAM file via an iterator!");
     }
 
-    private class ReadStateManager {
+    protected class ReadStateManager {
         private final PeekableIterator<SAMRecord> iterator;
-        private final DownsamplingMethod downsamplingMethod;
         private final SamplePartitioner samplePartitioner;
         private final Map<String, PerSampleReadStateManager> readStatesBySample = new HashMap<String, PerSampleReadStateManager>();
-        private final int targetCoverage;
         private int totalReadStates = 0;
 
-        public ReadStateManager(Iterator<SAMRecord> source, DownsamplingMethod downsamplingMethod) {
+        public ReadStateManager(Iterator<SAMRecord> source) {
             this.iterator = new PeekableIterator<SAMRecord>(source);
-            this.downsamplingMethod = downsamplingMethod.type != null ? downsamplingMethod : DownsamplingMethod.NONE;
-            switch (this.downsamplingMethod.type) {
-                case BY_SAMPLE:
-                    if (downsamplingMethod.toCoverage == null)
-                        throw new UserException.BadArgumentValue("dcov", "Downsampling coverage (-dcov) must be specified when downsampling by sample");
-                    this.targetCoverage = downsamplingMethod.toCoverage;
-                    break;
-                default:
-                    this.targetCoverage = Integer.MAX_VALUE;
-            }
 
-            Map<String, ReadSelector> readSelectors = new HashMap<String, ReadSelector>();
             for (final String sample : samples) {
                 readStatesBySample.put(sample, new PerSampleReadStateManager());
-                readSelectors.put(sample, downsamplingMethod.type == DownsampleType.BY_SAMPLE ? new NRandomReadSelector(null, targetCoverage) : new AllReadsSelector());
             }
 
-            samplePartitioner = new SamplePartitioner(readSelectors);
+            samplePartitioner = new SamplePartitioner(performDownsampling);
         }
 
         /**
@@ -449,7 +446,6 @@ public class LocusIteratorByState extends LocusIterator {
 
                 public void remove() {
                     wrappedIterator.remove();
-                    totalReadStates--;
                 }
             };
         }
@@ -477,17 +473,6 @@ public class LocusIteratorByState extends LocusIterator {
             return readStatesBySample.get(sample).size();
         }
 
-        /**
-         * The extent of downsampling; basically, the furthest base out which has 'fallen
-         * victim' to the downsampler.
-         *
-         * @param sample Sample, downsampled independently.
-         * @return Integer stop of the furthest undownsampled region.
-         */
-        public int getDownsamplingExtent(final String sample) {
-            return readStatesBySample.get(sample).getDownsamplingExtent();
-        }
-
         public SAMRecordState getFirst() {
             for (final String sample : samples) {
                 PerSampleReadStateManager reads = readStatesBySample.get(sample);
@@ -520,61 +505,15 @@ public class LocusIteratorByState extends LocusIterator {
                     samplePartitioner.submitRead(iterator.next());
                 }
             }
-            samplePartitioner.complete();
+
+            samplePartitioner.doneSubmittingReads();
 
             for (final String sample : samples) {
-                ReadSelector aggregator = samplePartitioner.getSelectedReads(sample);
-
-                Collection<SAMRecord> newReads = new ArrayList<SAMRecord>(aggregator.getSelectedReads());
-
+                Collection<SAMRecord> newReads = samplePartitioner.getReadsForSample(sample);
                 PerSampleReadStateManager statesBySample = readStatesBySample.get(sample);
-                int numReads = statesBySample.size();
-                int downsamplingExtent = aggregator.getDownsamplingExtent();
-
-                if (numReads + newReads.size() <= targetCoverage || downsamplingMethod.type == DownsampleType.NONE) {
-                    long readLimit = aggregator.getNumReadsSeen();
-                    addReadsToSample(statesBySample, newReads, readLimit);
-                    statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
-                } else {
-                    int[] counts = statesBySample.getCountsPerAlignmentStart();
-                    int[] updatedCounts = new int[counts.length];
-                    System.arraycopy(counts, 0, updatedCounts, 0, counts.length);
-
-                    boolean readPruned = true;
-                    while (numReads + newReads.size() > targetCoverage && readPruned) {
-                        readPruned = false;
-                        for (int alignmentStart = updatedCounts.length - 1; numReads + newReads.size() > targetCoverage && alignmentStart >= 0; alignmentStart--) {
-                            if (updatedCounts[alignmentStart] > 1) {
-                                updatedCounts[alignmentStart]--;
-                                numReads--;
-                                readPruned = true;
-                            }
-                        }
-                    }
-
-                    if (numReads == targetCoverage) {
-                        updatedCounts[0]--;
-                        numReads--;
-                    }
-
-                    BitSet toPurge = new BitSet(readStates.size());
-                    int readOffset = 0;
-
-                    for (int i = 0; i < updatedCounts.length; i++) {
-                        int n = counts[i];
-                        int k = updatedCounts[i];
-
-                        for (Integer purgedElement : MathUtils.sampleIndicesWithoutReplacement(n, n - k))
-                            toPurge.set(readOffset + purgedElement);
-
-                        readOffset += counts[i];
-                    }
-                    downsamplingExtent = Math.max(downsamplingExtent, statesBySample.purge(toPurge));
-
-                    addReadsToSample(statesBySample, newReads, targetCoverage - numReads);
-                    statesBySample.specifyNewDownsamplingExtent(downsamplingExtent);
-                }
+                addReadsToSample(statesBySample, newReads);
             }
+
             samplePartitioner.reset();
         }
 
@@ -583,380 +522,140 @@ public class LocusIteratorByState extends LocusIterator {
          *
          * @param readStates The list of read states to add this collection of reads.
          * @param reads      Reads to add.  Selected reads will be pulled from this source.
-         * @param maxReads   Maximum number of reads to add.
          */
-        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads, final long maxReads) {
+        private void addReadsToSample(final PerSampleReadStateManager readStates, final Collection<SAMRecord> reads) {
             if (reads.isEmpty())
                 return;
 
             Collection<SAMRecordState> newReadStates = new LinkedList<SAMRecordState>();
-            int readCount = 0;
+
             for (SAMRecord read : reads) {
-                if (readCount < maxReads) {
-                    SAMRecordState state = new SAMRecordState(read);
-                    state.stepForwardOnGenome();
-                    newReadStates.add(state);
-                    readCount++;
-                }
+                SAMRecordState state = new SAMRecordState(read);
+                state.stepForwardOnGenome();
+                newReadStates.add(state);
             }
+
             readStates.addStatesAtNextAlignmentStart(newReadStates);
         }
 
-        private class PerSampleReadStateManager implements Iterable<SAMRecordState> {
-            private final Queue<SAMRecordState> readStates = new LinkedList<SAMRecordState>();
-            private final Deque<Counter> readStateCounter = new LinkedList<Counter>();
-            private int downsamplingExtent = 0;
+        protected class PerSampleReadStateManager implements Iterable<SAMRecordState> {
+            private List<LinkedList<SAMRecordState>> readStatesByAlignmentStart = new LinkedList<LinkedList<SAMRecordState>>();
+            private int thisSampleReadStates = 0;
+            private Downsampler<LinkedList<SAMRecordState>> levelingDownsampler =
+                      performDownsampling ?
+                      new LevelingDownsampler<LinkedList<SAMRecordState>, SAMRecordState>(readInfo.getDownsamplingMethod().toCoverage) :
+                      null;
 
             public void addStatesAtNextAlignmentStart(Collection<SAMRecordState> states) {
-                readStates.addAll(states);
-                readStateCounter.add(new Counter(states.size()));
+                if ( states.isEmpty() ) {
+                    return;
+                }
+
+                readStatesByAlignmentStart.add(new LinkedList<SAMRecordState>(states));
+                thisSampleReadStates += states.size();
                 totalReadStates += states.size();
+
+                if ( levelingDownsampler != null ) {
+                    levelingDownsampler.submit(readStatesByAlignmentStart);
+                    levelingDownsampler.signalEndOfInput();
+
+                    thisSampleReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
+                    totalReadStates -= levelingDownsampler.getNumberOfDiscardedItems();
+
+                    // use returned List directly rather than make a copy, for efficiency's sake
+                    readStatesByAlignmentStart = levelingDownsampler.consumeFinalizedItems();
+                    levelingDownsampler.reset();
+                }
             }
 
             public boolean isEmpty() {
-                return readStates.isEmpty();
+                return readStatesByAlignmentStart.isEmpty();
             }
 
             public SAMRecordState peek() {
-                return readStates.peek();
+                return isEmpty() ? null : readStatesByAlignmentStart.get(0).peek();
             }
 
             public int size() {
-                return readStates.size();
-            }
-
-            public void specifyNewDownsamplingExtent(int downsamplingExtent) {
-                this.downsamplingExtent = Math.max(this.downsamplingExtent, downsamplingExtent);
-            }
-
-            public int getDownsamplingExtent() {
-                return downsamplingExtent;
-            }
-
-            public int[] getCountsPerAlignmentStart() {
-                int[] counts = new int[readStateCounter.size()];
-                int index = 0;
-                for (Counter counter : readStateCounter)
-                    counts[index++] = counter.getCount();
-                return counts;
+                return thisSampleReadStates;
             }
 
             public Iterator<SAMRecordState> iterator() {
                 return new Iterator<SAMRecordState>() {
-                    private Iterator<SAMRecordState> wrappedIterator = readStates.iterator();
+                    private Iterator<LinkedList<SAMRecordState>> alignmentStartIterator = readStatesByAlignmentStart.iterator();
+                    private LinkedList<SAMRecordState> currentPositionReadStates = null;
+                    private Iterator<SAMRecordState> currentPositionReadStatesIterator = null;
 
                     public boolean hasNext() {
-                        return wrappedIterator.hasNext();
+                        return  alignmentStartIterator.hasNext() ||
+                                (currentPositionReadStatesIterator != null && currentPositionReadStatesIterator.hasNext());
                     }
 
                     public SAMRecordState next() {
-                        return wrappedIterator.next();
+                        if ( currentPositionReadStatesIterator == null || ! currentPositionReadStatesIterator.hasNext() ) {
+                            currentPositionReadStates = alignmentStartIterator.next();
+                            currentPositionReadStatesIterator = currentPositionReadStates.iterator();
+                        }
+
+                        return currentPositionReadStatesIterator.next();
                     }
 
                     public void remove() {
-                        wrappedIterator.remove();
-                        Counter counter = readStateCounter.peek();
-                        counter.decrement();
-                        if (counter.getCount() == 0)
-                            readStateCounter.remove();
+                        currentPositionReadStatesIterator.remove();
+                        thisSampleReadStates--;
+                        totalReadStates--;
+
+                        if ( currentPositionReadStates.isEmpty() ) {
+                            alignmentStartIterator.remove();
+                        }
                     }
                 };
             }
+        }
+    }
 
-            /**
-             * Purge the given elements from the bitset.  If an element in the bitset is true, purge
-             * the corresponding read state.
-             *
-             * @param elements bits from the set to purge.
-             * @return the extent of the final downsampled read.
-             */
-            public int purge(final BitSet elements) {
-                int downsamplingExtent = 0;
+    /**
+     * Divides reads by sample and (if requested) does a preliminary downsampling pass with a ReservoirDownsampler.
+     *
+     * Note: stores reads by sample ID string, not by sample object
+     */
+    private class SamplePartitioner {
+        private Map<String, Downsampler<SAMRecord>> readsBySample;
 
-                if (elements.isEmpty() || readStates.isEmpty()) return downsamplingExtent;
+        public SamplePartitioner( boolean downsampleReads ) {
+            readsBySample = new HashMap<String, Downsampler<SAMRecord>>();
 
-                Iterator<SAMRecordState> readStateIterator = readStates.iterator();
+            for ( String sample : samples ) {
+                readsBySample.put(sample,
+                                  downsampleReads ? new ReservoirDownsampler<SAMRecord>(readInfo.getDownsamplingMethod().toCoverage) :
+                                                    new PassThroughDownsampler<SAMRecord>());
+            }
+        }
 
-                Iterator<Counter> counterIterator = readStateCounter.iterator();
-                Counter currentCounter = counterIterator.next();
+        public void submitRead(SAMRecord read) {
+            String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
+            if (readsBySample.containsKey(sampleName))
+                readsBySample.get(sampleName).submit(read);
+        }
 
-                int readIndex = 0;
-                long alignmentStartCounter = currentCounter.getCount();
+        public void doneSubmittingReads() {
+            for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
+                perSampleReads.getValue().signalEndOfInput();
+            }
+        }
 
-                int toPurge = elements.nextSetBit(0);
-                int removedCount = 0;
+        public Collection<SAMRecord> getReadsForSample(String sampleName) {
+            if ( ! readsBySample.containsKey(sampleName) )
+                throw new NoSuchElementException("Sample name not found");
 
-                while (readStateIterator.hasNext() && toPurge >= 0) {
-                    SAMRecordState state = readStateIterator.next();
-                    downsamplingExtent = Math.max(downsamplingExtent, state.getRead().getAlignmentEnd());
+            return readsBySample.get(sampleName).consumeFinalizedItems();
+        }
 
-                    if (readIndex == toPurge) {
-                        readStateIterator.remove();
-                        currentCounter.decrement();
-                        if (currentCounter.getCount() == 0)
-                            counterIterator.remove();
-                        removedCount++;
-                        toPurge = elements.nextSetBit(toPurge + 1);
-                    }
-
-                    readIndex++;
-                    alignmentStartCounter--;
-                    if (alignmentStartCounter == 0 && counterIterator.hasNext()) {
-                        currentCounter = counterIterator.next();
-                        alignmentStartCounter = currentCounter.getCount();
-                    }
-                }
-
-                totalReadStates -= removedCount;
-
-                return downsamplingExtent;
+        public void reset() {
+            for ( Map.Entry<String, Downsampler<SAMRecord>> perSampleReads : readsBySample.entrySet() ) {
+                perSampleReads.getValue().clear();
+                perSampleReads.getValue().reset();
             }
         }
     }
-
-    /**
-     * Note: assuming that, whenever we downsample, we downsample to an integer capacity.
-     */
-    static private class Counter {
-        private int count;
-
-        public Counter(int count) {
-            this.count = count;
-        }
-
-        public int getCount() {
-            return count;
-        }
-
-        public void decrement() {
-            count--;
-        }
-    }
-}
-
-/**
- * Selects reads passed to it based on a criteria decided through inheritance.
- * TODO: This is a temporary abstraction until we can get rid of this downsampling implementation and the mrl option.  Get rid of this.
- */
-interface ReadSelector {
-    /**
-     * All previous selectors in the chain have allowed this read.  Submit it to this selector for consideration.
-     *
-     * @param read the read to evaluate.
-     */
-    public void submitRead(SAMRecord read);
-
-    /**
-     * A previous selector has deemed this read unfit.  Notify this selector so that this selector's counts are valid.
-     *
-     * @param read the read previously rejected.
-     */
-    public void notifyReadRejected(SAMRecord read);
-
-    /**
-     * Signal the selector that read additions are complete.
-     */
-    public void complete();
-
-    /**
-     * Retrieve the number of reads seen by this selector so far.
-     *
-     * @return number of reads seen.
-     */
-    public long getNumReadsSeen();
-
-    /**
-     * Return the number of reads accepted by this selector so far.
-     *
-     * @return number of reads selected.
-     */
-    public long getNumReadsSelected();
-
-    /**
-     * Gets the locus at which the last of the downsampled reads selected by this selector ends.  The value returned will be the
-     * last aligned position from this selection to which a downsampled read aligns -- in other words, if a read is thrown out at
-     * position 3 whose cigar string is 76M, the value of this parameter will be 78.
-     *
-     * @return If any read has been downsampled, this will return the last aligned base of the longest alignment.  Else, 0.
-     */
-    public int getDownsamplingExtent();
-
-    /**
-     * Get the reads selected by this selector.
-     *
-     * @return collection of reads selected by this selector.
-     */
-    public Collection<SAMRecord> getSelectedReads();
-
-    /**
-     * Reset this collection to its pre-gathered state.
-     */
-    public void reset();
-}
-
-/**
- * Select every read passed in.
- */
-class AllReadsSelector implements ReadSelector {
-    private Collection<SAMRecord> reads = new LinkedList<SAMRecord>();
-    private long readsSeen = 0;
-    private int downsamplingExtent = 0;
-
-    public void submitRead(SAMRecord read) {
-        reads.add(read);
-        readsSeen++;
-    }
-
-    public void notifyReadRejected(SAMRecord read) {
-        readsSeen++;
-        downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
-    }
-
-    public void complete() {
-        // NO-OP.
-    }
-
-    public long getNumReadsSeen() {
-        return readsSeen;
-    }
-
-    public long getNumReadsSelected() {
-        return readsSeen;
-    }
-
-    public int getDownsamplingExtent() {
-        return downsamplingExtent;
-    }
-
-    public Collection<SAMRecord> getSelectedReads() {
-        return reads;
-    }
-
-    public void reset() {
-        reads.clear();
-        readsSeen = 0;
-        downsamplingExtent = 0;
-    }
-}
-
-
-/**
- * Select N reads randomly from the input stream.
- */
-class NRandomReadSelector implements ReadSelector {
-    private final ReservoirDownsampler<SAMRecord> reservoir;
-    private final ReadSelector chainedSelector;
-    private long readsSeen = 0;
-    private int downsamplingExtent = 0;
-
-    public NRandomReadSelector(ReadSelector chainedSelector, long readLimit) {
-        this.reservoir = new ReservoirDownsampler<SAMRecord>((int) readLimit);
-        this.chainedSelector = chainedSelector;
-    }
-
-    public void submitRead(SAMRecord read) {
-        SAMRecord displaced = reservoir.add(read);
-        if (displaced != null && chainedSelector != null) {
-            chainedSelector.notifyReadRejected(read);
-            downsamplingExtent = Math.max(downsamplingExtent, read.getAlignmentEnd());
-        }
-        readsSeen++;
-    }
-
-    public void notifyReadRejected(SAMRecord read) {
-        readsSeen++;
-    }
-
-    public void complete() {
-        for (SAMRecord read : reservoir.getDownsampledContents())
-            chainedSelector.submitRead(read);
-        if (chainedSelector != null)
-            chainedSelector.complete();
-    }
-
-
-    public long getNumReadsSeen() {
-        return readsSeen;
-    }
-
-    public long getNumReadsSelected() {
-        return reservoir.size();
-    }
-
-    public int getDownsamplingExtent() {
-        return downsamplingExtent;
-    }
-
-    public Collection<SAMRecord> getSelectedReads() {
-        return reservoir.getDownsampledContents();
-    }
-
-    public void reset() {
-        reservoir.clear();
-        downsamplingExtent = 0;
-        if (chainedSelector != null)
-            chainedSelector.reset();
-    }
-}
-
-/**
- * Note: stores reads by sample ID string, not by sample object
- */
-class SamplePartitioner implements ReadSelector {
-    private final Map<String, ReadSelector> readsBySample;
-    private long readsSeen = 0;
-
-    public SamplePartitioner(Map<String, ReadSelector> readSelectors) {
-        readsBySample = readSelectors;
-    }
-
-    public void submitRead(SAMRecord read) {
-        String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
-        if (readsBySample.containsKey(sampleName))
-            readsBySample.get(sampleName).submitRead(read);
-        readsSeen++;
-    }
-
-    public void notifyReadRejected(SAMRecord read) {
-        String sampleName = read.getReadGroup() != null ? read.getReadGroup().getSample() : null;
-        if (readsBySample.containsKey(sampleName))
-            readsBySample.get(sampleName).notifyReadRejected(read);
-        readsSeen++;
-    }
-
-    public void complete() {
-        // NO-OP.
-    }
-
-    public long getNumReadsSeen() {
-        return readsSeen;
-    }
-
-    public long getNumReadsSelected() {
-        return readsSeen;
-    }
-
-    public int getDownsamplingExtent() {
-        int downsamplingExtent = 0;
-        for (ReadSelector storage : readsBySample.values())
-            downsamplingExtent = Math.max(downsamplingExtent, storage.getDownsamplingExtent());
-        return downsamplingExtent;
-    }
-
-    public Collection<SAMRecord> getSelectedReads() {
-        throw new UnsupportedOperationException("Cannot directly get selected reads from a read partitioner.");
-    }
-
-    public ReadSelector getSelectedReads(String sampleName) {
-        if (!readsBySample.containsKey(sampleName))
-            throw new NoSuchElementException("Sample name not found");
-        return readsBySample.get(sampleName);
-    }
-
-    public void reset() {
-        for (ReadSelector storage : readsBySample.values())
-            storage.reset();
-        readsSeen = 0;
-    }
-
-}
+}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java b/public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java
similarity index 94%
rename from public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java
rename to public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java
index a758df431..ba863310d 100644
--- a/public/java/src/org/broadinstitute/sting/utils/ReservoirDownsampler.java
+++ b/public/java/src/org/broadinstitute/sting/utils/LegacyReservoirDownsampler.java
@@ -8,6 +8,8 @@ import java.util.Collection;
 import java.util.Iterator;
 
 /**
+ * THIS IMPLEMENTATION IS BROKEN AND WILL BE REMOVED ONCE THE DOWNSAMPLING ENGINE FORK COLLAPSES
+ *
  * Randomly downsample from a stream of elements.  This algorithm is a direct,
  * naive implementation of reservoir downsampling as described in "Random Downsampling
  * with a Reservoir" (Vitter 1985).  At time of writing, this paper is located here:
@@ -16,7 +18,7 @@ import java.util.Iterator;
  * @author mhanna
  * @version 0.1
  */
-public class ReservoirDownsampler<T> {
+public class LegacyReservoirDownsampler<T> {
     /**
      * The reservoir of elements tracked by this downsampler.
      */
@@ -31,7 +33,7 @@ public class ReservoirDownsampler<T> {
      * Create a new downsampler with the given source iterator and given comparator.
      * @param maxElements What is the maximum number of reads that can be returned in any call of this
      */
-    public ReservoirDownsampler(final int maxElements) {
+    public LegacyReservoirDownsampler(final int maxElements) {
         if(maxElements < 0)
             throw new ReviewedStingException("Unable to work with an negative size collection of elements");
         this.reservoir = new ArrayList<T>(maxElements);
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
index 61c1c51b4..8186f69cf 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/DownsamplerBenchmark.java
@@ -32,11 +32,10 @@ import net.sf.samtools.SAMRecord;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.ReadProperties;
-import org.broadinstitute.sting.gatk.arguments.GATKArgumentCollection;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
-import org.broadinstitute.sting.gatk.iterators.LocusIteratorByState;
+import org.broadinstitute.sting.gatk.iterators.LegacyLocusIteratorByState;
 import org.broadinstitute.sting.gatk.iterators.ReadTransformer;
 import org.broadinstitute.sting.gatk.walkers.qc.CountLoci;
 import org.broadinstitute.sting.utils.GenomeLocParser;
@@ -85,7 +84,7 @@ public class DownsamplerBenchmark extends ReadProcessingBenchmark {
             GenomeLocParser genomeLocParser = new GenomeLocParser(reader.getFileHeader().getSequenceDictionary());
             // Filter unmapped reads.  TODO: is this always strictly necessary?  Who in the GATK normally filters these out?
             Iterator<SAMRecord> readIterator = new FilteringIterator(reader.iterator(),new UnmappedReadFilter());
-            LocusIteratorByState locusIteratorByState = new LocusIteratorByState(readIterator,readProperties,genomeLocParser, LocusIteratorByState.sampleListForSAMWithoutReadGroups());
+            LegacyLocusIteratorByState locusIteratorByState = new LegacyLocusIteratorByState(readIterator,readProperties,genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
             while(locusIteratorByState.hasNext()) {
                 locusIteratorByState.next().getLocation();
             }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java
similarity index 97%
rename from public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java
rename to public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java
index 0807f36dc..73664141f 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ExperimentalReadShardBalancerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/datasources/reads/ReadShardBalancerUnitTest.java
@@ -45,10 +45,10 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 
-public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
+public class ReadShardBalancerUnitTest extends BaseTest {
 
     /**
-     * Tests to ensure that ExperimentalReadShardBalancer works as expected and does not place shard boundaries
+     * Tests to ensure that ReadShardBalancer works as expected and does not place shard boundaries
      * at inappropriate places, such as within an alignment start position
      */
     private static class ExperimentalReadShardBalancerTest extends TestDataProvider {
@@ -74,7 +74,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
             this.stackSize = stackSize;
             this.numUnmappedReads = numUnmappedReads;
 
-            this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, true);
+            this.downsamplingMethod = new DownsamplingMethod(DownsampleType.BY_SAMPLE, downsamplingTargetCoverage, null, false);
             this.expectedReadCount = Math.min(stackSize, downsamplingTargetCoverage) * numStacksPerContig * numContigs + numUnmappedReads;
 
             setName(String.format("%s: numContigs=%d numStacksPerContig=%d stackSize=%d numUnmappedReads=%d downsamplingTargetCoverage=%d",
@@ -96,7 +96,7 @@ public class ExperimentalReadShardBalancerUnitTest extends BaseTest {
                                                          new ArrayList<ReadFilter>(),
                                                          false);
 
-            Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ExperimentalReadShardBalancer());
+            Iterable<Shard> shardIterator = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
 
             SAMRecord readAtEndOfLastShard = null;
             int totalReadsSeen = 0;
diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByStateUnitTest.java
similarity index 50%
rename from public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java
rename to public/java/test/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByStateUnitTest.java
index a49a602c6..44d182661 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateExperimentalUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LegacyLocusIteratorByStateUnitTest.java
@@ -7,10 +7,8 @@ import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
-import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.utils.GenomeLocParser;
-import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -21,14 +19,17 @@ import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
-import java.util.*;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
 
 /**
- * testing of the experimental version of LocusIteratorByState
+ * testing of the LEGACY version of LocusIteratorByState
  */
-public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
+public class LegacyLocusIteratorByStateUnitTest extends BaseTest {
     private static SAMFileHeader header;
-    private LocusIteratorByStateExperimental li;
+    private LegacyLocusIteratorByState li;
     private GenomeLocParser genomeLocParser;
 
     @BeforeClass
@@ -37,8 +38,8 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
         genomeLocParser = new GenomeLocParser(header.getSequenceDictionary());
     }
 
-    private LocusIteratorByStateExperimental makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
-        return new LocusIteratorByStateExperimental(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LocusIteratorByStateExperimental.sampleListForSAMWithoutReadGroups());
+    private LegacyLocusIteratorByState makeLTBS(List<SAMRecord> reads, ReadProperties readAttributes) {
+        return new LegacyLocusIteratorByState(new FakeCloseableIterator<SAMRecord>(reads.iterator()), readAttributes, genomeLocParser, LegacyLocusIteratorByState.sampleListForSAMWithoutReadGroups());
     }
 
     @Test
@@ -328,184 +329,14 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
     // End comprehensive LIBS/PileupElement tests //
     ////////////////////////////////////////////////
 
-
-    ///////////////////////////////////////
-    // Read State Manager Tests          //
-    ///////////////////////////////////////
-
-    private class PerSampleReadStateManagerTest extends TestDataProvider {
-        private List<Integer> readCountsPerAlignmentStart;
-        private List<SAMRecord> reads;
-        private List<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>> recordStatesByAlignmentStart;
-        private int removalInterval;
-
-        public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
-            super(PerSampleReadStateManagerTest.class);
-
-            this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
-            this.removalInterval = removalInterval;
-
-            reads = new ArrayList<SAMRecord>();
-            recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByStateExperimental.SAMRecordState>>();
-
-            setName(String.format("%s: readCountsPerAlignmentStart: %s  removalInterval: %d",
-                                  getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
-        }
-
-        public void run() {
-            LocusIteratorByStateExperimental libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
-            LocusIteratorByStateExperimental.ReadStateManager readStateManager =
-                    libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
-            LocusIteratorByStateExperimental.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
-                    readStateManager.new PerSampleReadStateManager();
-
-            makeReads();
-
-            for ( ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
-                perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
-            }
-
-            // read state manager should have the right number of reads
-            Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
-
-            Iterator<SAMRecord> originalReadsIterator = reads.iterator();
-            Iterator<LocusIteratorByStateExperimental.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
-            int recordStateCount = 0;
-            int numReadStatesRemoved = 0;
-
-            // Do a first-pass validation of the record state iteration by making sure we get back everything we
-            // put in, in the same order, doing any requested removals of read states along the way
-            while ( recordStateIterator.hasNext() ) {
-                LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
-                recordStateCount++;
-                SAMRecord readFromPerSampleReadStateManager = readState.getRead();
-
-                Assert.assertTrue(originalReadsIterator.hasNext());
-                SAMRecord originalRead = originalReadsIterator.next();
-
-                // The read we get back should be literally the same read in memory as we put in
-                Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
-
-                // If requested, remove a read state every removalInterval states
-                if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
-                    recordStateIterator.remove();
-                    numReadStatesRemoved++;
-                }
-            }
-
-            Assert.assertFalse(originalReadsIterator.hasNext());
-
-            // If we removed any read states, do a second pass through the read states to make sure the right
-            // states were removed
-            if ( numReadStatesRemoved > 0 ) {
-                Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
-
-                originalReadsIterator = reads.iterator();
-                recordStateIterator = perSampleReadStateManager.iterator();
-                int readCount = 0;
-                int readStateCount = 0;
-
-                // Match record states with the reads that should remain after removal
-                while ( recordStateIterator.hasNext() ) {
-                    LocusIteratorByStateExperimental.SAMRecordState readState = recordStateIterator.next();
-                    readStateCount++;
-                    SAMRecord readFromPerSampleReadStateManager = readState.getRead();
-
-                    Assert.assertTrue(originalReadsIterator.hasNext());
-
-                    SAMRecord originalRead = originalReadsIterator.next();
-                    readCount++;
-
-                    if ( readCount % removalInterval == 0 ) {
-                        originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
-                        readCount++;
-                    }
-
-                    // The read we get back should be literally the same read in memory as we put in (after accounting for removals)
-                    Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
-                }
-
-                Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
-            }
-
-            // Allow memory used by this test to be reclaimed
-            readCountsPerAlignmentStart = null;
-            reads = null;
-            recordStatesByAlignmentStart = null;
-        }
-
-        private void makeReads() {
-            int alignmentStart = 1;
-
-            for ( int readsThisStack : readCountsPerAlignmentStart ) {
-                ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
-                ArrayList<LocusIteratorByStateExperimental.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByStateExperimental.SAMRecordState>();
-
-                for ( SAMRecord read : stackReads ) {
-                    stackRecordStates.add(new LocusIteratorByStateExperimental.SAMRecordState(read));
-                }
-
-                reads.addAll(stackReads);
-                recordStatesByAlignmentStart.add(stackRecordStates);
-            }
-        }
-    }
-
-    @DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
-    public Object[][] createPerSampleReadStateManagerTests() {
-        for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
-                                                                     Arrays.asList(2),
-                                                                     Arrays.asList(10),
-                                                                     Arrays.asList(1, 1),
-                                                                     Arrays.asList(2, 2),
-                                                                     Arrays.asList(10, 10),
-                                                                     Arrays.asList(1, 10),
-                                                                     Arrays.asList(10, 1),
-                                                                     Arrays.asList(1, 1, 1),
-                                                                     Arrays.asList(2, 2, 2),
-                                                                     Arrays.asList(10, 10, 10),
-                                                                     Arrays.asList(1, 1, 1, 1, 1, 1),
-                                                                     Arrays.asList(10, 10, 10, 10, 10, 10),
-                                                                     Arrays.asList(1, 2, 10, 1, 2, 10)
-                                                                   ) ) {
-
-            for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
-                new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
-            }
-        }
-
-        return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
-    }
-
-    @Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
-    public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
-        logger.warn("Running test: " + test);
-
-        test.run();
-    }
-
-    ///////////////////////////////////////
-    // End Read State Manager Tests      //
-    ///////////////////////////////////////
-
-
-
-    ///////////////////////////////////////
-    // Helper methods / classes          //
-    ///////////////////////////////////////
-
     private static ReadProperties createTestReadProperties() {
-        return createTestReadProperties(null);
-    }
-
-    private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
         return new ReadProperties(
                 Collections.<SAMReaderID>emptyList(),
                 new SAMFileHeader(),
                 SAMFileHeader.SortOrder.coordinate,
                 false,
                 SAMFileReader.ValidationStringency.STRICT,
-                downsamplingMethod,
+                null,
                 new ValidationExclusion(),
                 Collections.<ReadFilter>emptyList(),
                 Collections.<ReadTransformer>emptyList(),
@@ -513,136 +344,137 @@ public class LocusIteratorByStateExperimentalUnitTest extends BaseTest {
                 (byte) -1
         );
     }
+}
 
-    private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
-        Iterator<T> iterator;
+class FakeCloseableIterator<T> implements CloseableIterator<T> {
+    Iterator<T> iterator;
 
-        public FakeCloseableIterator(Iterator<T> it) {
-            iterator = it;
-        }
-
-        @Override
-        public void close() {}
-
-        @Override
-        public boolean hasNext() {
-            return iterator.hasNext();
-        }
-
-        @Override
-        public T next() {
-            return iterator.next();
-        }
-
-        @Override
-        public void remove() {
-            throw new UnsupportedOperationException("Don't remove!");
-        }
+    public FakeCloseableIterator(Iterator<T> it) {
+        iterator = it;
     }
 
-    private static final class LIBS_position {
+    @Override
+    public void close() {}
 
-        SAMRecord read;
+    @Override
+    public boolean hasNext() {
+        return iterator.hasNext();
+    }
 
-        final int numOperators;
-        int currentOperatorIndex = 0;
-        int currentPositionOnOperator = 0;
-        int currentReadOffset = 0;
+    @Override
+    public T next() {
+        return iterator.next();
+    }
 
-        boolean isBeforeDeletionStart = false;
-        boolean isBeforeDeletedBase = false;
-        boolean isAfterDeletionEnd = false;
-        boolean isAfterDeletedBase = false;
-        boolean isBeforeInsertion = false;
-        boolean isAfterInsertion = false;
-        boolean isNextToSoftClip = false;
-
-        boolean sawMop = false;
-
-        public LIBS_position(final SAMRecord read) {
-            this.read = read;
-            numOperators = read.getCigar().numCigarElements();
-        }
-
-        public int getCurrentReadOffset() {
-            return Math.max(0, currentReadOffset - 1);
-        }
-
-        /**
-         * Steps forward on the genome.  Returns false when done reading the read, true otherwise.
-         */
-        public boolean stepForwardOnGenome() {
-            if ( currentOperatorIndex == numOperators )
-                return false;
-
-            CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
-            if ( currentPositionOnOperator >= curElement.getLength() ) {
-                if ( ++currentOperatorIndex == numOperators )
-                    return false;
-
-                curElement = read.getCigar().getCigarElement(currentOperatorIndex);
-                currentPositionOnOperator = 0;
-            }
-
-            switch ( curElement.getOperator() ) {
-                case I: // insertion w.r.t. the reference
-                    if ( !sawMop )
-                        break;
-                case S: // soft clip
-                    currentReadOffset += curElement.getLength();
-                case H: // hard clip
-                case P: // padding
-                    currentOperatorIndex++;
-                    return stepForwardOnGenome();
-
-                case D: // deletion w.r.t. the reference
-                case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
-                    currentPositionOnOperator++;
-                    break;
-
-                case M:
-                case EQ:
-                case X:
-                    sawMop = true;
-                    currentReadOffset++;
-                    currentPositionOnOperator++;
-                    break;
-                default:
-                    throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
-            }
-
-            final boolean isFirstOp = currentOperatorIndex == 0;
-            final boolean isLastOp = currentOperatorIndex == numOperators - 1;
-            final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
-            final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
-
-            isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
-            isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
-            isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
-            isAfterDeletedBase  = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
-            isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
-                    || (!sawMop && curElement.getOperator() == CigarOperator.I);
-            isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
-            isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
-                    || isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
-
-            return true;
-        }
-
-        private static boolean isBeforeOp(final Cigar cigar,
-                                          final int currentOperatorIndex,
-                                          final CigarOperator op,
-                                          final boolean isLastOp,
-                                          final boolean isLastBaseOfOp) {
-            return  !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
-        }
-
-        private static boolean isAfterOp(final Cigar cigar,
-                                         final int currentOperatorIndex,
-                                         final CigarOperator op,
-                                         final boolean isFirstOp,
-                                         final boolean isFirstBaseOfOp) {
-            return  !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
-        }
+    @Override
+    public void remove() {
+        throw new UnsupportedOperationException("Don't remove!");
+    }
+}
+
+
+final class LIBS_position {
+
+    SAMRecord read;
+
+    final int numOperators;
+    int currentOperatorIndex = 0;
+    int currentPositionOnOperator = 0;
+    int currentReadOffset = 0;
+
+    boolean isBeforeDeletionStart = false;
+    boolean isBeforeDeletedBase = false;
+    boolean isAfterDeletionEnd = false;
+    boolean isAfterDeletedBase = false;
+    boolean isBeforeInsertion = false;
+    boolean isAfterInsertion = false;
+    boolean isNextToSoftClip = false;
+
+    boolean sawMop = false;
+
+    public LIBS_position(final SAMRecord read) {
+        this.read = read;
+        numOperators = read.getCigar().numCigarElements();
+    }
+
+    public int getCurrentReadOffset() {
+        return Math.max(0, currentReadOffset - 1);
+    }
+
+    /**
+     * Steps forward on the genome.  Returns false when done reading the read, true otherwise.
+     */
+    public boolean stepForwardOnGenome() {
+        if ( currentOperatorIndex == numOperators )
+            return false;
+
+        CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
+        if ( currentPositionOnOperator >= curElement.getLength() ) {
+            if ( ++currentOperatorIndex == numOperators )
+                return false;
+
+            curElement = read.getCigar().getCigarElement(currentOperatorIndex);
+            currentPositionOnOperator = 0;
+        }
+
+        switch ( curElement.getOperator() ) {
+            case I: // insertion w.r.t. the reference
+                if ( !sawMop )
+                    break;
+            case S: // soft clip
+                currentReadOffset += curElement.getLength();
+            case H: // hard clip
+            case P: // padding
+                currentOperatorIndex++;
+                return stepForwardOnGenome();
+
+            case D: // deletion w.r.t. the reference
+            case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
+                currentPositionOnOperator++;
+                break;
+
+            case M:
+            case EQ:
+            case X:
+                sawMop = true;
+                currentReadOffset++;
+                currentPositionOnOperator++;
+                break;
+            default:
+                throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
+        }
+
+        final boolean isFirstOp = currentOperatorIndex == 0;
+        final boolean isLastOp = currentOperatorIndex == numOperators - 1;
+        final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
+        final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
+
+        isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
+        isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
+        isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
+        isAfterDeletedBase  = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
+        isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
+                || (!sawMop && curElement.getOperator() == CigarOperator.I);
+        isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
+        isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
+                || isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
+
+        return true;
+    }
+
+    private static boolean isBeforeOp(final Cigar cigar,
+                                      final int currentOperatorIndex,
+                                      final CigarOperator op,
+                                      final boolean isLastOp,
+                                      final boolean isLastBaseOfOp) {
+        return  !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
+    }
+
+    private static boolean isAfterOp(final Cigar cigar,
+                                     final int currentOperatorIndex,
+                                     final CigarOperator op,
+                                     final boolean isFirstOp,
+                                     final boolean isFirstBaseOfOp) {
+        return  !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
index 83913fa76..4e4126ad4 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/iterators/LocusIteratorByStateUnitTest.java
@@ -7,8 +7,10 @@ import org.broadinstitute.sting.gatk.ReadProperties;
 import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
 import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
+import org.broadinstitute.sting.gatk.downsampling.DownsamplingMethod;
 import org.broadinstitute.sting.gatk.filters.ReadFilter;
 import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.MathUtils;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.pileup.PileupElement;
 import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
@@ -19,13 +21,10 @@ import org.testng.annotations.BeforeClass;
 import org.testng.annotations.DataProvider;
 import org.testng.annotations.Test;
 
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Iterator;
-import java.util.List;
+import java.util.*;
 
 /**
- * testing of the LocusIteratorByState
+ * testing of the new (non-legacy) version of LocusIteratorByState
  */
 public class LocusIteratorByStateUnitTest extends BaseTest {
     private static SAMFileHeader header;
@@ -329,14 +328,184 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
     // End comprehensive LIBS/PileupElement tests //
     ////////////////////////////////////////////////
 
+
+    ///////////////////////////////////////
+    // Read State Manager Tests          //
+    ///////////////////////////////////////
+
+    private class PerSampleReadStateManagerTest extends TestDataProvider {
+        private List<Integer> readCountsPerAlignmentStart;
+        private List<SAMRecord> reads;
+        private List<ArrayList<LocusIteratorByState.SAMRecordState>> recordStatesByAlignmentStart;
+        private int removalInterval;
+
+        public PerSampleReadStateManagerTest( List<Integer> readCountsPerAlignmentStart, int removalInterval ) {
+            super(PerSampleReadStateManagerTest.class);
+
+            this.readCountsPerAlignmentStart = readCountsPerAlignmentStart;
+            this.removalInterval = removalInterval;
+
+            reads = new ArrayList<SAMRecord>();
+            recordStatesByAlignmentStart = new ArrayList<ArrayList<LocusIteratorByState.SAMRecordState>>();
+
+            setName(String.format("%s: readCountsPerAlignmentStart: %s  removalInterval: %d",
+                                  getClass().getSimpleName(), readCountsPerAlignmentStart, removalInterval));
+        }
+
+        public void run() {
+            LocusIteratorByState libs = makeLTBS(new ArrayList<SAMRecord>(), createTestReadProperties());
+            LocusIteratorByState.ReadStateManager readStateManager =
+                    libs.new ReadStateManager(new ArrayList<SAMRecord>().iterator());
+            LocusIteratorByState.ReadStateManager.PerSampleReadStateManager perSampleReadStateManager =
+                    readStateManager.new PerSampleReadStateManager();
+
+            makeReads();
+
+            for ( ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates : recordStatesByAlignmentStart ) {
+                perSampleReadStateManager.addStatesAtNextAlignmentStart(stackRecordStates);
+            }
+
+            // read state manager should have the right number of reads
+            Assert.assertEquals(reads.size(), perSampleReadStateManager.size());
+
+            Iterator<SAMRecord> originalReadsIterator = reads.iterator();
+            Iterator<LocusIteratorByState.SAMRecordState> recordStateIterator = perSampleReadStateManager.iterator();
+            int recordStateCount = 0;
+            int numReadStatesRemoved = 0;
+
+            // Do a first-pass validation of the record state iteration by making sure we get back everything we
+            // put in, in the same order, doing any requested removals of read states along the way
+            while ( recordStateIterator.hasNext() ) {
+                LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
+                recordStateCount++;
+                SAMRecord readFromPerSampleReadStateManager = readState.getRead();
+
+                Assert.assertTrue(originalReadsIterator.hasNext());
+                SAMRecord originalRead = originalReadsIterator.next();
+
+                // The read we get back should be literally the same read in memory as we put in
+                Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
+
+                // If requested, remove a read state every removalInterval states
+                if ( removalInterval > 0 && recordStateCount % removalInterval == 0 ) {
+                    recordStateIterator.remove();
+                    numReadStatesRemoved++;
+                }
+            }
+
+            Assert.assertFalse(originalReadsIterator.hasNext());
+
+            // If we removed any read states, do a second pass through the read states to make sure the right
+            // states were removed
+            if ( numReadStatesRemoved > 0 ) {
+                Assert.assertEquals(perSampleReadStateManager.size(), reads.size() - numReadStatesRemoved);
+
+                originalReadsIterator = reads.iterator();
+                recordStateIterator = perSampleReadStateManager.iterator();
+                int readCount = 0;
+                int readStateCount = 0;
+
+                // Match record states with the reads that should remain after removal
+                while ( recordStateIterator.hasNext() ) {
+                    LocusIteratorByState.SAMRecordState readState = recordStateIterator.next();
+                    readStateCount++;
+                    SAMRecord readFromPerSampleReadStateManager = readState.getRead();
+
+                    Assert.assertTrue(originalReadsIterator.hasNext());
+
+                    SAMRecord originalRead = originalReadsIterator.next();
+                    readCount++;
+
+                    if ( readCount % removalInterval == 0 ) {
+                        originalRead = originalReadsIterator.next(); // advance to next read, since the previous one should have been discarded
+                        readCount++;
+                    }
+
+                    // The read we get back should be literally the same read in memory as we put in (after accounting for removals)
+                    Assert.assertTrue(originalRead == readFromPerSampleReadStateManager);
+                }
+
+                Assert.assertEquals(readStateCount, reads.size() - numReadStatesRemoved);
+            }
+
+            // Allow memory used by this test to be reclaimed
+            readCountsPerAlignmentStart = null;
+            reads = null;
+            recordStatesByAlignmentStart = null;
+        }
+
+        private void makeReads() {
+            int alignmentStart = 1;
+
+            for ( int readsThisStack : readCountsPerAlignmentStart ) {
+                ArrayList<SAMRecord> stackReads = new ArrayList<SAMRecord>(ArtificialSAMUtils.createStackOfIdenticalArtificialReads(readsThisStack, header, "foo", 0, alignmentStart, MathUtils.randomIntegerInRange(50, 100)));
+                ArrayList<LocusIteratorByState.SAMRecordState> stackRecordStates = new ArrayList<LocusIteratorByState.SAMRecordState>();
+
+                for ( SAMRecord read : stackReads ) {
+                    stackRecordStates.add(new LocusIteratorByState.SAMRecordState(read));
+                }
+
+                reads.addAll(stackReads);
+                recordStatesByAlignmentStart.add(stackRecordStates);
+            }
+        }
+    }
+
+    @DataProvider(name = "PerSampleReadStateManagerTestDataProvider")
+    public Object[][] createPerSampleReadStateManagerTests() {
+        for ( List<Integer> thisTestReadStateCounts : Arrays.asList( Arrays.asList(1),
+                                                                     Arrays.asList(2),
+                                                                     Arrays.asList(10),
+                                                                     Arrays.asList(1, 1),
+                                                                     Arrays.asList(2, 2),
+                                                                     Arrays.asList(10, 10),
+                                                                     Arrays.asList(1, 10),
+                                                                     Arrays.asList(10, 1),
+                                                                     Arrays.asList(1, 1, 1),
+                                                                     Arrays.asList(2, 2, 2),
+                                                                     Arrays.asList(10, 10, 10),
+                                                                     Arrays.asList(1, 1, 1, 1, 1, 1),
+                                                                     Arrays.asList(10, 10, 10, 10, 10, 10),
+                                                                     Arrays.asList(1, 2, 10, 1, 2, 10)
+                                                                   ) ) {
+
+            for ( int removalInterval : Arrays.asList(0, 2, 3) ) {
+                new PerSampleReadStateManagerTest(thisTestReadStateCounts, removalInterval);
+            }
+        }
+
+        return PerSampleReadStateManagerTest.getTests(PerSampleReadStateManagerTest.class);
+    }
+
+    @Test(dataProvider = "PerSampleReadStateManagerTestDataProvider")
+    public void runPerSampleReadStateManagerTest( PerSampleReadStateManagerTest test ) {
+        logger.warn("Running test: " + test);
+
+        test.run();
+    }
+
+    ///////////////////////////////////////
+    // End Read State Manager Tests      //
+    ///////////////////////////////////////
+
+
+
+    ///////////////////////////////////////
+    // Helper methods / classes          //
+    ///////////////////////////////////////
+
     private static ReadProperties createTestReadProperties() {
+        return createTestReadProperties(null);
+    }
+
+    private static ReadProperties createTestReadProperties( DownsamplingMethod downsamplingMethod ) {
         return new ReadProperties(
                 Collections.<SAMReaderID>emptyList(),
                 new SAMFileHeader(),
                 SAMFileHeader.SortOrder.coordinate,
                 false,
                 SAMFileReader.ValidationStringency.STRICT,
-                null,
+                downsamplingMethod,
                 new ValidationExclusion(),
                 Collections.<ReadFilter>emptyList(),
                 Collections.<ReadTransformer>emptyList(),
@@ -344,137 +513,136 @@ public class LocusIteratorByStateUnitTest extends BaseTest {
                 (byte) -1
         );
     }
-}
 
-class FakeCloseableIterator<T> implements CloseableIterator<T> {
-    Iterator<T> iterator;
+    private static class FakeCloseableIterator<T> implements CloseableIterator<T> {
+        Iterator<T> iterator;
 
-    public FakeCloseableIterator(Iterator<T> it) {
-        iterator = it;
+        public FakeCloseableIterator(Iterator<T> it) {
+            iterator = it;
+        }
+
+        @Override
+        public void close() {}
+
+        @Override
+        public boolean hasNext() {
+            return iterator.hasNext();
+        }
+
+        @Override
+        public T next() {
+            return iterator.next();
+        }
+
+        @Override
+        public void remove() {
+            throw new UnsupportedOperationException("Don't remove!");
+        }
     }
 
-    @Override
-    public void close() {}
+    private static final class LIBS_position {
 
-    @Override
-    public boolean hasNext() {
-        return iterator.hasNext();
-    }
+        SAMRecord read;
 
-    @Override
-    public T next() {
-        return iterator.next();
-    }
+        final int numOperators;
+        int currentOperatorIndex = 0;
+        int currentPositionOnOperator = 0;
+        int currentReadOffset = 0;
 
-    @Override
-    public void remove() {
-        throw new UnsupportedOperationException("Don't remove!");
-    }
-}
+        boolean isBeforeDeletionStart = false;
+        boolean isBeforeDeletedBase = false;
+        boolean isAfterDeletionEnd = false;
+        boolean isAfterDeletedBase = false;
+        boolean isBeforeInsertion = false;
+        boolean isAfterInsertion = false;
+        boolean isNextToSoftClip = false;
 
+        boolean sawMop = false;
 
-final class LIBS_position {
+        public LIBS_position(final SAMRecord read) {
+            this.read = read;
+            numOperators = read.getCigar().numCigarElements();
+        }
 
-    SAMRecord read;
+        public int getCurrentReadOffset() {
+            return Math.max(0, currentReadOffset - 1);
+        }
 
-    final int numOperators;
-    int currentOperatorIndex = 0;
-    int currentPositionOnOperator = 0;
-    int currentReadOffset = 0;
-
-    boolean isBeforeDeletionStart = false;
-    boolean isBeforeDeletedBase = false;
-    boolean isAfterDeletionEnd = false;
-    boolean isAfterDeletedBase = false;
-    boolean isBeforeInsertion = false;
-    boolean isAfterInsertion = false;
-    boolean isNextToSoftClip = false;
-
-    boolean sawMop = false;
-
-    public LIBS_position(final SAMRecord read) {
-        this.read = read;
-        numOperators = read.getCigar().numCigarElements();
-    }
-
-    public int getCurrentReadOffset() {
-        return Math.max(0, currentReadOffset - 1);
-    }
-
-    /**
-     * Steps forward on the genome.  Returns false when done reading the read, true otherwise.
-     */
-    public boolean stepForwardOnGenome() {
-        if ( currentOperatorIndex == numOperators )
-            return false;
-
-        CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
-        if ( currentPositionOnOperator >= curElement.getLength() ) {
-            if ( ++currentOperatorIndex == numOperators )
+        /**
+         * Steps forward on the genome.  Returns false when done reading the read, true otherwise.
+         */
+        public boolean stepForwardOnGenome() {
+            if ( currentOperatorIndex == numOperators )
                 return false;
 
-            curElement = read.getCigar().getCigarElement(currentOperatorIndex);
-            currentPositionOnOperator = 0;
-        }
+            CigarElement curElement = read.getCigar().getCigarElement(currentOperatorIndex);
+            if ( currentPositionOnOperator >= curElement.getLength() ) {
+                if ( ++currentOperatorIndex == numOperators )
+                    return false;
 
-        switch ( curElement.getOperator() ) {
-            case I: // insertion w.r.t. the reference
-                if ( !sawMop )
+                curElement = read.getCigar().getCigarElement(currentOperatorIndex);
+                currentPositionOnOperator = 0;
+            }
+
+            switch ( curElement.getOperator() ) {
+                case I: // insertion w.r.t. the reference
+                    if ( !sawMop )
+                        break;
+                case S: // soft clip
+                    currentReadOffset += curElement.getLength();
+                case H: // hard clip
+                case P: // padding
+                    currentOperatorIndex++;
+                    return stepForwardOnGenome();
+
+                case D: // deletion w.r.t. the reference
+                case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
+                    currentPositionOnOperator++;
                     break;
-            case S: // soft clip
-                currentReadOffset += curElement.getLength();
-            case H: // hard clip
-            case P: // padding
-                currentOperatorIndex++;
-                return stepForwardOnGenome();
 
-            case D: // deletion w.r.t. the reference
-            case N: // reference skip (looks and gets processed just like a "deletion", just different logical meaning)
-                currentPositionOnOperator++;
-                break;
+                case M:
+                case EQ:
+                case X:
+                    sawMop = true;
+                    currentReadOffset++;
+                    currentPositionOnOperator++;
+                    break;
+                default:
+                    throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
+            }
 
-            case M:
-            case EQ:
-            case X:
-                sawMop = true;
-                currentReadOffset++;
-                currentPositionOnOperator++;
-                break;
-            default:
-                throw new IllegalStateException("No support for cigar op: " + curElement.getOperator());
+            final boolean isFirstOp = currentOperatorIndex == 0;
+            final boolean isLastOp = currentOperatorIndex == numOperators - 1;
+            final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
+            final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
+
+            isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
+            isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
+            isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
+            isAfterDeletedBase  = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
+            isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
+                    || (!sawMop && curElement.getOperator() == CigarOperator.I);
+            isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
+            isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
+                    || isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
+
+            return true;
         }
 
-        final boolean isFirstOp = currentOperatorIndex == 0;
-        final boolean isLastOp = currentOperatorIndex == numOperators - 1;
-        final boolean isFirstBaseOfOp = currentPositionOnOperator == 1;
-        final boolean isLastBaseOfOp = currentPositionOnOperator == curElement.getLength();
+        private static boolean isBeforeOp(final Cigar cigar,
+                                          final int currentOperatorIndex,
+                                          final CigarOperator op,
+                                          final boolean isLastOp,
+                                          final boolean isLastBaseOfOp) {
+            return  !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
+        }
 
-        isBeforeDeletionStart = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isLastOp, isLastBaseOfOp);
-        isBeforeDeletedBase = isBeforeDeletionStart || (!isLastBaseOfOp && curElement.getOperator() == CigarOperator.D);
-        isAfterDeletionEnd = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.D, isFirstOp, isFirstBaseOfOp);
-        isAfterDeletedBase  = isAfterDeletionEnd || (!isFirstBaseOfOp && curElement.getOperator() == CigarOperator.D);
-        isBeforeInsertion = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isLastOp, isLastBaseOfOp)
-                || (!sawMop && curElement.getOperator() == CigarOperator.I);
-        isAfterInsertion = isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.I, isFirstOp, isFirstBaseOfOp);
-        isNextToSoftClip = isBeforeOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isLastOp, isLastBaseOfOp)
-                || isAfterOp(read.getCigar(), currentOperatorIndex, CigarOperator.S, isFirstOp, isFirstBaseOfOp);
-
-        return true;
-    }
-
-    private static boolean isBeforeOp(final Cigar cigar,
-                                      final int currentOperatorIndex,
-                                      final CigarOperator op,
-                                      final boolean isLastOp,
-                                      final boolean isLastBaseOfOp) {
-        return  !isLastOp && isLastBaseOfOp && cigar.getCigarElement(currentOperatorIndex+1).getOperator() == op;
-    }
-
-    private static boolean isAfterOp(final Cigar cigar,
-                                     final int currentOperatorIndex,
-                                     final CigarOperator op,
-                                     final boolean isFirstOp,
-                                     final boolean isFirstBaseOfOp) {
-        return  !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
+        private static boolean isAfterOp(final Cigar cigar,
+                                         final int currentOperatorIndex,
+                                         final CigarOperator op,
+                                         final boolean isFirstOp,
+                                         final boolean isFirstBaseOfOp) {
+            return  !isFirstOp && isFirstBaseOfOp && cigar.getCigarElement(currentOperatorIndex-1).getOperator() == op;
+        }
     }
 }
diff --git a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java
index bf1fc9e65..d9af2ea7e 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/traversals/TraverseReadsUnitTest.java
@@ -6,7 +6,7 @@ import org.broadinstitute.sting.BaseTest;
 import org.broadinstitute.sting.commandline.Tags;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
-import org.broadinstitute.sting.gatk.datasources.reads.ReadShardBalancer;
+import org.broadinstitute.sting.gatk.datasources.reads.LegacyReadShardBalancer;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
 import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
 import org.broadinstitute.sting.gatk.datasources.reads.Shard;
@@ -114,7 +114,7 @@ public class TraverseReadsUnitTest extends BaseTest {
     @Test
     public void testUnmappedReadCount() {
         SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
-        Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
+        Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new LegacyReadShardBalancer());
 
         countReadWalker.initialize();
         Object accumulator = countReadWalker.reduceInit();
diff --git a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java
index 5b052454a..47096c73a 100644
--- a/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java
+++ b/public/java/test/org/broadinstitute/sting/utils/LegacyReservoirDownsamplerUnitTest.java
@@ -23,14 +23,14 @@ public class LegacyReservoirDownsamplerUnitTest {
 
     @Test
     public void testEmptyIterator() {
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
         Assert.assertTrue(downsampler.isEmpty(),"Downsampler is not empty but should be.");
     }
 
     @Test
     public void testOneElementWithPoolSizeOne() {
         List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@@ -42,7 +42,7 @@ public class LegacyReservoirDownsamplerUnitTest {
     @Test
     public void testOneElementWithPoolSizeGreaterThanOne() {
         List<GATKSAMRecord> reads = Collections.singletonList(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@@ -58,7 +58,7 @@ public class LegacyReservoirDownsamplerUnitTest {
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@@ -78,7 +78,7 @@ public class LegacyReservoirDownsamplerUnitTest {
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@@ -99,7 +99,7 @@ public class LegacyReservoirDownsamplerUnitTest {
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read2",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(0);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(0);
         downsampler.addAll(reads);
 
         Assert.assertTrue(downsampler.isEmpty(),"Downsampler isn't empty but should be");
@@ -115,7 +115,7 @@ public class LegacyReservoirDownsamplerUnitTest {
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read3",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read4",0,1,76));
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read5",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(1);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(1);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");
@@ -128,7 +128,7 @@ public class LegacyReservoirDownsamplerUnitTest {
     public void testFillingAcrossLoci() {
         List<SAMRecord> reads = new ArrayList<SAMRecord>();
         reads.add(ArtificialSAMUtils.createArtificialRead(header,"read1",0,1,76));
-        ReservoirDownsampler<SAMRecord> downsampler = new ReservoirDownsampler<SAMRecord>(5);
+        LegacyReservoirDownsampler<SAMRecord> downsampler = new LegacyReservoirDownsampler<SAMRecord>(5);
         downsampler.addAll(reads);
 
         Assert.assertFalse(downsampler.isEmpty(),"Downsampler is empty but shouldn't be");

From 573ace4403e688504c714d7608ad00e199ca1909 Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Mon, 10 Dec 2012 10:28:56 -0500
Subject: [PATCH 215/236] restore the right version of VariantContextUtils.java
 in my unstable dir

---
 .../variantcontext/VariantContextUtils.java   | 36 ++++++++++++++++++-
 1 file changed, 35 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index 81959c998..b3e3cf8df 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -979,6 +979,40 @@ public class VariantContextUtils {
     private static final List<Allele> NO_CALL_ALLELES = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
     public static final double SUM_GL_THRESH_NOCALL = -0.1; // if sum(gl) is bigger than this threshold, we treat GL's as non-informative and will force a no-call.
 
+    /**
+     * Split variant context into its biallelic components if there are more than 2 alleles
+     *
+     * For VC has A/B/C alleles, returns A/B and A/C contexts.
+     * Genotypes are all no-calls now (it's not possible to fix them easily)
+     * Alleles are right trimmed to satisfy VCF conventions
+     *
+     * If vc is biallelic or non-variant it is just returned
+     *
+     * Chromosome counts are updated (but they are by definition 0)
+     *
+     * @param vc a potentially multi-allelic variant context
+     * @return a list of bi-allelic (or monomorphic) variant context
+     */
+    public static List<VariantContext> splitVariantContextToBiallelics(final VariantContext vc) {
+        if ( ! vc.isVariant() || vc.isBiallelic() )
+            // non variant or biallelics already satisfy the contract
+            return Collections.singletonList(vc);
+        else {
+            final List<VariantContext> biallelics = new LinkedList<VariantContext>();
+
+            for ( final Allele alt : vc.getAlternateAlleles() ) {
+                VariantContextBuilder builder = new VariantContextBuilder(vc);
+                final List<Allele> alleles = Arrays.asList(vc.getReference(), alt);
+                builder.alleles(alleles);
+                builder.genotypes(VariantContextUtils.subsetDiploidAlleles(vc, alleles, false));
+                calculateChromosomeCounts(builder, true);
+                biallelics.add(reverseTrimAlleles(builder.make()));
+            }
+
+            return biallelics;
+        }
+    }
+
     /**
      * subset the Variant Context to the specific set of alleles passed in (pruning the PLs appropriately)
      *
@@ -1233,7 +1267,7 @@ public class VariantContextUtils {
      * @param testString             String to test
      * @return                       Number of repetitions (0 if testString is not a concatenation of n repeatUnit's
      */
-    protected static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
+    public static int findNumberofRepetitions(byte[] repeatUnit, byte[] testString) {
         int numRepeats = 0;
         for (int start = 0; start < testString.length; start += repeatUnit.length) {
             int end = start + repeatUnit.length;

From bdda63d9730182e5b0860ab00fb4ef5f0857b223 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Mon, 10 Dec 2012 14:47:04 -0500
Subject: [PATCH 217/236] Related bug fixes to GGA mode in the HC: some
 variants (especially MNPs) were causing problems because they don't have to
 start at the current location to match the allele being genotyped.  Fixed.

---
 .../gatk/walkers/haplotypecaller/GenotypingEngine.java     | 7 +++----
 .../haplotypecaller/HaplotypeCallerIntegrationTest.java    | 2 +-
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
index fee6c86f8..4d81d0010 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/GenotypingEngine.java
@@ -33,7 +33,6 @@ import org.apache.commons.lang.ArrayUtils;
 import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
 import org.broadinstitute.sting.gatk.walkers.genotyper.UnifiedGenotyperEngine;
 import org.broadinstitute.sting.utils.*;
-import org.broadinstitute.sting.utils.collections.Pair;
 import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
 import org.broadinstitute.sting.utils.genotyper.PerReadAlleleLikelihoodMap;
 import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@@ -449,10 +448,10 @@ public class GenotypingEngine {
 
         final ArrayList<Haplotype> undeterminedHaplotypes = new ArrayList<Haplotype>(haplotypes.size());
         for( final Haplotype h : haplotypes ) {
-            if( h.getEventMap().get(loc) == null ) { // no event at this location so this is a reference-supporting haplotype
-                alleleMapper.get(refAllele).add(h);
-            } else if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
+            if( h.isArtificialHaplotype() && loc == h.getArtificialAllelePosition() && alleleMapper.containsKey(h.getArtificialAllele()) ) {
                 alleleMapper.get(h.getArtificialAllele()).add(h);
+            } else if( h.getEventMap().get(loc) == null ) { // no event at this location so let's investigate later
+                undeterminedHaplotypes.add(h);
             } else {
                 boolean haplotypeIsDetermined = false;
                 for( final VariantContext vcAtThisLoc : eventsAtThisLoc ) {
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
index e9c1ec605..a80137c27 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCallerIntegrationTest.java
@@ -33,7 +33,7 @@ public class HaplotypeCallerIntegrationTest extends WalkerTest {
     @Test
     public void testHaplotypeCallerMultiSampleGGA() {
         HCTest(CEUTRIO_BAM, "--max_alternate_alleles 3 -gt_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + validationDataLocation + "combined.phase1.chr20.raw.indels.sites.vcf",
-                "e2b3bf420c45c677956a2e4a56d75ea2");
+                "fe84caa79f59ecbd98fcbcd5b30ab164");
     }
 
     private void HCTestComplexVariants(String bam, String args, String md5) {

From 8a115edbaf17b0bd798c1d55a126d6be6b7ff7b1 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Thu, 6 Dec 2012 17:06:32 -0500
Subject: [PATCH 218/236] ReduceReads is now scattered by contig

It's no longer safe to scatter/gather by interval because now we don't hard-clip to the intervals anymore.
---
 .../compression/reducereads/ReduceReads.java  |   2 +-
 .../walkers/qc/AssessReducedCoverage.java     | 149 --------------
 .../gatk/walkers/qc/AssessReducedQuals.java   | 183 ------------------
 3 files changed, 1 insertion(+), 333 deletions(-)
 delete mode 100755 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java
 delete mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 629a27c48..2061c5364 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -85,7 +85,7 @@ import java.util.*;
  */
 
 @DocumentedGATKFeature( groupName = "BAM Processing and Analysis Tools", extraDocs = {CommandLineGATK.class} )
-@PartitionBy(PartitionType.INTERVAL)
+@PartitionBy(PartitionType.CONTIG)
 @ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
 public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java
deleted file mode 100755
index d38c11594..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedCoverage.java
+++ /dev/null
@@ -1,149 +0,0 @@
-/*
- * Copyright (c) 2010 The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
- * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.gatk.walkers.qc;
-
-import org.broadinstitute.sting.commandline.*;
-import org.broadinstitute.sting.gatk.CommandLineGATK;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.filters.*;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.*;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-
-import java.io.PrintStream;
-import java.util.*;
-
-/**
- * Emits intervals present in either the original or reduced bam but not the other.
- *
- * <h2>Input</h2>
- * <p>
- * The original and reduced BAM files.
- * </p>
- *
- * <h2>Output</h2>
- * <p>
- * A list of intervals present in one bam but not the other.
- * </p>
- *
- * <h2>Examples</h2>
- * <pre>
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -I:original original.bam \
- *   -I:reduced reduced.bam \
- *   -R ref.fasta \
- *   -T AssessReducedCoverage \
- *   -o output.intervals
- * </pre>
- *
- * @author ebanks
- */
-@DocumentedGATKFeature( groupName = "Quality Control and Simple Analysis Tools", extraDocs = {CommandLineGATK.class} )
-@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
-@Hidden
-public class AssessReducedCoverage extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
-
-    private static final String original = "original";
-    private static final String reduced = "reduced";
-
-    @Output
-    protected PrintStream out;
-
-    @Override
-    public boolean includeReadsWithDeletionAtLoci() { return true; }
-
-    @Argument(fullName = "output_reduced_only_coverage", shortName = "output_reduced_only_coverage", doc = "Output an interval if the reduced bam has coverage where the original does not", required = false)
-    public boolean OUTPUT_REDUCED_ONLY_INTERVALS = false;
-
-    public void initialize() {}
-
-    public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-
-        if ( tracker == null )
-            return null;
-
-        Set<String> tags = getAllTags(context.getBasePileup());
-        return (tags.contains(original) && !tags.contains(reduced)) ||
-                (OUTPUT_REDUCED_ONLY_INTERVALS && tags.contains(reduced) && !tags.contains(original)) ? ref.getLocus() : null;
-    }
-
-    private Set<String> getAllTags(final ReadBackedPileup pileup) {
-
-        final Set<String> tags = new HashSet<String>(10);
-
-        for ( final PileupElement p : pileup ) {
-            if ( (int)p.getQual() > 2 && p.getMappingQual() > 0 && !p.isDeletion() )
-                tags.addAll(getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags());
-        }
-
-        return tags;
-    }
-
-    public void onTraversalDone(GenomeLoc sum) {
-        if ( sum != null )
-            out.println(sum);
-    }
-
-    public GenomeLoc reduceInit() {
-        return null;
-    }
-
-    public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
-        if ( lhs == null )
-            return rhs;
-
-        if ( rhs == null )
-            return lhs;
-
-        // if contiguous, just merge them
-        if ( lhs.contiguousP(rhs) )
-            return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
-
-        // otherwise, print the lhs and start over with the rhs
-        out.println(lhs);
-        return rhs;
-    }
-
-    public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
-        if ( value == null )
-            return sum;
-
-        if ( sum == null )
-            return value;
-
-        // if contiguous, just merge them
-        if ( sum.contiguousP(value) )
-            return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
-
-        // otherwise, print the sum and start over with the value
-        out.println(sum);
-        return value;
-    }
-}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
deleted file mode 100644
index 2c70d44e2..000000000
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/qc/AssessReducedQuals.java
+++ /dev/null
@@ -1,183 +0,0 @@
-package org.broadinstitute.sting.gatk.walkers.qc;
-
-import org.broadinstitute.sting.commandline.Argument;
-import org.broadinstitute.sting.commandline.Output;
-import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
-import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
-import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
-import org.broadinstitute.sting.gatk.walkers.LocusWalker;
-import org.broadinstitute.sting.gatk.walkers.TreeReducible;
-import org.broadinstitute.sting.utils.GenomeLoc;
-import org.broadinstitute.sting.utils.MathUtils;
-import org.broadinstitute.sting.utils.pileup.PileupElement;
-import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
-
-import java.io.PrintStream;
-import java.util.List;
-
-/**
- * Emits intervals in which the differences between the original and reduced bam quals are bigger epsilon (unless the quals of
- * the reduced bam are above sufficient threshold)
- *
- * <h2>Input</h2>
- * <p>
- * The original and reduced BAM files.
- * </p>
- *
- * <h2>Output</h2>
- * <p>
- * A list of intervals in which the differences between the original and reduced bam quals are bigger epsilon.
- * </p>
- *
- * <h2>Examples</h2>
- * <pre>
- * java -Xmx2g -jar GenomeAnalysisTK.jar \
- *   -I:original original.bam \
- *   -I:reduced reduced.bam \
- *   -R ref.fasta \
- *   -T AssessReducedQuals \
- *   -o output.intervals
- * </pre>
- *
- * @author ami
- */
-
-public class AssessReducedQuals extends LocusWalker<GenomeLoc, GenomeLoc> implements TreeReducible<GenomeLoc> {
-
-    private static final String reduced = "reduced";
-    private static final int originalQualsIndex = 0;
-    private static final int reducedQualsIndex = 1;
-
-    @Argument(fullName = "sufficientQualSum", shortName = "sufficientQualSum", doc = "When a reduced bam qual sum is above this threshold, it passes even without comparing to the non-reduced bam ", required = false)
-    public int sufficientQualSum = 600;
-
-    @Argument(fullName = "qual_epsilon", shortName = "epsilon", doc = "when |Quals_reduced_bam - Quals_original_bam| > epsilon*Quals_original_bam we output this interval", required = false)
-    public int qual_epsilon = 0;
-
-    @Argument(fullName = "debugLevel", shortName = "debug", doc = "debug level: NO_DEBUG, PRINT_LOCI,PRINT_PILEUPS", required = false)
-    public DebugLevel debugLevel = DebugLevel.NO_DEBUG;
-
-    @Output
-    protected PrintStream out;
-
-    public void initialize() {
-        if (debugLevel != DebugLevel.NO_DEBUG)
-            out.println("  Debug mode" +
-                        "Debug:\tsufficientQualSum: "+sufficientQualSum+ "\n " +
-                        "Debug:\tqual_epsilon: "+qual_epsilon);
-    }
-
-    @Override
-    public boolean includeReadsWithDeletionAtLoci() { return true; }
-
-    @Override
-    public GenomeLoc map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
-        if ( tracker == null )
-            return null;
-
-        boolean reportLocus;
-        final int[] quals = getPileupQuals(context.getBasePileup());
-        if (debugLevel != DebugLevel.NO_DEBUG)
-            out.println("Debug:\tLocus Quals\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]);
-        final int epsilon = MathUtils.fastRound(quals[originalQualsIndex]*qual_epsilon);
-        final int calcOriginalQuals = Math.min(quals[originalQualsIndex],sufficientQualSum);
-        final int calcReducedQuals = Math.min(quals[reducedQualsIndex],sufficientQualSum);
-        final int OriginalReducedQualDiff = calcOriginalQuals - calcReducedQuals;
-        reportLocus = OriginalReducedQualDiff > epsilon || OriginalReducedQualDiff < -1*epsilon;
-        if(debugLevel != DebugLevel.NO_DEBUG && reportLocus)
-            out.println("Debug:\tEmited Locus\t"+ref.getLocus()+"\toriginal\t"+quals[originalQualsIndex]+"\treduced\t"+quals[reducedQualsIndex]+"\tepsilon\t"+epsilon+"\tdiff\t"+OriginalReducedQualDiff);
-
-        return reportLocus ? ref.getLocus() : null;
-    }
-
-    private int[] getPileupQuals(final ReadBackedPileup readPileup) {
-
-        final int[] quals = new int[2];
-        String[] printPileup = {"Debug 2:\toriginal pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n",
-                                "Debug 2:\t reduced pileup:\t"+readPileup.getLocation()+"\nDebug 2:----------------------------------\n"};
-
-        for( PileupElement p : readPileup ){
-            final List<String> tags = getToolkit().getReaderIDForRead(p.getRead()).getTags().getPositionalTags();
-            if ( isGoodRead(p) ){
-                final int tempQual = (int)(p.getQual()) * p.getRepresentativeCount();
-                final int tagIndex = getTagIndex(tags);
-                quals[tagIndex] += tempQual;
-                if(debugLevel == DebugLevel.PRINT_PILEUPS)
-                    printPileup[tagIndex] += "\tDebug 2: ("+p+")\tMQ="+p.getMappingQual()+":QU="+p.getQual()+":RC="+p.getRepresentativeCount()+":OS="+p.getOffset()+"\n";
-            }
-        }
-        if(debugLevel == DebugLevel.PRINT_PILEUPS){
-            out.println(printPileup[originalQualsIndex]);
-            out.println(printPileup[reducedQualsIndex]);
-        }
-        return quals;
-    }
-
-
-    private boolean isGoodRead(PileupElement p){
-        // TODO -- You want to check whether the read itself is a reduced read and only
-        // TODO --  for them you want to ignore that min mapping quality cutoff (but you *do* still want the min base cutoff).
-        return !p.isDeletion() && ((p.getRead().isReducedRead()) || (!p.getRead().isReducedRead() && (int)p.getQual() >= 20 && p.getMappingQual() >= 20));
-    }
-
-    private int getTagIndex(List<String> tags){
-        return tags.contains(reduced) ? 1 : 0;
-    }
-
-    @Override
-    public void onTraversalDone(GenomeLoc sum) {
-        if ( sum != null )
-            out.println(sum);
-    }
-
-    @Override
-    public GenomeLoc treeReduce(GenomeLoc lhs, GenomeLoc rhs) {
-        if ( lhs == null )
-            return rhs;
-
-        if ( rhs == null )
-            return lhs;
-
-        // if contiguous, just merge them
-        if ( lhs.contiguousP(rhs) )
-            return getToolkit().getGenomeLocParser().createGenomeLoc(lhs.getContig(), lhs.getStart(), rhs.getStop());
-
-        // otherwise, print the lhs and start over with the rhs
-        out.println(lhs);
-        return rhs;
-    }
-
-    @Override
-    public GenomeLoc reduceInit() {
-        return null;
-    }
-
-    @Override
-    public GenomeLoc reduce(GenomeLoc value, GenomeLoc sum) {
-        if ( value == null )
-            return sum;
-
-        if ( sum == null )
-            return value;
-
-        // if contiguous, just merge them
-        if ( sum.contiguousP(value) )
-            return getToolkit().getGenomeLocParser().createGenomeLoc(sum.getContig(), sum.getStart(), value.getStop());
-
-        // otherwise, print the sum and start over with the value
-        out.println(sum);
-        return value;
-    }
-
-    public enum DebugLevel {
-        NO_DEBUG,
-        /**
-         * Print locus level information (such as locus quals) and loci with unmatch quals
-         */
-        PRINT_LOCI,
-        /**
-         * Print the pileup infomarion of the reduced bam files and the original bam files
-         */
-        PRINT_PILEUPS
-    }
-}

From 2e3284f30669c2b28e14dffa9ac0cf49f1f1f7e6 Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Mon, 10 Dec 2012 22:23:58 -0500
Subject: [PATCH 219/236] Continue to fix the case where PRIORITIZE is used but
 no priority list is given. While fixing that case I also removed unnecessary
 sorting, when the prioeity list is not provied. When the priority list is not
 provided, it will continue to be null. Thus, the number of original Variant
 Contexts should be given as a new parameter to simpleMerge (since priority
 might be null). This new parameter is used for checking if there are filtered
 VC, when annotationOrigin is true.

---
 .../walkers/variantutils/CombineVariants.java |  7 ++--
 .../variantcontext/VariantContextUtils.java   | 42 +++++++++++++++++--
 2 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
index 1d4913769..68fac7631 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariants.java
@@ -228,7 +228,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
         if ( genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE && PRIORITY_STRING == null )
             throw new UserException.MissingArgument("rod_priority_list", "Priority string must be provided if you want to prioritize genotypes");
 
-        if ( PRIORITY_STRING != null || genotypeMergeOption == VariantContextUtils.GenotypeMergeType.PRIORITIZE ){
+        if ( PRIORITY_STRING != null){
             priority = new ArrayList<String>(Arrays.asList(PRIORITY_STRING.split(",")));
             if ( rodNames.size() != priority.size() )
                 throw new UserException.BadArgumentValue("rod_priority_list", "The priority list must contain exactly one rod binding per ROD provided to the GATK: rodNames=" + rodNames + " priority=" + priority);
@@ -243,6 +243,7 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
         if ( tracker == null ) // RodWalkers can make funky map calls
             return 0;
 
+        Set<String> rodNames = SampleUtils.getRodNamesWithVCFHeader(getToolkit(), null);
         // get all of the vcf rods at this locus
         // Need to provide reference bases to simpleMerge starting at current locus
         Collection<VariantContext> vcs = tracker.getValues(variants, context.getLocation());
@@ -289,13 +290,13 @@ public class CombineVariants extends RodWalker<Integer, Integer> implements Tree
             for (VariantContext.Type type : VariantContext.Type.values()) {
                 if (VCsByType.containsKey(type))
                     mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), VCsByType.get(type),
-                            priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                            priority, rodNames.size() , filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
                             SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
             }
         }
         else if (multipleAllelesMergeType == VariantContextUtils.MultipleAllelesMergeType.MIX_TYPES) {
             mergedVCs.add(VariantContextUtils.simpleMerge(getToolkit().getGenomeLocParser(), vcs,
-                    priority, filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
+                    priority, rodNames.size(), filteredRecordsMergeType, genotypeMergeOption, true, printComplexMerges,
                     SET_KEY, filteredAreUncalled, MERGE_INFO_WITH_MAX_AC));
         }
         else {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
index b3e3cf8df..8b360eb5e 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContextUtils.java
@@ -448,11 +448,47 @@ public class VariantContextUtils {
                                              final String setKey,
                                              final boolean filteredAreUncalled,
                                              final boolean mergeInfoWithMaxAC ) {
+        int originalNumOfVCs = priorityListOfVCs == null ? 0 : priorityListOfVCs.size();
+        return simpleMerge(genomeLocParser,unsortedVCs,priorityListOfVCs,originalNumOfVCs,filteredRecordMergeType,genotypeMergeOptions,annotateOrigin,printMessages,setKey,filteredAreUncalled,mergeInfoWithMaxAC);
+    }
+
+    /**
+     * Merges VariantContexts into a single hybrid.  Takes genotypes for common samples in priority order, if provided.
+     * If uniquifySamples is true, the priority order is ignored and names are created by concatenating the VC name with
+     * the sample name
+     *
+     * @param genomeLocParser           loc parser
+     * @param unsortedVCs               collection of unsorted VCs
+     * @param priorityListOfVCs         priority list detailing the order in which we should grab the VCs
+     * @param filteredRecordMergeType   merge type for filtered records
+     * @param genotypeMergeOptions      merge option for genotypes
+     * @param annotateOrigin            should we annotate the set it came from?
+     * @param printMessages             should we print messages?
+     * @param setKey                    the key name of the set
+     * @param filteredAreUncalled       are filtered records uncalled?
+     * @param mergeInfoWithMaxAC        should we merge in info from the VC with maximum allele count?
+     * @return new VariantContext       representing the merge of unsortedVCs
+     */
+    public static VariantContext simpleMerge(final GenomeLocParser genomeLocParser,
+                                             final Collection<VariantContext> unsortedVCs,
+                                             final List<String> priorityListOfVCs,
+                                             final int originalNumOfVCs,
+                                             final FilteredRecordMergeType filteredRecordMergeType,
+                                             final GenotypeMergeType genotypeMergeOptions,
+                                             final boolean annotateOrigin,
+                                             final boolean printMessages,
+                                             final String setKey,
+                                             final boolean filteredAreUncalled,
+                                             final boolean mergeInfoWithMaxAC ) {
+
         if ( unsortedVCs == null || unsortedVCs.size() == 0 )
             return null;
 
-        if ( annotateOrigin && priorityListOfVCs == null )
-            throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts");
+        if (priorityListOfVCs != null && originalNumOfVCs != priorityListOfVCs.size())
+            throw new IllegalArgumentException("the number of the original VariantContexts must be the same as the number of VariantContexts in the priority list");
+
+        if ( annotateOrigin && priorityListOfVCs == null && originalNumOfVCs == 0)
+            throw new IllegalArgumentException("Cannot merge calls and annotate their origins without a complete priority list of VariantContexts or the number of original VariantContexts");
 
         if ( genotypeMergeOptions == GenotypeMergeType.REQUIRE_UNIQUE )
             verifyUniqueSampleNames(unsortedVCs);
@@ -597,7 +633,7 @@ public class VariantContextUtils {
 
         if ( annotateOrigin ) { // we care about where the call came from
             String setValue;
-            if ( nFiltered == 0 && variantSources.size() == priorityListOfVCs.size() ) // nothing was unfiltered
+            if ( nFiltered == 0 && variantSources.size() == originalNumOfVCs ) // nothing was unfiltered
                 setValue = MERGE_INTERSECTION;
             else if ( nFiltered == VCs.size() )     // everything was filtered out
                 setValue = MERGE_FILTER_IN_ALL;

From 2f99569dda32da2c0d7124194e4e1c4371d29ee7 Mon Sep 17 00:00:00 2001
From: Ami Levy-Moonshine <ami@broadinstitute.org>
Date: Mon, 10 Dec 2012 22:48:15 -0500
Subject: [PATCH 220/236] change the md5 in one of the CV intergration tests,
 since it wasn't use the priority list when printing the origin of the
 annotation (the setValue field)

---
 .../walkers/variantutils/CombineVariantsIntegrationTest.java    | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
index c32d77f82..28b176d4b 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/CombineVariantsIntegrationTest.java
@@ -137,7 +137,7 @@ public class CombineVariantsIntegrationTest extends WalkerTest {
                         " -priority NA19240_BGI,NA19240_ILLUMINA,NA19240_WUGSC,denovoInfo" +
                         " -genotypeMergeOptions UNIQUIFY -L 1"),
                 1,
-                Arrays.asList("e5f0e7a80cd392172ebf5ddb06b91a00"));
+                Arrays.asList("58e6281df108c361e99673a501ee4749"));
         cvExecuteTest("threeWayWithRefs", spec, true);
     }
 

From dd52a70d459004d32ee9edc5490bde7fb269c53a Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Tue, 11 Dec 2012 10:39:58 -0500
Subject: [PATCH 221/236] Fix AFCalcResult unit test

-- I was simply passing in the wrong values into the function.  Fixed the calls, and expanded the docs on what needs to be passed in.
---
 .../walkers/genotyper/afcalc/AFCalcResultUnitTest.java | 10 +++++-----
 .../gatk/walkers/genotyper/afcalc/AFCalcResult.java    |  3 +++
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
index 016926e12..ee5436264 100644
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResultUnitTest.java
@@ -111,11 +111,11 @@ public class AFCalcResultUnitTest extends BaseTest {
 
     @Test(enabled = true, dataProvider = "TestIsPolymorphic")
     private void testIsPolymorphic(final double pNonRef, final double pThreshold, final boolean shouldBePoly) {
-        final AFCalcResult result = makePolymorphicTestData(pNonRef);
-        final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(pThreshold));
-        Assert.assertEquals(actualIsPoly, shouldBePoly,
-                "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
-                        + actualIsPoly + " but the expected result is " + shouldBePoly);
+            final AFCalcResult result = makePolymorphicTestData(pNonRef);
+            final boolean actualIsPoly = result.isPolymorphic(C, Math.log10(1 - pThreshold));
+            Assert.assertEquals(actualIsPoly, shouldBePoly,
+                    "isPolymorphic with pNonRef " + pNonRef + " and threshold " + pThreshold + " returned "
+                            + actualIsPoly + " but the expected result is " + shouldBePoly);
     }
 
     @Test(enabled = true, dataProvider = "TestIsPolymorphic")
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
index d6a5cb16d..142469077 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/afcalc/AFCalcResult.java
@@ -230,6 +230,9 @@ public class AFCalcResult {
      * And that log10minPNonRef is -3.
      * We are considered polymorphic since 10^-5 < 10^-3 => -5 < -3
      *
+     * Note that log10minPNonRef is really the minimum confidence, scaled as an error rate, so
+     * if you want to be 99% confidence, then log10PNonRef should be log10(0.01) = -2.
+     *
      * @param log10minPNonRef the log10 scaled min pr of being non-ref to be considered polymorphic
      *
      * @return true if there's enough confidence (relative to log10minPNonRef) to reject AF == 0

From 5632c13bf247826920018fb9c881470504567d4e Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Wed, 12 Dec 2012 10:27:07 -0500
Subject: [PATCH 224/236] Resolves GSA-681 / Compressed VCF.gz output is too
 big because of unnecessary call to flush().

-- Now compressed output VCFs are properly blocked compressed (i.e., they are actually smaller than the uncompressed VCF)
---
 .../walkers/genotyper/UnifiedGenotyperIntegrationTest.java     | 2 +-
 .../sting/utils/variantcontext/writer/VCFWriter.java           | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index f2b2dfb7d..c768f95ad 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     //
     // --------------------------------------------------------------------------------------------------------------
 
-    private final static String COMPRESSED_OUTPUT_MD5 = "5b8f477c287770b5769b05591e35bc2d";
+    private final static String COMPRESSED_OUTPUT_MD5 = "3eba6c309514d1e9ee06a20a112b68e6";
 
     @Test
     public void testCompressedOutput() {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
index 9a987f161..974e50ced 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
@@ -248,7 +248,8 @@ class VCFWriter extends IndexingVariantContextWriter {
             }
             
             mWriter.write("\n");
-            mWriter.flush();  // necessary so that writing to an output stream will work
+            // note that we cannot call flush here if we want block gzipping to work properly
+            // calling flush results in all gzipped blocks for each variant
         } catch (IOException e) {
             throw new RuntimeException("Unable to write the VCF object to " + getStreamName(), e);
         }

From a52e3c7e15a1bca6221df4d881642cfce590f84d Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 12 Dec 2012 13:09:39 -0500
Subject: [PATCH 225/236] Revert "Bug fix for RR: don't let the softclip start
 position be less than 1" this introduced a bug in reduce reads by
 de-activating it's hard clipping of the out of bounds soft-clips (specially
 in the MT). DEV-322 #resolve #time 4m

This reverts commit 42acfd9d0bccfc0411944c342a5b889f5feae736.
---
 .../src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java  | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
index 6c7a162f8..9fdb48b34 100755
--- a/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
+++ b/public/java/src/org/broadinstitute/sting/utils/sam/GATKSAMRecord.java
@@ -397,9 +397,6 @@ public class GATKSAMRecord extends BAMRecord {
                 else if (op != CigarOperator.HARD_CLIP)
                     break;
             }
-
-            if ( softStart < 1 )
-                softStart = 1;
         }
         return softStart;
     }

From 33290bfe0c9dd1d19550044e379413326be2ef47 Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Wed, 12 Dec 2012 13:41:01 -0500
Subject: [PATCH 226/236] Added integration test to catch the read off contig
 in ReduceReads.

So upstream changes won't break it again.
---
 .../reducereads/ReduceReadsIntegrationTest.java       | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index 1e539dc9d..f0e8b76d4 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -17,6 +17,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     final String COREDUCTION_BAM_A = validationDataLocation + "coreduction.test.A.bam";
     final String COREDUCTION_BAM_B = validationDataLocation + "coreduction.test.B.bam";
     final String COREDUCTION_L = " -L 1:1,853,860-1,854,354 -L 1:1,884,131-1,892,057";
+    final String OFFCONTIG_BAM = privateTestDir + "readOffb37contigMT.bam";
 
     private void RRTest(String testName, String args, String md5) {
         String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s ";
@@ -86,5 +87,15 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
         executeTest("testCoReduction", new WalkerTestSpec(base, Arrays.asList("5c30fde961a1357bf72c15144c01981b")));
     }
 
+    /**
+     * Bug happens when reads are soft-clipped off the  contig (usually in the MT). This test guarantees no changes to the upstream code will
+     * break the current hard-clipping routine that protects reduce reads from such reads.
+     */
+    @Test(enabled = true)
+    public void testReadOffContig() {
+        String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
+        executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("53e16367d333da0b7d40a7683a35c95f")));
+    }
+
 }
 

From 211a6e78ea2404f714ecaeec6460fa52233689ba Mon Sep 17 00:00:00 2001
From: Ryan Poplin <rpoplin@broadinstitute.org>
Date: Wed, 12 Dec 2012 14:53:02 -0500
Subject: [PATCH 227/236] Further related bug fixes to GGA mode in the HC: some
 variants (especially MNPs) were causing problems because they don't have to
 start at the current location to match the allele being genotyped.  Fixed.

---
 .../haplotypecaller/SimpleDeBruijnAssembler.java    | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
index 4f072d720..3c5a1f79c 100755
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/SimpleDeBruijnAssembler.java
@@ -282,10 +282,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
         for( final VariantContext compVC : activeAllelesToGenotype ) {
             for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
                 final Haplotype insertedRefHaplotype = refHaplotype.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart());
-                if( !addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
-                    return returnHaplotypes;
-                    //throw new ReviewedStingException("Unable to add reference+allele haplotype during GGA-enabled assembly: " + insertedRefHaplotype);
-                }
+                addHaplotype( insertedRefHaplotype, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, true );
             }
         }
 
@@ -293,7 +290,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
             for ( final KBestPaths.Path path : KBestPaths.getKBestPaths(graph, NUM_BEST_PATHS_PER_KMER_GRAPH) ) {
 
                 final Haplotype h = new Haplotype( path.getBases( graph ), path.getScore() );
-                if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop ) ) {
+                if( addHaplotype( h, fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false ) ) {
 
                     // for GGA mode, add the desired allele into the haplotype if it isn't already present
                     if( !activeAllelesToGenotype.isEmpty() ) {
@@ -308,7 +305,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
                             //  falls into the bin for the 1bp deletion because we keep track of the artificial alleles).
                             if( vcOnHaplotype == null ) {
                                 for( final Allele compAltAllele : compVC.getAlternateAlleles() ) {
-                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop );
+                                    addHaplotype( h.insertAllele(compVC.getReference(), compAltAllele, activeRegionStart + compVC.getStart() - activeRegionWindow.getStart(), compVC.getStart()), fullReferenceWithPadding, returnHaplotypes, activeRegionStart, activeRegionStop, false );
                                 }
                             }
                         }
@@ -332,7 +329,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
         return returnHaplotypes;
     }
 
-    private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop ) {
+    private boolean addHaplotype( final Haplotype haplotype, final byte[] ref, final ArrayList<Haplotype> haplotypeList, final int activeRegionStart, final int activeRegionStop, final boolean FORCE_INCLUSION_FOR_GGA_MODE ) {
         if( haplotype == null ) { return false; }
 
         final SWPairwiseAlignment swConsensus = new SWPairwiseAlignment( ref, haplotype.getBases(), SW_MATCH, SW_MISMATCH, SW_GAP, SW_GAP_EXTEND );
@@ -387,7 +384,7 @@ public class SimpleDeBruijnAssembler extends LocalAssemblyEngine {
             return false;
         }
 
-        if( !haplotypeList.contains(h) ) {
+        if( FORCE_INCLUSION_FOR_GGA_MODE || !haplotypeList.contains(h) ) {
             haplotypeList.add(h);
             return true;
         } else {

From bba63a3b0ed94bf4d604d5a7f15e33f0f52fa930 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 12 Dec 2012 20:25:45 +0000
Subject: [PATCH 228/236] Fix for GSA-615:
 UnifiedGenotyperEngine.getGLModelsToUse takes 5% of the runtime of UG, should
 be optimized away.

---
 .../GenotypeLikelihoodsCalculationModel.java  |  4 +-
 .../genotyper/UnifiedGenotyperEngine.java     | 74 ++++++++++---------
 2 files changed, 42 insertions(+), 36 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
index ae9b01f2d..a8ee4afde 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/GenotypeLikelihoodsCalculationModel.java
@@ -59,8 +59,8 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
     public enum Model {
         SNP,
         INDEL,
-        GeneralPloidySNP,
-        GeneralPloidyINDEL,
+        GENERALPLOIDYSNP,
+        GENERALPLOIDYINDEL,
         BOTH
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
index cc086b148..8f2588679 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java
@@ -52,7 +52,7 @@ import java.util.*;
 
 public class UnifiedGenotyperEngine {
     public static final String LOW_QUAL_FILTER_NAME = "LowQual";
-    private static final String GPSTRING = "GeneralPloidy";
+    private static final String GPSTRING = "GENERALPLOIDY";
 
     public static final String NUMBER_OF_DISCOVERED_ALLELES_KEY = "NDA";
 
@@ -79,6 +79,7 @@ public class UnifiedGenotyperEngine {
 
     // the model used for calculating genotypes
     private ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>> glcm = new ThreadLocal<Map<String, GenotypeLikelihoodsCalculationModel>>();
+    private final List<GenotypeLikelihoodsCalculationModel.Model> modelsToUse = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
 
     // the model used for calculating p(non-ref)
     private ThreadLocal<AFCalc> afcm = new ThreadLocal<AFCalc>();
@@ -134,6 +135,8 @@ public class UnifiedGenotyperEngine {
         computeAlleleFrequencyPriors(N, log10AlleleFrequencyPriorsIndels, UAC.INDEL_HETEROZYGOSITY);
 
         filter.add(LOW_QUAL_FILTER_NAME);
+
+        determineGLModelsToUse();
     }
 
     /**
@@ -286,7 +289,7 @@ public class UnifiedGenotyperEngine {
             glcm.set(getGenotypeLikelihoodsCalculationObject(logger, UAC));
         }
 
-        return glcm.get().get(model.name().toUpperCase()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
+        return glcm.get().get(model.name()).getLikelihoods(tracker, refContext, stratifiedContexts, type, alternateAllelesToUse, useBAQedPileup && BAQEnabledOnCMDLine, genomeLocParser, perReadAlleleLikelihoodMap);
     }
 
     private VariantCallContext generateEmptyContext(RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, AlignmentContext rawContext) {
@@ -634,48 +637,51 @@ public class UnifiedGenotyperEngine {
                 (UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES && QualityUtils.phredScaleErrorRate(PofF) >= UAC.STANDARD_CONFIDENCE_FOR_CALLING);
     }
 
+    private void determineGLModelsToUse() {
+
+        String modelPrefix = "";
+        if ( !UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY )
+            modelPrefix = GPSTRING;
+
+        if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
+            modelPrefix += UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
+            modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
+            modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
+        }
+        else {
+            modelsToUse.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
+        }
+    }
+
     // decide whether we are currently processing SNPs, indels, neither, or both
     private List<GenotypeLikelihoodsCalculationModel.Model> getGLModelsToUse(final RefMetaDataTracker tracker,
                                                                              final ReferenceContext refContext,
                                                                              final AlignmentContext rawContext) {
 
-        final List<GenotypeLikelihoodsCalculationModel.Model> models = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(2);
-        String modelPrefix = "";
-        if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") )
-            modelPrefix = UAC.GLmodel.name().toUpperCase().replaceAll("BOTH","");
+        if ( UAC.GenotypingMode != GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES )
+            return modelsToUse;
 
-        if (!UAC.GLmodel.name().contains(GPSTRING) && UAC.samplePloidy != VariantContextUtils.DEFAULT_PLOIDY)
-            modelPrefix = GPSTRING + modelPrefix;
+        // if we're genotyping given alleles then we need to choose the model corresponding to the variant type requested
+        final List<GenotypeLikelihoodsCalculationModel.Model> GGAmodel = new ArrayList<GenotypeLikelihoodsCalculationModel.Model>(1);
+        final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
+        if ( vcInput == null )
+            return GGAmodel; // no work to be done
 
-        // if we're genotyping given alleles and we have a requested SNP at this position, do SNP
-        if ( UAC.GenotypingMode == GenotypeLikelihoodsCalculationModel.GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES ) {
-            final VariantContext vcInput = getVCFromAllelesRod(tracker, refContext, rawContext.getLocation(), false, logger, UAC.alleles);
-            if ( vcInput == null )
-                return models;
-
-            if ( vcInput.isSNP() )  {
-                // ignore SNPs if the user chose INDEL mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("SNP") )
-                    models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
-            }
-            else if ( vcInput.isIndel() || vcInput.isMixed() ) {
-                // ignore INDELs if the user chose SNP mode only
-                if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") || UAC.GLmodel.name().toUpperCase().contains("INDEL") )
-                    models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
-            }
-            // No support for other types yet
+        if ( vcInput.isSNP() )  {
+            // use the SNP model unless the user chose INDEL mode only
+            if ( modelsToUse.size() == 2 || modelsToUse.get(0).name().endsWith("SNP") )
+                GGAmodel.add(modelsToUse.get(0));
         }
-        else {
-            if ( UAC.GLmodel.name().toUpperCase().contains("BOTH") ) {
-                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"SNP"));
-                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+"INDEL"));
-            }
-            else {
-                models.add(GenotypeLikelihoodsCalculationModel.Model.valueOf(modelPrefix+UAC.GLmodel.name().toUpperCase()));
-            }
+        else if ( vcInput.isIndel() || vcInput.isMixed() ) {
+            // use the INDEL model unless the user chose SNP mode only
+            if ( modelsToUse.size() == 2 )
+                GGAmodel.add(modelsToUse.get(1));
+            else if ( modelsToUse.get(0).name().endsWith("INDEL") )
+                GGAmodel.add(modelsToUse.get(0));
         }
+        // No support for other types yet
 
-        return models;
+        return GGAmodel;
     }
 
     public static void computeAlleleFrequencyPriors(final int N, final double[] priors, final double theta) {

From 62eaffdf0a475888bec87771a3c1499b0bbce524 Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Wed, 12 Dec 2012 20:28:04 +0000
Subject: [PATCH 229/236] Fix docs for ReadBackedPhasing

---
 .../sting/gatk/walkers/phasing/ReadBackedPhasing.java          | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
index eda43e6a5..7d848d0d4 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/ReadBackedPhasing.java
@@ -135,6 +135,9 @@ public class ReadBackedPhasing extends RodWalker<PhasingStatsAndOutput, PhasingS
     @Argument(fullName = "permitNoSampleOverlap", shortName = "permitNoSampleOverlap", doc = "Don't exit (just WARN) when the VCF and BAMs do not overlap in samples", required = false)
     private boolean permitNoSampleOverlap = false;
 
+    /**
+     * Important note: do not use this argument if your input data set is not already phased or it will cause the tool to skip over all heterozygous sites.
+     */
     @Argument(fullName = "respectPhaseInInput", shortName = "respectPhaseInInput", doc = "Will only phase genotypes in cases where the resulting output will necessarily be consistent with any existing phase (for example, from trios)", required = false)
     private boolean respectPhaseInInput = false;
 

From 5e66109268a9b17a168cf7ee014c345e53061c4b Mon Sep 17 00:00:00 2001
From: Yossi Farjoun <farjoun@broadinstitute.org>
Date: Wed, 12 Dec 2012 15:26:38 -0500
Subject: [PATCH 230/236] Replaced a useless getInt with a skipInt to remove
 1/4 of the initial seek time in the BAM Index.

---
 .../sting/gatk/datasources/reads/GATKBAMIndex.java          | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
index 73301c511..e3a1b61bd 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java
@@ -282,7 +282,7 @@ public class GATKBAMIndex {
             final int nBins = readInteger();
             // System.out.println("# nBins: " + nBins);
             for (int j = 0; j < nBins; j++) {
-                final int bin = readInteger();
+                skipInteger();
                 final int nChunks = readInteger();
                 // System.out.println("# bin[" + j + "] = " + bin + ", nChunks = " + nChunks);
                 skipBytes(16 * nChunks);
@@ -334,6 +334,10 @@ public class GATKBAMIndex {
         return buffer.getInt();
     }
 
+    private void skipInteger() {
+        skipBytes(INT_SIZE_IN_BYTES);
+     }
+
     /**
      * Reads an array of <count> longs from the file channel, returning the results as an array.
      * @param count Number of longs to read.

From aeab932c63aa2143e4a97a520e6445ab764f82e8 Mon Sep 17 00:00:00 2001
From: Mark DePristo <depristo@broadinstitute.org>
Date: Thu, 13 Dec 2012 12:24:08 -0500
Subject: [PATCH 234/236] Actual working version of unflushing VCFWriter

-- Uses high-performance local writer backed by byte array that writes the entire VCF line in some write operation to the underlying output stream.
-- Fixes problems with indexing of unflushed writes while still allowing efficient block zipping
-- Same (or better) IO performance as previous implementation
-- IndexingVariantContextWriter now properly closes the underlying output stream when it's closed
-- Updated compressed VCF output file
---
 .../UnifiedGenotyperIntegrationTest.java      |   2 +-
 .../writer/IndexingVariantContextWriter.java  |  13 +-
 .../variantcontext/writer/VCFWriter.java      | 129 ++++++++++++------
 3 files changed, 97 insertions(+), 47 deletions(-)

diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
index c768f95ad..4f88732b0 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperIntegrationTest.java
@@ -96,7 +96,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
     //
     // --------------------------------------------------------------------------------------------------------------
 
-    private final static String COMPRESSED_OUTPUT_MD5 = "3eba6c309514d1e9ee06a20a112b68e6";
+    private final static String COMPRESSED_OUTPUT_MD5 = "af8187e2baf516dde1cddea787a52b8a";
 
     @Test
     public void testCompressedOutput() {
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java
index df0feda8b..298f12da3 100644
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/IndexingVariantContextWriter.java
@@ -93,16 +93,19 @@ abstract class IndexingVariantContextWriter implements VariantContextWriter {
      * attempt to close the VCF file
      */
     public void close() {
-        // try to close the index stream (keep it separate to help debugging efforts)
-        if ( indexer != null ) {
-            try {
+        try {
+            // try to close the index stream (keep it separate to help debugging efforts)
+            if ( indexer != null ) {
                 Index index = indexer.finalizeIndex(positionalOutputStream.getPosition());
                 IndexDictionaryUtils.setIndexSequenceDictionary(index, refDict);
                 index.write(idxStream);
                 idxStream.close();
-            } catch (IOException e) {
-                throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
             }
+
+            // close the underlying output stream as well
+            outputStream.close();
+        } catch (IOException e) {
+            throw new ReviewedStingException("Unable to close index for " + getStreamName(), e);
         }
     }
 
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
index 974e50ced..4c35d6181 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java
@@ -34,6 +34,7 @@ import org.broadinstitute.sting.utils.variantcontext.*;
 
 import java.io.*;
 import java.lang.reflect.Array;
+import java.nio.charset.Charset;
 import java.util.*;
 
 /**
@@ -42,9 +43,6 @@ import java.util.*;
 class VCFWriter extends IndexingVariantContextWriter {
     private final static String VERSION_LINE = VCFHeader.METADATA_INDICATOR + VCFHeaderVersion.VCF4_1.getFormatString() + "=" + VCFHeaderVersion.VCF4_1.getVersionString();
 
-    // the print stream we're writing to
-    final protected BufferedWriter mWriter;
-
     // should we write genotypes or just sites?
     final protected boolean doNotWriteGenotypes;
 
@@ -53,15 +51,33 @@ class VCFWriter extends IndexingVariantContextWriter {
 
     final private boolean allowMissingFieldsInHeader;
 
+    /**
+     * The VCF writer uses an internal Writer, based by the ByteArrayOutputStream lineBuffer,
+     * to temp. buffer the header and per-site output before flushing the per line output
+     * in one go to the super.getOutputStream.  This results in high-performance, proper encoding,
+     * and allows us to avoid flushing explicitly the output stream getOutputStream, which
+     * allows us to properly compress vcfs in gz format without breaking indexing on the fly
+     * for uncompressed streams.
+     */
+    private static final int INITIAL_BUFFER_SIZE = 1024 * 16;
+    private final ByteArrayOutputStream lineBuffer = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
+    private final Writer writer;
+
+    /**
+     * The encoding used for VCF files.  ISO-8859-1
+     */
+    final private Charset charset;
+
     private IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
 
     public VCFWriter(final File location, final OutputStream output, final SAMSequenceDictionary refDict,
                      final boolean enableOnTheFlyIndexing, boolean doNotWriteGenotypes,
                      final boolean allowMissingFieldsInHeader ) {
         super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
-        mWriter = new BufferedWriter(new OutputStreamWriter(getOutputStream())); // todo -- fix buffer size
         this.doNotWriteGenotypes = doNotWriteGenotypes;
         this.allowMissingFieldsInHeader = allowMissingFieldsInHeader;
+        this.charset = Charset.forName("ISO-8859-1");
+        this.writer = new OutputStreamWriter(lineBuffer, charset);
     }
 
     // --------------------------------------------------------------------------------
@@ -70,14 +86,44 @@ class VCFWriter extends IndexingVariantContextWriter {
     //
     // --------------------------------------------------------------------------------
 
+    /**
+     * Write String s to the internal buffered writer.
+     *
+     * flushBuffer() must be called to actually write the data to the true output stream.
+     *
+     * @param s the string to write
+     * @throws IOException
+     */
+    private void write(final String s) throws IOException {
+        writer.write(s);
+    }
+
+    /**
+     * Actually write the line buffer contents to the destination output stream.
+     *
+     * After calling this function the line buffer is reset, so the contents of the buffer can be reused
+     *
+     * @throws IOException
+     */
+    private void flushBuffer() throws IOException {
+        writer.flush();
+        getOutputStream().write(lineBuffer.toByteArray());
+        lineBuffer.reset();
+    }
+
     @Override
     public void writeHeader(VCFHeader header) {
         // note we need to update the mHeader object after this call because they header
         // may have genotypes trimmed out of it, if doNotWriteGenotypes is true
-        mHeader = writeHeader(header, mWriter, doNotWriteGenotypes, getVersionLine(), getStreamName());
+        try {
+            mHeader = writeHeader(header, writer, doNotWriteGenotypes, getVersionLine(), getStreamName());
+            flushBuffer();
+        } catch ( IOException e ) {
+            throw new UserException.CouldNotCreateOutputFile(getStreamName(), e);
+        }
     }
 
-    public static final String getVersionLine() {
+    public static String getVersionLine() {
         return VERSION_LINE;
     }
 
@@ -138,8 +184,8 @@ class VCFWriter extends IndexingVariantContextWriter {
     public void close() {
         // try to close the vcf stream
         try {
-            mWriter.flush();
-            mWriter.close();
+            // TODO -- would it be useful to null out the line buffer so we don't have it around unnecessarily?
+            writer.close();
         } catch (IOException e) {
             throw new ReviewedStingException("Unable to close " + getStreamName(), e);
         }
@@ -166,51 +212,51 @@ class VCFWriter extends IndexingVariantContextWriter {
             Map<Allele, String> alleleMap = buildAlleleMap(vc);
 
             // CHROM
-            mWriter.write(vc.getChr());
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(vc.getChr());
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // POS
-            mWriter.write(String.valueOf(vc.getStart()));
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(String.valueOf(vc.getStart()));
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // ID
             String ID = vc.getID();
-            mWriter.write(ID);
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(ID);
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // REF
             String refString = vc.getReference().getDisplayString();
-            mWriter.write(refString);
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(refString);
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // ALT
             if ( vc.isVariant() ) {
                 Allele altAllele = vc.getAlternateAllele(0);
                 String alt = altAllele.getDisplayString();
-                mWriter.write(alt);
+                write(alt);
 
                 for (int i = 1; i < vc.getAlternateAlleles().size(); i++) {
                     altAllele = vc.getAlternateAllele(i);
                     alt = altAllele.getDisplayString();
-                    mWriter.write(",");
-                    mWriter.write(alt);
+                    write(",");
+                    write(alt);
                 }
             } else {
-                mWriter.write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
+                write(VCFConstants.EMPTY_ALTERNATE_ALLELE_FIELD);
             }
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // QUAL
             if ( !vc.hasLog10PError() )
-                mWriter.write(VCFConstants.MISSING_VALUE_v4);
+                write(VCFConstants.MISSING_VALUE_v4);
             else
-                mWriter.write(formatQualValue(vc.getPhredScaledQual()));
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+                write(formatQualValue(vc.getPhredScaledQual()));
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // FILTER
             String filters = getFilterString(vc);
-            mWriter.write(filters);
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(filters);
+            write(VCFConstants.FIELD_SEPARATOR);
 
             // INFO
             Map<String, String> infoFields = new TreeMap<String, String>();
@@ -229,8 +275,8 @@ class VCFWriter extends IndexingVariantContextWriter {
             // FORMAT
             final GenotypesContext gc = vc.getGenotypes();
             if ( gc.isLazyWithData() && ((LazyGenotypesContext)gc).getUnparsedGenotypeData() instanceof String ) {
-                mWriter.write(VCFConstants.FIELD_SEPARATOR);
-                mWriter.write(((LazyGenotypesContext)gc).getUnparsedGenotypeData().toString());
+                write(VCFConstants.FIELD_SEPARATOR);
+                write(((LazyGenotypesContext) gc).getUnparsedGenotypeData().toString());
             } else {
                 List<String> genotypeAttributeKeys = calcVCFGenotypeKeys(vc, mHeader);
                 if ( ! genotypeAttributeKeys.isEmpty() ) {
@@ -240,16 +286,17 @@ class VCFWriter extends IndexingVariantContextWriter {
 
                     final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys);
 
-                    mWriter.write(VCFConstants.FIELD_SEPARATOR);
-                    mWriter.write(genotypeFormatString);
+                    write(VCFConstants.FIELD_SEPARATOR);
+                    write(genotypeFormatString);
 
                     addGenotypeData(vc, alleleMap, genotypeAttributeKeys);
                 }
             }
             
-            mWriter.write("\n");
+            write("\n");
             // note that we cannot call flush here if we want block gzipping to work properly
             // calling flush results in all gzipped blocks for each variant
+            flushBuffer();
         } catch (IOException e) {
             throw new RuntimeException("Unable to write the VCF object to " + getStreamName(), e);
         }
@@ -305,7 +352,7 @@ class VCFWriter extends IndexingVariantContextWriter {
      */
     private void writeInfoString(Map<String, String> infoFields) throws IOException {
         if ( infoFields.isEmpty() ) {
-            mWriter.write(VCFConstants.EMPTY_INFO_FIELD);
+            write(VCFConstants.EMPTY_INFO_FIELD);
             return;
         }
 
@@ -314,16 +361,16 @@ class VCFWriter extends IndexingVariantContextWriter {
             if ( isFirst )
                 isFirst = false;
             else
-                mWriter.write(VCFConstants.INFO_FIELD_SEPARATOR);
+                write(VCFConstants.INFO_FIELD_SEPARATOR);
 
             String key = entry.getKey();
-            mWriter.write(key);
+            write(key);
 
             if ( !entry.getValue().equals("") ) {
                 VCFInfoHeaderLine metaData = mHeader.getInfoHeaderLine(key);
                 if ( metaData == null || metaData.getCountType() != VCFHeaderLineCount.INTEGER || metaData.getCount() != 0 ) {
-                    mWriter.write("=");
-                    mWriter.write(entry.getValue());
+                    write("=");
+                    write(entry.getValue());
                 }
             }
         }
@@ -342,7 +389,7 @@ class VCFWriter extends IndexingVariantContextWriter {
         final int ploidy = vc.getMaxPloidy(2);
 
         for ( String sample : mHeader.getGenotypeSamples() ) {
-            mWriter.write(VCFConstants.FIELD_SEPARATOR);
+            write(VCFConstants.FIELD_SEPARATOR);
 
             Genotype g = vc.getGenotype(sample);
             if ( g == null ) g = GenotypeBuilder.createMissing(sample, ploidy);
@@ -356,7 +403,7 @@ class VCFWriter extends IndexingVariantContextWriter {
 
                     writeAllele(g.getAllele(0), alleleMap);
                     for (int i = 1; i < g.getPloidy(); i++) {
-                        mWriter.write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
+                        write(g.isPhased() ? VCFConstants.PHASED : VCFConstants.UNPHASED);
                         writeAllele(g.getAllele(i), alleleMap);
                     }
 
@@ -420,8 +467,8 @@ class VCFWriter extends IndexingVariantContextWriter {
 
             for (int i = 0; i < attrs.size(); i++) {
                 if ( i > 0 || genotypeFormatKeys.contains(VCFConstants.GENOTYPE_KEY) )
-                    mWriter.write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
-                mWriter.write(attrs.get(i));
+                    write(VCFConstants.GENOTYPE_FIELD_SEPARATOR);
+                write(attrs.get(i));
             }
         }
     }
@@ -435,7 +482,7 @@ class VCFWriter extends IndexingVariantContextWriter {
         String encoding = alleleMap.get(allele);
         if ( encoding == null )
             throw new TribbleException.InternalCodecException("Allele " + allele + " is not an allele in the variant context");
-        mWriter.write(encoding);
+        write(encoding);
     }
 
     /**

From 696bf95fbadfbe2a23d6169899ac2904150d033d Mon Sep 17 00:00:00 2001
From: Eric Banks <ebanks@broadinstitute.org>
Date: Thu, 13 Dec 2012 23:28:30 +0000
Subject: [PATCH 235/236] Fix for PBT bug reported on the forum: the AD is
 actually output correctly now (rather than with 'null' or some gibberish
 memory pointer).

---
 .../walkers/phasing/PhaseByTransmission.java  | 26 ++++++++++++++-----
 .../PhaseByTransmissionIntegrationTest.java   | 12 ++++-----
 2 files changed, 25 insertions(+), 13 deletions(-)

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
index 7ebfec49e..660942b5b 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmission.java
@@ -811,9 +811,9 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
                     updateTrioMetricsCounters(phasedMother,phasedFather,phasedChild,mvCount,metricsCounters);
                     mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                             vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
-                            phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),
-                            phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
-                            phasedChild.getGenotypeString(),Arrays.asList(phasedChild.getDP()),phasedChild.getAD(),phasedChild.getLikelihoodsString());
+                            phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),
+                            phasedMother.getLikelihoodsString(), phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
+                            phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
                     if(!(phasedMother.getType()==mother.getType() && phasedFather.getType()==father.getType() && phasedChild.getType()==child.getType()))
                         metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
                 }
@@ -823,8 +823,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
                         metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
                     mvfLine = String.format("%s\t%d\t%s\t%s\t%s\t%s:%s:%s:%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s",
                             vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
-                            phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),Arrays.asList(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
-                            phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
+                            phasedMother.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedMother.getGenotypeString(),phasedMother.getDP(),printAD(phasedMother.getAD()),phasedMother.getLikelihoodsString(),
+                            phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
                 }
             }
             else{
@@ -834,8 +834,8 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
                     metricsCounters.put(NUM_GENOTYPES_MODIFIED,metricsCounters.get(NUM_GENOTYPES_MODIFIED)+1);
                 mvfLine =   String.format("%s\t%d\t%s\t%s\t%s\t.\t.\t.\t.\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s",
                         vc.getChr(),vc.getStart(),vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY),sample.getFamilyID(),
-                        phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),Arrays.asList(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
-                        phasedChild.getGenotypeString(),phasedChild.getDP(),Arrays.asList(phasedChild.getAD()),phasedChild.getLikelihoodsString());
+                        phasedFather.getExtendedAttribute(TRANSMISSION_PROBABILITY_TAG_NAME),phasedFather.getGenotypeString(),phasedFather.getDP(),printAD(phasedFather.getAD()),phasedFather.getLikelihoodsString(),
+                        phasedChild.getGenotypeString(),phasedChild.getDP(),printAD(phasedChild.getAD()),phasedChild.getLikelihoodsString());
             }
 
             //Report violation if set so
@@ -850,6 +850,18 @@ public class PhaseByTransmission extends RodWalker<HashMap<Byte,Integer>, HashMa
         return metricsCounters;
     }
 
+    private static String printAD(final int[] AD) {
+        if ( AD == null || AD.length == 0 )
+            return ".";
+        final StringBuilder sb = new StringBuilder();
+        sb.append(AD[0]);
+        for ( int i = 1; i < AD.length; i++) {
+            sb.append(",");
+            sb.append(AD[i]);
+        }
+        return sb.toString();
+    }
+
     /**
      * Initializes the reporting counters.
      *
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
index 9b0fbf650..4208f6c07 100644
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/phasing/PhaseByTransmissionIntegrationTest.java
@@ -29,7 +29,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 2,
-                Arrays.asList("f4b0b5471e03306ee2fad27d88b217b6","f8721f4f5d3bae2848ae15c3f120709b")
+                Arrays.asList("af979bcb353edda8dee2127605c71daf","1ea9994f937012e8de599ec7bcd62a0e")
         );
         executeTest("testTrueNegativeMV", spec);
     }
@@ -48,7 +48,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 2,
-                Arrays.asList("dbc64776dcc9e01a468b61e4e0db8277","547fdfef393f3045a96d245ef6af8acb")
+                Arrays.asList("1dc36ff8d1d5f5d2c1c1bf21517263bf","547fdfef393f3045a96d245ef6af8acb")
         );
         executeTest("testTruePositiveMV", spec);
     }
@@ -67,7 +67,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 2,
-                Arrays.asList("37793e78861bb0bc070884da67dc10e6","9529e2bf214d72e792d93fbea22a3b91")
+                Arrays.asList("ae60f2db6102ca1f4e93cd18d0634d7a","9529e2bf214d72e792d93fbea22a3b91")
         );
         executeTest("testFalsePositiveMV", spec);
     }
@@ -86,7 +86,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 2,
-                Arrays.asList("e4da7639bb542d6440975da12b94973f","8c157d79dd00063d2932f0d2b96f53d8")
+                Arrays.asList("590ee56e745984296f73e4277277eac7","8c157d79dd00063d2932f0d2b96f53d8")
         );
         executeTest("testSpecialCases", spec);
     }
@@ -108,7 +108,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-o %s"
                 ),
                 2,
-                Arrays.asList("ab92b714471a000285577d540e1fdc2e","343e418850ae4a687ebef2acd55fcb07")
+                Arrays.asList("78158d738917b8f0b7a736a1739b2cc5","343e418850ae4a687ebef2acd55fcb07")
         );
         executeTest("testPriorOption", spec);
     }
@@ -149,7 +149,7 @@ public class PhaseByTransmissionIntegrationTest extends WalkerTest {
                         "-fatherAlleleFirst"
                 ),
                 2,
-                Arrays.asList("4b937c1b4e96602a7479b07b59254d06","52ffa82428e63ade22ea37b72ae58492")
+                Arrays.asList("dc6afb769b55e6038677fa590b2b2e89","52ffa82428e63ade22ea37b72ae58492")
         );
         executeTest("testFatherAlleleFirst", spec);
     }

From 5f1afb41368407db592073d3e4d95afff5d0919a Mon Sep 17 00:00:00 2001
From: Mauricio Carneiro <carneiro@broadinstitute.org>
Date: Thu, 13 Dec 2012 21:56:44 -0500
Subject: [PATCH 236/236] Fixing an off-by-one clipping error in ReduceReads
 for reads off the contig

Reads that are soft-clipped off the contig (before the beginning of the contig) were being soft-clipped to position 0 instead of 1 because of an off-by-one issue. Fixed and included in the integration test.
---
 .../sting/gatk/walkers/compression/reducereads/ReduceReads.java | 2 +-
 .../compression/reducereads/ReduceReadsIntegrationTest.java     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
index 2061c5364..39a284d98 100644
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReads.java
@@ -300,7 +300,7 @@ public class ReduceReads extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceRea
         // Check if the read goes beyond the boundaries of the chromosome, and hard clip those boundaries.
         int chromosomeLength = ref.getGenomeLocParser().getContigInfo(read.getReferenceName()).getSequenceLength();
         if (read.getSoftStart() < 0)
-            read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart() - 1);
+            read = ReadClipper.hardClipByReadCoordinates(read, 0, -read.getSoftStart());
         if (read.getSoftEnd() > chromosomeLength)
             read = ReadClipper.hardClipByReadCoordinates(read, chromosomeLength - read.getSoftStart() + 1, read.getReadLength() - 1);
 
diff --git a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
index f0e8b76d4..446c3cfc3 100755
--- a/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
+++ b/protected/java/test/org/broadinstitute/sting/gatk/walkers/compression/reducereads/ReduceReadsIntegrationTest.java
@@ -94,7 +94,7 @@ public class ReduceReadsIntegrationTest extends WalkerTest {
     @Test(enabled = true)
     public void testReadOffContig() {
         String base = String.format("-T ReduceReads -npt -R %s -I %s ", REF, OFFCONTIG_BAM) + " -o %s ";
-        executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("53e16367d333da0b7d40a7683a35c95f")));
+        executeTest("testReadOffContig", new WalkerTestSpec(base, Arrays.asList("2f17c1a78e9d0138217fdb83cede8f68")));
     }
 
 }