Moving ReduceReads into protected

This commit is contained in:
Mauricio Carneiro 2012-06-25 17:00:56 -04:00
parent 2d6511b9ac
commit 12d1c594df
15 changed files with 2968 additions and 0 deletions

View File

@ -0,0 +1,56 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import java.util.HashMap;
import java.util.Map;
/**
* An object that keeps track of the base counts as well as the sum of the base, insertion and deletion qualities of each base.
*
* @author Mauricio Carneiro
* @since 6/15/12
*/
public class BaseAndQualsCounts extends BaseCounts {
    private final Map<BaseIndex, Long> sumInsertionQuals; // running sum of insertion qualities per base
    private final Map<BaseIndex, Long> sumDeletionQuals;  // running sum of deletion qualities per base

    /** Creates an empty counter with every per-base insertion/deletion quality sum set to zero. */
    public BaseAndQualsCounts() {
        super();
        this.sumInsertionQuals = new HashMap<BaseIndex, Long>();
        this.sumDeletionQuals = new HashMap<BaseIndex, Long>();
        for (BaseIndex i : BaseIndex.values()) {
            sumInsertionQuals.put(i, 0L);
            sumDeletionQuals.put(i, 0L);
        }
    }

    /**
     * Records one observation of the given base, accumulating its base, insertion and deletion qualities.
     *
     * @param base     the observed base (byte representation)
     * @param baseQual the base quality of the observation
     * @param insQual  the insertion quality of the observation
     * @param delQual  the deletion quality of the observation
     */
    public void incr(byte base, byte baseQual, byte insQual, byte delQual) {
        super.incr(base, baseQual);
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) { // bytes with no BaseIndex are skipped (NOTE(review): byteToBase maps 'N' to BaseIndex.N, so Ns ARE counted -- confirm intent)
            sumInsertionQuals.put(i, sumInsertionQuals.get(i) + insQual);
            sumDeletionQuals.put(i, sumDeletionQuals.get(i) + delQual);
        }
    }

    /**
     * Removes one observation of the given base, subtracting the qualities that incr() added for it.
     *
     * @param base     the observed base (byte representation)
     * @param baseQual the base quality originally recorded
     * @param insQual  the insertion quality originally recorded
     * @param delQual  the deletion quality originally recorded
     */
    public void decr(byte base, byte baseQual, byte insQual, byte delQual) {
        super.decr(base, baseQual);
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) { // bytes with no BaseIndex are skipped
            sumInsertionQuals.put(i, sumInsertionQuals.get(i) - insQual);
            sumDeletionQuals.put(i, sumDeletionQuals.get(i) - delQual);
        }
    }

    /**
     * Truncated (integer) average insertion quality of the base with the most counts.
     *
     * @return the average insertion quality, or 0 if that base has no counts
     */
    public byte averageInsertionQualsOfMostCommonBase() {
        return getGenericAverageQualOfMostCommonBase(sumInsertionQuals);
    }

    /**
     * Truncated (integer) average deletion quality of the base with the most counts.
     *
     * @return the average deletion quality, or 0 if that base has no counts
     */
    public byte averageDeletionQualsOfMostCommonBase() {
        return getGenericAverageQualOfMostCommonBase(sumDeletionQuals);
    }

    /** Shared implementation for the two average-quality queries above. */
    private byte getGenericAverageQualOfMostCommonBase(Map<BaseIndex, Long> sumQuals) {
        BaseIndex base = BaseIndex.byteToBase(baseWithMostCounts());
        final int count = getCount(base);
        return count == 0 ? 0 : (byte) (sumQuals.get(base) / count); // guard: an empty counter would otherwise divide by zero
    }
}

View File

@ -0,0 +1,223 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import java.util.EnumMap;
import java.util.Map;
/**
* An object to keep track of the number of occurrences of each base and its quality.
*
* User: depristo
* Date: 4/8/11
* Time: 2:55 PM
*/
public class BaseCounts {
    public final static BaseIndex MAX_BASE_INDEX_WITH_NO_COUNTS = BaseIndex.N;
    public final static byte MAX_BASE_WITH_NO_COUNTS = MAX_BASE_INDEX_WITH_NO_COUNTS.getByte();

    private final Map<BaseIndex, Integer> counts;  // keeps track of the base counts
    private final Map<BaseIndex, Long> sumQuals;   // keeps track of the quals of each base

    /** Creates an empty counter: every base count and quality sum starts at zero. */
    public BaseCounts() {
        counts = new EnumMap<BaseIndex, Integer>(BaseIndex.class);
        sumQuals = new EnumMap<BaseIndex, Long>(BaseIndex.class);
        for (BaseIndex i : BaseIndex.values()) {
            counts.put(i, 0);
            sumQuals.put(i, 0L);
        }
    }

    /**
     * Static factory building a counter from an array of nucleotide counts; quality sums stay at zero.
     *
     * @param countsACGT counts for A, C, G and T, in that order (indices 0-3)
     * @return a new BaseCounts holding the given nucleotide counts
     */
    public static BaseCounts createWithCounts(int[] countsACGT) {
        BaseCounts baseCounts = new BaseCounts();
        baseCounts.counts.put(BaseIndex.A, countsACGT[0]);
        baseCounts.counts.put(BaseIndex.C, countsACGT[1]);
        baseCounts.counts.put(BaseIndex.G, countsACGT[2]);
        baseCounts.counts.put(BaseIndex.T, countsACGT[3]);
        return baseCounts;
    }

    /**
     * Merges another counter's base counts into this one.
     * NOTE(review): only counts are merged, not the quality sums -- confirm this asymmetry is intended.
     *
     * @param other the counter whose counts are added
     */
    @Requires("other != null")
    public void add(BaseCounts other) {
        for (BaseIndex i : BaseIndex.values())
            counts.put(i, counts.get(i) + other.counts.get(i));
    }

    /**
     * Subtracts another counter's base counts from this one.
     * NOTE(review): only counts are updated, not the quality sums -- confirm this asymmetry is intended.
     *
     * @param other the counter whose counts are subtracted
     */
    @Requires("other != null")
    public void sub(BaseCounts other) {
        for (BaseIndex i : BaseIndex.values())
            counts.put(i, counts.get(i) - other.counts.get(i));
    }

    /**
     * Increments the count of the given base by one; bytes with no BaseIndex are ignored.
     * NOTE(review): 'N' maps to BaseIndex.N and IS counted, despite the original "no Ns" comment.
     *
     * @param base the byte representation of the base
     */
    @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
    public void incr(byte base) {
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) // byteToBase returns null only for unrecognized bytes
            counts.put(i, counts.get(i) + 1);
    }

    /**
     * Increments the count of the given base and accumulates its quality score.
     *
     * @param base the byte representation of the base
     * @param qual the quality score of this observation
     */
    @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) + 1")
    public void incr(byte base, byte qual) {
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) { // unrecognized bytes are skipped
            counts.put(i, counts.get(i) + 1);
            sumQuals.put(i, sumQuals.get(i) + qual);
        }
    }

    /**
     * Decrements the count of the given base by one; bytes with no BaseIndex are ignored.
     *
     * @param base the byte representation of the base
     */
    @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
    public void decr(byte base) {
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) // unrecognized bytes are skipped
            counts.put(i, counts.get(i) - 1);
    }

    /**
     * Decrements the count of the given base and removes its quality score from the running sum.
     *
     * @param base the byte representation of the base
     * @param qual the quality score originally added for this observation
     */
    @Ensures("totalCount() == old(totalCount()) || totalCount() == old(totalCount()) - 1")
    public void decr(byte base, byte qual) {
        BaseIndex i = BaseIndex.byteToBase(base);
        if (i != null) { // unrecognized bytes are skipped
            counts.put(i, counts.get(i) - 1);
            sumQuals.put(i, sumQuals.get(i) - qual);
        }
    }

    /** @return the number of times the given base has been counted */
    @Ensures("result >= 0")
    public int getCount(byte base) {
        return getCount(BaseIndex.byteToBase(base));
    }

    /** @return the number of times the given base has been counted */
    @Ensures("result >= 0")
    public int getCount(BaseIndex base) {
        return counts.get(base);
    }

    /** @return the sum of all quality scores recorded for the given base */
    @Ensures("result >= 0")
    public long getSumQuals(byte base) {
        return getSumQuals(BaseIndex.byteToBase(base));
    }

    /** @return the sum of all quality scores recorded for the given base */
    @Ensures("result >= 0")
    public long getSumQuals(BaseIndex base) {
        return sumQuals.get(base);
    }

    /**
     * Truncated (integer) average quality of the given base.
     *
     * @param base the byte representation of the base
     * @return the average quality, or 0 if the base was never counted
     */
    @Ensures("result >= 0")
    public byte averageQuals(byte base) {
        return averageQuals(BaseIndex.byteToBase(base)); // delegate so the zero-count guard applies to both overloads
    }

    /**
     * Truncated (integer) average quality of the given base.
     *
     * @param base the base index
     * @return the average quality, or 0 if the base was never counted
     */
    @Ensures("result >= 0")
    public byte averageQuals(BaseIndex base) {
        final int count = getCount(base);
        return count == 0 ? 0 : (byte) (getSumQuals(base) / count); // guard: a never-seen base would otherwise divide by zero
    }

    /** @return the byte representation of the base with the highest count */
    public byte baseWithMostCounts() {
        return baseIndexWithMostCounts().getByte();
    }

    /** @return the count of the most common base */
    @Ensures("result >= 0")
    public int countOfMostCommonBase() {
        return counts.get(baseIndexWithMostCounts());
    }

    /** @return the quality sum of the most common base */
    @Ensures("result >= 0")
    public long sumQualsOfMostCommonBase() {
        return sumQuals.get(baseIndexWithMostCounts());
    }

    /**
     * Truncated (integer) average quality of the most common base.
     *
     * @return the average quality, or 0 if nothing has been counted yet
     */
    @Ensures("result >= 0")
    public byte averageQualsOfMostCommonBase() {
        final int count = countOfMostCommonBase();
        return count == 0 ? 0 : (byte) (sumQualsOfMostCommonBase() / count); // guard: an empty counter would otherwise divide by zero
    }

    /** @return the total number of bases counted, across all base indices */
    @Ensures("result >= 0")
    public int totalCount() {
        int sum = 0;
        for (int c : counts.values())
            sum += c;
        return sum;
    }

    /**
     * Given a base, it returns the proportional count of this base compared to all other bases
     *
     * @param base the byte representation of the base
     * @return the proportion of this base over all other bases (0.0 when nothing has been counted)
     */
    @Ensures({"result >=0.0", "result<= 1.0"})
    public double baseCountProportion(byte base) {
        return baseCountProportion(BaseIndex.byteToBase(base)); // delegate so the empty-counter guard applies to both overloads
    }

    /**
     * Given a base, it returns the proportional count of this base compared to all other bases
     *
     * @param baseIndex the base index
     * @return the proportion of this base over all other bases (0.0 when nothing has been counted)
     */
    @Ensures({"result >=0.0", "result<= 1.0"})
    public double baseCountProportion(BaseIndex baseIndex) {
        int total = totalCount();
        if (total == 0)
            return 0.0;
        return (double) counts.get(baseIndex) / total; // reuse the cached total instead of recomputing it
    }

    /** @return a string of the form "A=1,C=0,...", one entry per base */
    @Override
    @Ensures("result != null")
    public String toString() {
        StringBuilder b = new StringBuilder();
        for (Map.Entry<BaseIndex, Integer> elt : counts.entrySet()) {
            b.append(elt.getKey()).append("=").append(elt.getValue()).append(","); // was elt.toString(), which already contains "key=value" and printed "A=1=1,"
        }
        return b.toString();
    }

    /** @return the base index with the highest count (BaseIndex.N when all counts are zero) */
    @Ensures("result != null")
    public BaseIndex baseIndexWithMostCounts() {
        BaseIndex maxI = MAX_BASE_INDEX_WITH_NO_COUNTS;
        for (BaseIndex i : counts.keySet())
            if (counts.get(i) > counts.get(maxI))
                maxI = i;
        return maxI;
    }

    /** @return the nucleotide (A,C,G,T,N) with the highest count, ignoring insertions and deletions */
    @Ensures("result != null")
    public BaseIndex baseIndexWithMostCountsWithoutIndels() {
        BaseIndex mostCounts = MAX_BASE_INDEX_WITH_NO_COUNTS;
        for (BaseIndex index : counts.keySet())
            if (index.isNucleotide() && counts.get(index) > counts.get(mostCounts))
                mostCounts = index;
        return mostCounts;
    }

    /** @return the total count over nucleotides only (indels excluded) */
    @Ensures("result >=0")
    public int totalCountWithoutIndels() {
        int sum = 0;
        for (BaseIndex index : counts.keySet())
            if (index.isNucleotide())
                sum += counts.get(index);
        return sum;
    }

    /**
     * Calculates the proportional count of a base compared to all other bases except indels (I and D)
     *
     * @param index the base index (must be a nucleotide)
     * @return the proportion of this base over all other bases except indels (0.0 when nothing has been counted)
     */
    @Requires("index.isNucleotide()")
    @Ensures({"result >=0.0", "result<= 1.0"})
    public double baseCountProportionWithoutIndels(BaseIndex index) {
        int total = totalCountWithoutIndels();
        if (total == 0)
            return 0.0;
        return (double) counts.get(index) / total; // reuse the cached total instead of recomputing it
    }
}

View File

@ -0,0 +1,82 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
/**
* Simple byte / base index conversions
*
*
* @author carneiro
* @since 8/26/11
*/
public enum BaseIndex {
    A('A', 0),
    C('C', 1),
    G('G', 2),
    T('T', 3),
    D('D', 4),
    I('I', 5), // insertion to the right of the base
    N('N', 6);

    final byte b;
    final int index;

    private BaseIndex(char base, int index) {
        this.b = (byte) base;
        this.index = index;
    }

    public byte getByte() {
        return b;
    }

    /**
     * Converts a byte representation of a base into its BaseIndex, case-insensitively.
     *
     * @param base the byte representation of the base
     * @return the matching BaseIndex, or null when the byte does not name a base
     */
    public static BaseIndex byteToBase(final byte base) {
        if (base == '-')
            return D; // a gap character also denotes a deletion
        switch (Character.toUpperCase((char) base)) {
            case 'A': return A;
            case 'C': return C;
            case 'G': return G;
            case 'T': return T;
            case 'D': return D;
            case 'I': return I;
            case 'N': return N;
            default:  return null;
        }
    }

    /**
     * Definition of a nucleotide for the BaseIndex is anything that has been read as a base
     * by the machine (A,C,G,T), even if it couldn't tell which base it was, but it knows
     * there is a base there (N).
     *
     * @return whether or not it is a nucleotide, given the definition above
     */
    public boolean isNucleotide() {
        return !isIndel(); // everything except D and I qualifies: A, C, G, T and N
    }

    /**
     * Whether or not this base is an insertion or a deletion
     *
     * @return true for I or D, false otherwise
     */
    public boolean isIndel() {
        switch (this) {
            case D:
            case I:
                return true;
            default:
                return false;
        }
    }
}

View File

@ -0,0 +1,182 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.DuplicateReadFilter;
import org.broadinstitute.sting.gatk.filters.FailsVendorQualityCheckFilter;
import org.broadinstitute.sting.gatk.filters.NotPrimaryAlignmentFilter;
import org.broadinstitute.sting.gatk.filters.UnmappedReadFilter;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.HashMap;
import java.util.Map;
/**
* Given two BAMs with different read groups, it compares them based on ReduceReads metrics.
* <p>
* This is a test walker used for asserting that the ReduceReads procedure is not making blatant mistakes when compressing bam files.
* </p>
* <h2>Input</h2>
* <p>
* Two BAM files (using -I) with different read group IDs
* </p>
* <h2>Output</h2>
* <p>
* A table of PASS/FAIL counts for each enabled test, printed to standard output.
* </p>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
*
* @author carneiro
* @since 10/30/11
*/
@ReadFilters({UnmappedReadFilter.class,NotPrimaryAlignmentFilter.class,DuplicateReadFilter.class,FailsVendorQualityCheckFilter.class})
public class CompareBAMWalker extends LocusWalker<Map<CompareBAMWalker.TestName, Boolean>, CompareBAMWalker.TestResults> {
    @Argument(required = true, shortName = "rr", fullName = "reduced_readgroup", doc = "The read group ID corresponding to the compressed BAM being tested") public String reducedReadGroupID;
    @Argument(required = false, shortName = "teq", fullName = "test_equal_bases", doc = "Test if the bases marked as '=' are indeed ref bases.") public boolean TEST_EQUAL_BASES = false;
    @Argument(required = false, shortName = "tbc", fullName = "test_base_counts", doc = "Test if the base counts tag in consensus reads are accurate.") public boolean TEST_BASE_COUNTS = false;
    @Argument(required = false, shortName = "mbq", fullName = "min_base_qual", doc = "Minimum base quality to be considered.") public int MIN_BASE_QUAL = 20;
    @Argument(required = false, shortName = "mmq", fullName = "min_mapping_qual", doc = "Minimum mapping quality to be considered.") public int MIN_MAPPING_QUAL = 20;

    /**
     * Runs every enabled test at this locus.
     *
     * @return a map from each enabled test to whether it passed at this locus
     */
    @Override
    public Map<TestName, Boolean> map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
        Map<TestName, Boolean> result = new HashMap<TestName, Boolean>();
        if (TEST_EQUAL_BASES) result.put(TestName.EQUAL_BASES, testEqualBases(ref, context));
        if (TEST_BASE_COUNTS) result.put(TestName.BASE_COUNTS, testBaseCounts(ref, context));
        return result;
    }

    /** @return a fresh TestResults accumulator, pre-registered with every enabled test */
    @Override
    public TestResults reduceInit() {
        TestResults sum = new TestResults(); // a fresh new TestResults object to sum up the results of every object passed by MAP.
        if (TEST_EQUAL_BASES) sum.createTest(TestName.EQUAL_BASES);
        if (TEST_BASE_COUNTS) sum.createTest(TestName.BASE_COUNTS);
        return sum;
    }

    /** Folds one locus' test outcomes into the running totals. */
    @Override
    public TestResults reduce(Map<TestName, Boolean> mapResult, TestResults sum) {
        for (Map.Entry<TestName, Boolean> outcome : mapResult.entrySet()) { // entrySet avoids a second map lookup per test
            if (outcome.getValue())
                sum.reportSuccess(outcome.getKey());
            else
                sum.reportFailed(outcome.getKey());
        }
        return sum;
    }

    /** Prints the final PASS/FAIL table once the traversal is done. */
    public void onTraversalDone(TestResults finalResults) {
        finalResults.report();
    }

    // NOTE(review): stub -- always reports success; real comparison logic still to be written
    private boolean testEqualBases(ReferenceContext ref, AlignmentContext context) {
        return true;
    }

    // NOTE(review): stub -- always reports success; real comparison logic still to be written
    private boolean testBaseCounts(ReferenceContext ref, AlignmentContext context) {
        return true;
    }

    /** The tests this walker knows how to run. */
    public enum TestName {
        EQUAL_BASES("testEqualBases"),
        BASE_COUNTS("testBaseCounts");

        private final String testName; // human-readable name used in the final report

        TestName(String testName) {
            this.testName = testName;
        }

        public String getTestName() {
            return testName;
        }
    }

    /** Accumulates pass/fail totals for each registered test. */
    public class TestResults {
        private final Map<TestName, TestOutcome> testStats = new HashMap<TestName, TestOutcome>();

        /** Registers a test so outcomes can be reported against it. */
        public void createTest(TestName test) {
            testStats.put(test, new TestOutcome());
        }

        /** @throws ReviewedStingException if the test was never registered via createTest */
        public void reportSuccess(TestName test) {
            if (testStats.containsKey(test))
                testStats.get(test).incPassed();
            else
                throw new ReviewedStingException("No such test: " + test);
        }

        /** @throws ReviewedStingException if the test was never registered via createTest */
        public void reportFailed(TestName test) {
            if (testStats.containsKey(test))
                testStats.get(test).incFailed();
            else
                throw new ReviewedStingException("No such test: " + test);
        }

        /** Prints a PASS/FAIL count table to standard output, one row per registered test. */
        public void report() {
            System.out.println();
            System.out.println(String.format("%20s\tPASS\tFAIL", ""));
            for (Map.Entry<TestName, TestOutcome> entry : testStats.entrySet()) // entrySet avoids two extra lookups per row
                System.out.println(String.format("%20s\t%d\t%d", entry.getKey().getTestName(), entry.getValue().getPassed(), entry.getValue().getFailed()));
            System.out.println();
        }
    }

    /** Simple pass/fail counters for a single test. */
    private class TestOutcome {
        private long passed;
        private long failed;

        public long getPassed() {
            return passed;
        }

        public void incPassed() {
            this.passed++;
        }

        public long getFailed() {
            return failed;
        }

        public void incFailed() {
            this.failed++;
        }
    }

    /** @return base counts over all non-reduced read groups, honoring the configured quality thresholds */
    private BaseCounts getFilteredBaseCounts(AlignmentContext context) {
        return getBaseCounts(context, MIN_BASE_QUAL, MIN_MAPPING_QUAL);
    }

    /** @return base counts over all non-reduced read groups with minimal filtering (mbq=3, mmq=0) */
    private BaseCounts getFullBaseCounts(AlignmentContext context) {
        return getBaseCounts(context, 3, 0);
    }

    /**
     * Accumulates base counts across every read group except the reduced one.
     *
     * @param context the pileup at the current locus
     * @param mbq     minimum base quality for a base to be counted
     * @param mmq     minimum mapping quality for a base to be counted
     * @return the combined base counts of all non-reduced read groups
     */
    private BaseCounts getBaseCounts(AlignmentContext context, int mbq, int mmq) {
        BaseCounts fullBaseCounts = new BaseCounts();
        for (String rg : context.getBasePileup().getReadGroups()) {
            if (!rg.equals(reducedReadGroupID)) {
                BaseCounts b = BaseCounts.createWithCounts(context.getBasePileup().getPileupForReadGroup(rg).getBaseAndMappingFilteredPileup(mbq, mmq).getBaseCounts());
                fullBaseCounts.add(b);
            }
        }
        return fullBaseCounts;
    }
}

View File

@ -0,0 +1,62 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
* Created by IntelliJ IDEA.
* User: depristo
* Date: 4/10/11
* Time: 8:49 AM
*
* A general interface for ReadCompressors. Read compressors have the following semantics:
*
* They accept a stream of reads, in order, and after each added read return a compressed stream
* of reads for emission. This stream of reads is a "reduced" representation of the total stream
* of reads. The actual compression approach is left up to the implementing class.
*/
public interface Compressor {
    /**
     * Adds the read to the compressor. The returned iterable collection of
     * reads represents the incremental compressed output.
     *
     * @param read the next uncompressed read in the input stream to the compressor; reads must be added in order
     * @return an iterator over the incrementally available compressed reads (possibly empty, never null per the contract below)
     */
    @Requires("read != null")
    @Ensures("result != null")
    Iterable<GATKSAMRecord> addAlignment(GATKSAMRecord read);
    /**
     * Must be called after the last read has been added to finalize the compressor state
     * and return the last compressed reads from the compressor.
     *
     * @return an iterator over the final compressed reads of this compressor (possibly empty, never null)
     */
    @Ensures("result != null")
    Iterable<GATKSAMRecord> close();
}

View File

@ -0,0 +1,204 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.LinkedList;
/**
* The element that describes the header of the sliding window.
*
* Each site has a header element containing the counts of each base, its reference-based location and whether or
* not the site has insertions (to its right). It also contains information about the bases that have been filtered
* out due to mapping or base quality.
*/
public class HeaderElement {
    private final BaseAndQualsCounts consensusBaseCounts; // How many A,C,G,T (and D's) are in this site, restricted to bases that passed the filters.
    private final BaseAndQualsCounts filteredBaseCounts;  // How many A,C,G,T (and D's) were filtered out in this site.
    private int insertionsToTheRight;                     // How many reads in this site had insertions to the immediate right
    private int nSoftClippedBases;                        // How many bases in this site came from soft clipped bases
    private final int location;                           // Genome location of this site (the sliding window knows which contig we're at)
    private final LinkedList<Integer> mappingQuality;     // keeps the mapping quality of each read that contributed to this element (site)

    public int getLocation() {
        return location;
    }

    public BaseAndQualsCounts getFilteredBaseCounts() {
        return filteredBaseCounts;
    }

    public BaseAndQualsCounts getConsensusBaseCounts() {
        return consensusBaseCounts;
    }

    /**
     * Creates a new HeaderElement with the following default values: empty consensusBaseCounts, empty
     * filteredBaseCounts, 0 insertions to the right, 0 soft clipped bases and an empty mappingQuality list.
     *
     * @param location the reference location for the new element
     */
    public HeaderElement(int location) {
        this(new BaseAndQualsCounts(), new BaseAndQualsCounts(), 0, 0, location, new LinkedList<Integer>());
    }

    /**
     * Creates a new HeaderElement with all given parameters
     *
     * @param consensusBaseCounts  the BaseCounts object for the running consensus synthetic read
     * @param filteredBaseCounts   the BaseCounts object for the filtered data synthetic read
     * @param insertionsToTheRight number of insertions to the right of this HeaderElement
     * @param nSoftClippedBases    number of bases in this site that came from soft clips
     * @param location             the reference location of this reference element
     * @param mappingQuality       the list of mapping quality values of all reads that contributed to this
     *                             HeaderElement
     */
    public HeaderElement(BaseAndQualsCounts consensusBaseCounts, BaseAndQualsCounts filteredBaseCounts, int insertionsToTheRight, int nSoftClippedBases, int location, LinkedList<Integer> mappingQuality) {
        this.consensusBaseCounts = consensusBaseCounts;
        this.filteredBaseCounts = filteredBaseCounts;
        this.insertionsToTheRight = insertionsToTheRight;
        this.nSoftClippedBases = nSoftClippedBases;
        this.location = location;
        this.mappingQuality = mappingQuality;
    }

    /**
     * Whether or not the site represented by this HeaderElement is variant according to the definitions of variant
     * by insertion, deletion, mismatches or excess soft clips.
     *
     * @param minVariantProportion minimum proportion of mismatching bases for the site to be called variant
     * @param minIndelProportion   minimum proportion of insertions/deletions for the site to be called variant
     * @return true if site is variant by any definition. False otherwise.
     */
    public boolean isVariant(double minVariantProportion, double minIndelProportion) {
        return hasConsensusData() && (isVariantFromInsertions(minIndelProportion) || isVariantFromMismatches(minVariantProportion) || isVariantFromDeletions(minIndelProportion) || isVariantFromSoftClips());
    }

    /**
     * Adds a new base to the HeaderElement updating all counts accordingly
     *
     * @param base               the base to add
     * @param baseQual           the base quality
     * @param insQual            the insertion quality of the base
     * @param delQual            the deletion quality of the base
     * @param baseMappingQuality the mapping quality of the read this base belongs to
     * @param minBaseQual        minimum base quality for the base to enter the consensus counts
     * @param minMappingQual     minimum mapping quality for the base to enter the consensus counts
     * @param isSoftClipped      whether this base came from a soft clipped region
     */
    public void addBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
        if (basePassesFilters(baseQual, minBaseQual, baseMappingQuality, minMappingQual))
            consensusBaseCounts.incr(base, baseQual, insQual, delQual); // If the base passes filters, it is included in the consensus base counts
        else
            filteredBaseCounts.incr(base, baseQual, insQual, delQual); // If the base fails filters, it is included with the filtered data base counts
        this.mappingQuality.add(baseMappingQuality); // Filtered or not, the RMS mapping quality includes all bases in this site
        nSoftClippedBases += isSoftClipped ? 1 : 0; // if this base is soft clipped, bump the counter
    }

    /**
     * Removes a base from the HeaderElement, reversing everything addBase did for it.
     * Callers must pass the same values that were used when the base was added so it is
     * removed from the same (consensus vs filtered) bucket.
     *
     * @param base               the base to remove
     * @param baseQual           the base quality originally added
     * @param insQual            the insertion quality originally added
     * @param delQual            the deletion quality originally added
     * @param baseMappingQuality the mapping quality of the read this base belongs to
     * @param minBaseQual        minimum base quality used when the base was added
     * @param minMappingQual     minimum mapping quality used when the base was added
     * @param isSoftClipped      whether this base came from a soft clipped region
     */
    public void removeBase(byte base, byte baseQual, byte insQual, byte delQual, int baseMappingQuality, int minBaseQual, int minMappingQual, boolean isSoftClipped) {
        if (basePassesFilters(baseQual, minBaseQual, baseMappingQuality, minMappingQual))
            consensusBaseCounts.decr(base, baseQual, insQual, delQual); // If the base passes filters, it is included in the consensus base counts
        else
            filteredBaseCounts.decr(base, baseQual, insQual, delQual); // If the base fails filters, it is included with the filtered data base counts
        this.mappingQuality.remove((Integer) baseMappingQuality); // Filtered or not, the RMS mapping quality includes all bases in this site
        nSoftClippedBases -= isSoftClipped ? 1 : 0; // if this base was soft clipped, decrement the counter
    }

    /**
     * Adds an insertion to the right of the HeaderElement and updates all counts accordingly. All insertions
     * should be added to the right of the element.
     */
    public void addInsertionToTheRight() {
        insertionsToTheRight++;
    }

    /**
     * Does this HeaderElement contain consensus data?
     *
     * @return whether or not this HeaderElement contains consensus data
     */
    public boolean hasConsensusData() {
        return consensusBaseCounts.totalCount() > 0;
    }

    /**
     * Does this HeaderElement contain filtered data?
     *
     * @return whether or not this HeaderElement contains filtered data
     */
    public boolean hasFilteredData() {
        return filteredBaseCounts.totalCount() > 0;
    }

    /**
     * A HeaderElement is empty if it has no consensus or filtered data
     *
     * @return whether or not this HeaderElement has no data
     */
    public boolean isEmpty() {
        return (!hasFilteredData() && !hasConsensusData());
    }

    /**
     * The RMS of the mapping qualities of all reads that contributed to this HeaderElement
     *
     * @return the RMS of the mapping qualities of all reads that contributed to this HeaderElement
     */
    public double getRMS() {
        return MathUtils.rms(mappingQuality);
    }

    /**
     * Removes an insertion from this element (if you removed a read that had an insertion).
     *
     * @throws ReviewedStingException if more insertions are removed than were ever added
     */
    public void removeInsertionToTheRight() {
        this.insertionsToTheRight--;
        if (insertionsToTheRight < 0)
            throw new ReviewedStingException("Removed too many insertions, header is now negative!");
    }

    /**
     * Whether or not the HeaderElement is variant due to excess insertions
     *
     * @return whether or not the HeaderElement is variant due to excess insertions
     */
    private boolean isVariantFromInsertions(double minIndelProportion) {
        int numberOfBases = consensusBaseCounts.totalCount();
        if (numberOfBases == 0 && insertionsToTheRight > 0)
            return true; // we only have insertions
        else if (numberOfBases == 0)
            return false; // we don't have anything
        // if we have bases and insertions, check the ratio
        return ((double) insertionsToTheRight / numberOfBases) > minIndelProportion;
    }

    /**
     * Whether or not the HeaderElement is variant due to excess deletions
     *
     * @return whether or not the HeaderElement is variant due to excess deletions
     */
    private boolean isVariantFromDeletions(double minIndelProportion) {
        return consensusBaseCounts.baseIndexWithMostCounts() == BaseIndex.D || consensusBaseCounts.baseCountProportion(BaseIndex.D) > minIndelProportion;
    }

    /**
     * Whether or not the HeaderElement is variant due to excess mismatches
     *
     * @return whether or not the HeaderElement is variant due to excess mismatches
     */
    private boolean isVariantFromMismatches(double minVariantProportion) {
        BaseIndex mostCommon = consensusBaseCounts.baseIndexWithMostCountsWithoutIndels();
        double mostCommonProportion = consensusBaseCounts.baseCountProportionWithoutIndels(mostCommon);
        return mostCommonProportion != 0.0 && mostCommonProportion < (1 - minVariantProportion);
    }

    /**
     * This handles the special case where we have more bases that came from soft clips than bases that came from
     * normal bases by forcing it to become a variant region. We don't want a consensus based on too little information.
     *
     * @return true if we had more soft clipped bases contributing to this site than matches/mismatches.
     */
    private boolean isVariantFromSoftClips() {
        return nSoftClippedBases >= (consensusBaseCounts.totalCount() - nSoftClippedBases);
    }

    /** @return true when the base clears both the base-quality and mapping-quality thresholds */
    private boolean basePassesFilters(byte baseQual, int minBaseQual, int baseMappingQuality, int minMappingQual) {
        return baseQual >= minBaseQual && baseMappingQuality >= minMappingQual;
    }
}

View File

@ -0,0 +1,81 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileHeader;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.HashMap;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
/*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
/**
*
* @author depristo
*/
public class MultiSampleCompressor implements Compressor {
    protected static final Logger logger = Logger.getLogger(MultiSampleCompressor.class);
    protected Map<String, SingleSampleCompressor> compressorsPerSample = new HashMap<String, SingleSampleCompressor>();

    /**
     * Sets up one SingleSampleCompressor for every sample present in the BAM header.
     *
     * @param header                             the SAM file header listing the samples
     * @param contextSize                        forwarded to each per-sample compressor
     * @param downsampleCoverage                 forwarded to each per-sample compressor
     * @param minMappingQuality                  forwarded to each per-sample compressor
     * @param minAltProportionToTriggerVariant   forwarded to each per-sample compressor
     * @param minIndelProportionToTriggerVariant forwarded to each per-sample compressor
     * @param minBaseQual                        forwarded to each per-sample compressor
     * @param downsampleStrategy                 forwarded to each per-sample compressor
     */
    public MultiSampleCompressor(SAMFileHeader header,
                                 final int contextSize,
                                 final int downsampleCoverage,
                                 final int minMappingQuality,
                                 final double minAltProportionToTriggerVariant,
                                 final double minIndelProportionToTriggerVariant,
                                 final int minBaseQual,
                                 final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) {
        for (final String sampleName : SampleUtils.getSAMFileSamples(header)) {
            final SingleSampleCompressor sampleCompressor =
                    new SingleSampleCompressor(sampleName, contextSize, downsampleCoverage, minMappingQuality,
                            minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy);
            compressorsPerSample.put(sampleName, sampleCompressor);
        }
    }

    /**
     * Routes the read to the compressor of the sample it belongs to.
     *
     * @param read the next read; its read group's sample selects the compressor
     * @return the incrementally available compressed reads from that sample's compressor
     * @throws ReviewedStingException if no compressor exists for the read's sample
     */
    @Override
    public Iterable<GATKSAMRecord> addAlignment(GATKSAMRecord read) {
        final String sampleName = read.getReadGroup().getSample();
        final SingleSampleCompressor sampleCompressor = compressorsPerSample.get(sampleName);
        if (sampleCompressor == null)
            throw new ReviewedStingException("No compressor for sample " + sampleName);
        return sampleCompressor.addAlignment(read);
    }

    /**
     * Closes every per-sample compressor and merges their remaining reads, sorted by alignment start.
     *
     * @return all remaining compressed reads across every sample, in alignment order
     */
    @Override
    public Iterable<GATKSAMRecord> close() {
        final SortedSet<GATKSAMRecord> remaining = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
        for (final SingleSampleCompressor sampleCompressor : compressorsPerSample.values())
            for (final GATKSAMRecord compressedRead : sampleCompressor.close())
                remaining.add(compressedRead);
        return remaining;
    }
}

View File

@ -0,0 +1,110 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.LinkedList;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;
/**
* This class implements a "read stash" that keeps reads always sorted in alignment order. Useful
* for read walkers that alter the alignment information of the incoming reads, but need to
* maintain the reads sorted for the reduce step. (e.g. ReduceReads)
*/
public class ReduceReadsStash {
protected MultiSampleCompressor compressor;
SortedSet<GATKSAMRecord> outOfOrderReads;
/**
* Creates a stash with the default sorting order (read alignment)
* @param compressor the MultiSampleCompressor object to be used with this stash (for stash.close())
*/
public ReduceReadsStash(MultiSampleCompressor compressor) {
this.compressor = compressor;
this.outOfOrderReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
}
/**
* Get all reads before a given read (for processing)
*
* @param read the original read
* @return all reads that have alignment start before the original read.
*/
public List<GATKSAMRecord> getAllReadsBefore(GATKSAMRecord read) {
List<GATKSAMRecord> result = new LinkedList<GATKSAMRecord>();
GATKSAMRecord newHead = null;
for (GATKSAMRecord stashedRead : outOfOrderReads) {
if (ReadUtils.compareSAMRecords(stashedRead, read) <= 0)
result.add(stashedRead);
else {
newHead = stashedRead;
break;
}
}
if (result.size() > 0) {
if (result.size() == outOfOrderReads.size())
outOfOrderReads.clear();
else
outOfOrderReads = new TreeSet<GATKSAMRecord>(outOfOrderReads.tailSet(newHead));
}
return result;
}
/**
* sends the read to the MultiSampleCompressor
*
* @param read the read to be compressed
* @return any compressed reads that may have resulted from adding this read to the machinery (due to the sliding window)
*/
public Iterable<GATKSAMRecord> compress(GATKSAMRecord read) {
return compressor.addAlignment(read);
}
/**
* Add a read to the stash
*
* @param read any read
*/
public void add(GATKSAMRecord read) {
outOfOrderReads.add(read);
}
/**
 * Closes the stash: feeds every remaining stashed read (in sorted order) through the
 * compressor, then drains the compressor itself.
 *
 * @return a list of all the reads produced by the SlidingWindow machinery
 */
public Iterable<GATKSAMRecord> close() {
    final LinkedList<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
    // flush all stashed reads through the compressor, in order
    for (final GATKSAMRecord stashedRead : outOfOrderReads)
        for (final GATKSAMRecord compressedRead : compressor.addAlignment(stashedRead))
            finalizedReads.add(compressedRead);
    // pick up whatever the compressor is still holding
    for (final GATKSAMRecord remainingRead : compressor.close())
        finalizedReads.add(remainingRead);
    return finalizedReads;
}
/**
 * Useful debug functionality: prints every read currently held in the stash,
 * one per line, with its cigar and alignment boundaries.
 */
public void print() {
    System.out.println("Stash Contents:");
    int readIndex = 0;
    for (final GATKSAMRecord stashedRead : outOfOrderReads) {
        readIndex++;
        System.out.println(String.format("%3d: %s %d %d", readIndex, stashedRead.getCigarString(), stashedRead.getAlignmentStart(), stashedRead.getAlignmentEnd()));
    }
    System.out.println();
}
}

View File

@ -0,0 +1,665 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.util.SequenceUtil;
import org.broadinstitute.sting.commandline.Argument;
import org.broadinstitute.sting.commandline.Hidden;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.filters.*;
import org.broadinstitute.sting.gatk.io.StingSAMFileWriter;
import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.PartitionBy;
import org.broadinstitute.sting.gatk.walkers.PartitionType;
import org.broadinstitute.sting.gatk.walkers.ReadFilters;
import org.broadinstitute.sting.gatk.walkers.ReadWalker;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocComparator;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.clipping.ReadClipper;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.*;
/**
* Reduces the BAM file using read based compression that keeps only essential information for variant calling
* <p/>
* <p>
* This walker will generated reduced versions of the BAM files that still follow the BAM spec
* and contain all the information necessary for the GSA variant calling pipeline. Some options
* allow you to tune in how much compression you want to achieve. The default values have been
* shown to reduce a typical whole exome BAM file 100x. The higher the coverage, the bigger the
* savings in file size and performance of the downstream tools.
* <p/>
* <h2>Input</h2>
* <p>
* The BAM file to be compressed
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* The compressed (reduced) BAM file.
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java -Xmx4g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T ReduceReads \
* -I myData.bam \
* -o myData.reduced.bam
* </pre>
*/
@PartitionBy(PartitionType.INTERVAL)
@ReadFilters({UnmappedReadFilter.class, NotPrimaryAlignmentFilter.class, DuplicateReadFilter.class, FailsVendorQualityCheckFilter.class, BadCigarFilter.class})
public class ReduceReadsWalker extends ReadWalker<LinkedList<GATKSAMRecord>, ReduceReadsStash> {

    // Writer for the reduced (compressed) BAM file.
    @Output
    protected StingSAMFileWriter out;

    /**
     * The number of bases to keep around mismatches (potential variation)
     */
    @Argument(fullName = "context_size", shortName = "cs", doc = "", required = false)
    protected int contextSize = 10;

    /**
     * The minimum mapping quality to be considered for the consensus synthetic read. Reads that have
     * mapping quality below this threshold will not be counted towards consensus, but are still counted
     * towards variable regions.
     */
    @Argument(fullName = "minimum_mapping_quality", shortName = "minmap", doc = "", required = false)
    protected int minMappingQuality = 20;

    /**
     * The minimum base quality to be considered for the consensus synthetic read. Reads that have
     * base quality below this threshold will not be counted towards consensus, but are still counted
     * towards variable regions.
     */
    @Argument(fullName = "minimum_base_quality_to_consider", shortName = "minqual", doc = "", required = false)
    protected byte minBaseQual = 20;

    /**
     * Reads have notoriously low quality bases on the tails (left and right). Consecutive bases with quality
     * lower than this threshold will be hard clipped off before entering the reduce reads algorithm.
     */
    @Argument(fullName = "minimum_tail_qualities", shortName = "mintail", doc = "", required = false)
    protected byte minTailQuality = 2;

    /**
     * Do not simplify read (strip away all extra information of the read -- anything other than bases, quals
     * and read group).
     */
    @Argument(fullName = "dont_simplify_reads", shortName = "nosimplify", doc = "", required = false)
    protected boolean DONT_SIMPLIFY_READS = false;

    /**
     * Do not hard clip adaptor sequences. Note: You don't have to turn this on for reads that are not mate paired.
     * The program will behave correctly in those cases.
     */
    @Argument(fullName = "dont_hardclip_adaptor_sequences", shortName = "noclip_ad", doc = "", required = false)
    protected boolean DONT_CLIP_ADAPTOR_SEQUENCES = false;

    /**
     * Do not hard clip the low quality tails of the reads. This option overrides the argument of minimum tail
     * quality.
     */
    @Argument(fullName = "dont_hardclip_low_qual_tails", shortName = "noclip_tail", doc = "", required = false)
    protected boolean DONT_CLIP_LOW_QUAL_TAILS = false;

    /**
     * Do not use high quality soft-clipped bases. By default, ReduceReads will hard clip away any low quality soft clipped
     * base left by the aligner and use the high quality soft clipped bases in its traversal algorithm to identify variant
     * regions. The minimum quality for soft clipped bases is the same as the minimum base quality to consider (minqual)
     */
    @Argument(fullName = "dont_use_softclipped_bases", shortName = "no_soft", doc = "", required = false)
    protected boolean DONT_USE_SOFTCLIPPED_BASES = false;

    /**
     * Do not compress read names. By default, ReduceReads will compress read names to numbers and guarantee
     * uniqueness and reads with similar name will still have similar compressed names. Note: If you scatter/gather
     * there is no guarantee that read name uniqueness will be maintained -- in this case we recommend not compressing.
     */
    @Argument(fullName = "dont_compress_read_names", shortName = "nocmp_names", doc = "", required = false)
    protected boolean DONT_COMPRESS_READ_NAMES = false;

    /**
     * Minimum proportion of mismatches in a site to trigger a variant region. Anything below this will be
     * considered consensus.
     */
    @Argument(fullName = "minimum_alt_proportion_to_trigger_variant", shortName = "minvar", doc = "", required = false)
    protected double minAltProportionToTriggerVariant = 0.05;

    /**
     * Minimum proportion of indels in a site to trigger a variant region. Anything below this will be
     * considered consensus.
     */
    @Argument(fullName = "minimum_del_proportion_to_trigger_variant", shortName = "mindel", doc = "", required = false)
    protected double minIndelProportionToTriggerVariant = 0.01;

    /**
     * Downsamples the coverage of a variable region approximately (guarantees the minimum to be equal to this).
     * A value of 0 turns downsampling off.
     */
    @Argument(fullName = "downsample_coverage", shortName = "ds", doc = "", required = false)
    protected int downsampleCoverage = 0;

    // Debug verbosity: 1 = trace reads through map/reduce/output, 2 = run extra QC on every output read.
    // NOTE(review): fullName is empty -- this flag is only reachable through its short name "-dl"; confirm intentional.
    @Hidden
    @Argument(fullName = "", shortName = "dl", doc = "", required = false)
    protected int debugLevel = 0;

    // If non-empty, prints a marker when a read whose name contains this string passes through map().
    // NOTE(review): fullName is empty here too -- only reachable via "-dr"; confirm intentional.
    @Hidden
    @Argument(fullName = "", shortName = "dr", doc = "", required = false)
    protected String debugRead = "";

    @Hidden
    @Argument(fullName = "downsample_strategy", shortName = "dm", doc = "", required = false)
    protected DownsampleStrategy downsampleStrategy = DownsampleStrategy.Normal;

    // Suppresses the @PG record in the output header (used mainly for deterministic test output).
    @Hidden
    @Argument(fullName = "no_pg_tag", shortName = "npt", doc ="", required = false)
    private boolean NO_PG_TAG = false;

    // Strategy used when downsampling variable regions (consumed by the SlidingWindow machinery).
    public enum DownsampleStrategy {
        Normal,
        Adaptive
    }

    protected int totalReads = 0;                  // number of reads seen by map() (see NOTE in outputRead about the extra increment there)
    int nCompressedReads = 0;                      // number of reduced (consensus) reads written to the output
    HashMap<String, Long> readNameHash;            // This hash will keep the name of the original read the new compressed name (a number).
    Long nextReadNumber = 1L;                      // The next number to use for the compressed read name.
    SortedSet<GenomeLoc> intervalList;             // sorted intervals to traverse; empty means whole-genome mode (see isWholeGenome())

    private static final String PROGRAM_RECORD_NAME = "GATK ReduceReads";   // The name that will go in the @PG tag

    /**
     * Basic generic initialization of the readNameHash and the intervalList. Output initialization
     * is done at the reduceInit method
     */
    @Override
    public void initialize() {
        super.initialize();
        GenomeAnalysisEngine toolkit = getToolkit();
        readNameHash = new HashMap<String, Long>();                        // prepare the read name hash to keep track of what reads have had their read names compressed
        intervalList = new TreeSet<GenomeLoc>(new GenomeLocComparator());  // get the interval list from the engine. If no interval list was provided, the walker will work in WGS mode
        if (toolkit.getIntervals() != null)
            intervalList.addAll(toolkit.getIntervals());
        if (!NO_PG_TAG)
            Utils.setupWriter(out, toolkit, false, true, this, PROGRAM_RECORD_NAME);   // adds the @PG record to the output header
        else
            out.setPresorted(false);
    }

    /**
     * Takes in a read and prepares it for the SlidingWindow machinery by performing the
     * following optional clipping operations:
     * 1. Hard clip adaptor sequences
     * 2. Hard clip low quality tails
     * 3. Hard clip all remaining soft clipped bases
     * 4. Hard clip read to the intervals in the interval list (this step may produce multiple reads)
     *
     * @param ref             default map parameter
     * @param read            default map parameter
     * @param metaDataTracker default map parameter
     * @return a linked list with all the reads produced by the clipping operations
     */
    @Override
    public LinkedList<GATKSAMRecord> map(ReferenceContext ref, GATKSAMRecord read, ReadMetaDataTracker metaDataTracker) {
        LinkedList<GATKSAMRecord> mappedReads;
        totalReads++;
        if (!debugRead.isEmpty() && read.getReadName().contains(debugRead))
            System.out.println("Found debug read!");
        if (debugLevel == 1)
            System.out.printf("\nOriginal: %s %s %d %d\n", read, read.getCigar(), read.getAlignmentStart(), read.getAlignmentEnd());
        // we write the actual alignment starts to their respective alignment shift tags in the temporary
        // attribute hash so we can determine later if we need to write down the alignment shift to the reduced BAM file
        read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, read.getAlignmentStart());
        read.setTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, read.getAlignmentEnd());
        if (!DONT_SIMPLIFY_READS)
            read.simplify();                                               // Clear all unnecessary attributes
        if (!DONT_CLIP_ADAPTOR_SEQUENCES)
            read = ReadClipper.hardClipAdaptorSequence(read);              // Strip away adaptor sequences, if any.
        if (!DONT_CLIP_LOW_QUAL_TAILS)
            read = ReadClipper.hardClipLowQualEnds(read, minTailQuality);  // Clip low quality tails
        if (!isWholeGenome())
            mappedReads = hardClipReadToInterval(read);                    // Hard clip the remainder of the read to the desired interval
        else {
            mappedReads = new LinkedList<GATKSAMRecord>();
            if (!read.isEmpty())
                mappedReads.add(read);
        }
        // drop low-quality soft clips; any read fully consumed by the clipping is discarded
        if (!mappedReads.isEmpty() && !DONT_USE_SOFTCLIPPED_BASES) {
            LinkedList<GATKSAMRecord> tempList = new LinkedList<GATKSAMRecord>();
            for (GATKSAMRecord mRead : mappedReads) {
                GATKSAMRecord clippedRead = ReadClipper.hardClipLowQualitySoftClips(mRead, minBaseQual);
                if (!clippedRead.isEmpty())
                    tempList.add(clippedRead);
            }
            mappedReads = tempList;
        }
        if (debugLevel == 1)
            for (GATKSAMRecord mappedRead : mappedReads)
                System.out.printf("MAPPED: %s %d %d\n", mappedRead.getCigar(), mappedRead.getAlignmentStart(), mappedRead.getAlignmentEnd());
        return mappedReads;
    }

    /**
     * Initializes the ReduceReadsStash that keeps track of all reads that are waiting to
     * enter the SlidingWindow machinery. The stash makes sure reads are served in order
     * even though map() may generate reads that are only supposed to enter the machinery
     * in the future.
     *
     * @return the empty stash
     */
    @Override
    public ReduceReadsStash reduceInit() {
        return new ReduceReadsStash(new MultiSampleCompressor(getToolkit().getSAMFileHeader(), contextSize, downsampleCoverage, minMappingQuality, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, downsampleStrategy));
    }

    /**
     * Takes the list of reads produced by map(), adds them to the stash (which keeps them sorted) and process
     * all reads that come before the original read (the read that was passed to map) including the original
     * read. This is where we send reads, in order, to the SlidingWindow machinery.
     *
     * @param mappedReads the list of reads sent by map
     * @param stash       the stash that keeps the reads in order for processing
     * @return the stash with all reads that have not been processed yet
     */
    // NOTE(review): missing @Override annotation (this overrides ReadWalker.reduce) -- harmless, but inconsistent with the other overrides.
    public ReduceReadsStash reduce(LinkedList<GATKSAMRecord> mappedReads, ReduceReadsStash stash) {
        if (debugLevel == 1)
            stash.print();
        boolean firstRead = true;
        for (GATKSAMRecord read : mappedReads) {
            // only the first element of the list can be the "original" read (the others are clipped tails)
            boolean originalRead = firstRead && isOriginalRead(mappedReads, read);
            if (read.getReadLength() == 0)
                throw new ReviewedStingException("Empty read sent to reduce, this should never happen! " + read.getReadName() + " -- " + read.getCigar() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd());
            if (originalRead) {
                // process all stashed reads that come before the original read, then the original read itself
                List<GATKSAMRecord> readsReady = new LinkedList<GATKSAMRecord>();
                readsReady.addAll(stash.getAllReadsBefore(read));
                readsReady.add(read);
                for (GATKSAMRecord readReady : readsReady) {
                    if (debugLevel == 1)
                        System.out.println("REDUCE: " + readReady.getCigar() + " " + readReady.getAlignmentStart() + " " + readReady.getAlignmentEnd());
                    for (GATKSAMRecord compressedRead : stash.compress(readReady))
                        outputRead(compressedRead);
                }
            } else
                stash.add(read);        // tails (and non-contained reads) wait in the stash for their turn
            firstRead = false;
        }
        return stash;
    }

    /**
     * Now that no more reads will come, we process all the remaining reads in the stash, in order.
     *
     * @param stash the ReduceReadsStash with all unprocessed reads (from reduce)
     */
    @Override
    public void onTraversalDone(ReduceReadsStash stash) {
        // output any remaining reads in the compressor
        for (GATKSAMRecord read : stash.close())
            outputRead(read);
    }

    /**
     * Hard clips away all parts of the read that doesn't agree with the intervals selected.
     *
     * Note: If read overlaps more than one interval, it will be hard clipped to all
     * the intervals it overlaps with
     *
     * @param read the read to be hard clipped to the interval.
     * @return a shallow copy of the read hard clipped to the interval
     */
    private LinkedList<GATKSAMRecord> hardClipReadToInterval(GATKSAMRecord read) {
        LinkedList<GATKSAMRecord> clippedReads = new LinkedList<GATKSAMRecord>();
        GenomeLoc intervalOverlapped = null;   // marks the interval to which the original read overlapped (so we can cut all previous intervals from the list)
        boolean originalRead = true;           // false if this is the right tail of the original read
        boolean overlap;                       // keeps track of the interval that overlapped the original read
        boolean doneClipping;                  // triggers an early exit if we are done clipping this read
        if (isWholeGenome())
            clippedReads.add(read);            // if we don't have intervals (wgs) the read goes in unchanged
        for (GenomeLoc interval : intervalList) {
            if (read.isEmpty())                // nothing to do with an empty read (could have been fully clipped before)
                break;
            GATKSAMRecord clippedRead = null;  // this will hold the read clipped to the interval to be added in the end of the switch
            switch (ReadUtils.getReadAndIntervalOverlapType(read, interval)) {
                case NO_OVERLAP_RIGHT:         // no reads on this interval, check the next interval if this is the original read
                    if (!originalRead)         // something went wrong if this is the tail of the read
                        throw new ReviewedStingException("tail of the read should never NO_OVERLAP_RIGHT the following interval. " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                    overlap = false;
                    doneClipping = false;
                    break;
                case NO_OVERLAP_HARDCLIPPED_RIGHT:   // read used to overlap but got hard clipped and doesn't overlap anymore
                    if (originalRead) {
                        overlap = true;              // effectively, we have found the read's location and now we are going to try and match it's tail (which happens to be the entire read).
                        clippedRead = GATKSAMRecord.emptyRead(read);
                    } else
                        overlap = false;
                    doneClipping = false;
                    break;
                case NO_OVERLAP_CONTIG:              // read is in a different contig
                    if (originalRead) {              // the original read can be in a bigger contig, but not on a smaller one.
                        if (read.getReferenceIndex() < interval.getContigIndex())
                            throw new ReviewedStingException("read is behind interval list. (contig) " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                        else {
                            overlap = false;
                            doneClipping = false;
                        }
                    } // tail read CANNOT be in a different contig.
                    else {
                        if (read.getReferenceIndex() < interval.getContigIndex()) {
                            overlap = false;
                            doneClipping = true;
                        } else
                            throw new ReviewedStingException("Tail read is in bigger contig than interval traversal. " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                    }
                    break;
                case NO_OVERLAP_LEFT:
                    if (originalRead)                // if this is the first read this should never happen.
                        throw new ReviewedStingException("original read cannot be behind the first interval. (position) " + read.getReadName() + " -- " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd() + " x " + interval.getLocation().toString());
                    overlap = false;
                    doneClipping = true;
                    break;
                case NO_OVERLAP_HARDCLIPPED_LEFT:    // read used to overlap but got hard clipped and doesn't overlap anymore
                    overlap = originalRead;          // if this is the original read, we should not advance the interval list, the original overlap was here.
                    doneClipping = true;
                    break;
                case OVERLAP_LEFT:                   // clip the left tail of the read
                    clippedRead = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStart() - 1);
                    overlap = true;
                    doneClipping = true;
                    break;
                case OVERLAP_RIGHT:                  // clip the right tail of the read and try to match it to the next interval
                    clippedRead = ReadClipper.hardClipByReferenceCoordinatesRightTail(read, interval.getStop() + 1);
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStop());
                    overlap = true;
                    doneClipping = false;
                    break;
                case OVERLAP_LEFT_AND_RIGHT:         // clip both left and right ends of the read
                    clippedRead = ReadClipper.hardClipBothEndsByReferenceCoordinates(read, interval.getStart() - 1, interval.getStop() + 1);
                    read = ReadClipper.hardClipByReferenceCoordinatesLeftTail(read, interval.getStop());
                    overlap = true;
                    doneClipping = false;
                    break;
                case OVERLAP_CONTAINED:              // don't do anything to the read
                    clippedRead = read;
                    overlap = true;
                    doneClipping = true;
                    break;
                default:
                    throw new ReviewedStingException("interval overlap returned an unknown / unhandled state. If new state was added to intervalOverlap, it should be handled by hardClipReadToInterval.");
            }
            if (overlap && originalRead)
                intervalOverlapped = interval;
            if (clippedRead != null) {
                originalRead = false;
                if (!clippedRead.isEmpty())
                    clippedReads.add(clippedRead);   // if the read overlaps the interval entirely within a deletion, it will be entirely clipped off
            }
            if (doneClipping)
                break;
        }
        // all intervals before the one the original read overlapped are done for good
        if (intervalOverlapped != null)
            intervalList = intervalList.tailSet(intervalOverlapped);
        return clippedReads;
    }

    /**
     * Compresses the read name and adds it to output BAM file (reduced BAM)
     * after performing some quality control
     *
     * @param read any read
     */
    private void outputRead(GATKSAMRecord read) {
        if (debugLevel == 2) {
            checkForHighMismatch(read);
            checkCigar(read);
        }
        if (read.isReducedRead())
            nCompressedReads++;
        else {
            int originalAlignmentStart = (Integer) read.getTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT);
            int originalAlignmentEnd = (Integer) read.getTemporaryAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT);
            int startShift = originalAlignmentStart - read.getUnclippedStart();   // we annotate the shifts for better compression
            int endShift = read.getUnclippedEnd() - originalAlignmentEnd;         // we annotate the shifts for better compression
            if (startShift > 0)
                read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_START_SHIFT, startShift);   // If the read had any soft clips before getting chopped (variant region) annotate its original alignment (start)
            if (endShift > 0)
                read.setAttribute(GATKSAMRecord.REDUCED_READ_ORIGINAL_ALIGNMENT_END_SHIFT, endShift);       // If the read had any soft clips before getting chopped (variant region) annotate its original alignment (end)
            // NOTE(review): totalReads is also incremented in map(), so non-reduced output reads are counted twice -- confirm whether this double count is intentional.
            totalReads++;
        }
        if (debugLevel == 1)
            System.out.println("BAM: " + read.getCigar() + " " + read.getAlignmentStart() + " " + read.getAlignmentEnd());
//        if (!DONT_USE_SOFTCLIPPED_BASES)
//            reSoftClipBases(read);
        if (!DONT_COMPRESS_READ_NAMES)
            compressReadName(read);
        out.addAlignment(read);
    }

    /**
     * Re-introduces soft clips (recorded in the temporary "SL"/"SR" attributes) into the
     * read's cigar and adjusts its alignment start accordingly.
     *
     * NOTE(review): currently dead code -- its only call site in outputRead is commented out.
     *
     * @param read any read carrying optional "SL"/"SR" temporary attributes
     */
    private void reSoftClipBases(GATKSAMRecord read) {
        Integer left = (Integer) read.getTemporaryAttribute("SL");
        Integer right = (Integer) read.getTemporaryAttribute("SR");
        if (left != null || right != null) {
            // deep-copy the cigar so we never mutate the read's original elements
            Cigar newCigar = new Cigar();
            for (CigarElement element : read.getCigar().getCigarElements()) {
                newCigar.add(new CigarElement(element.getLength(), element.getOperator()));
            }
            if (left != null) {
                newCigar = updateFirstSoftClipCigarElement(left, newCigar);
                read.setAlignmentStart(read.getAlignmentStart() + left);
            }
            if (right != null) {
                // handle the right end by inverting the cigar, updating its (now first) element, and inverting back
                Cigar invertedCigar = invertCigar(newCigar);
                newCigar = invertCigar(updateFirstSoftClipCigarElement(right, invertedCigar));
            }
            read.setCigar(newCigar);
        }
    }

    /**
     * Facility routine to revert the first element of a Cigar string (skipping hard clips) into a soft-clip.
     * To be used on both ends if provided a flipped Cigar
     *
     * @param softClipSize  the length of the soft clipped element to add
     * @param originalCigar the original Cigar string
     * @return a new Cigar object with the soft clips added
     */
    private Cigar updateFirstSoftClipCigarElement (int softClipSize, Cigar originalCigar) {
        Cigar result = new Cigar();
        CigarElement leftElement = new CigarElement(softClipSize, CigarOperator.S);
        boolean updated = false;
        for (CigarElement element : originalCigar.getCigarElements()) {
            // only the first M element is split; everything before it (e.g. hard clips) is copied verbatim
            if (!updated && element.getOperator() == CigarOperator.M) {
                result.add(leftElement);
                int newLength = element.getLength() - softClipSize;
                if (newLength > 0)
                    result.add(new CigarElement(newLength, CigarOperator.M));
                updated = true;
            }
            else
                result.add(element);
        }
        return result;
    }

    /**
     * Given a cigar string, returns the inverted cigar string.
     *
     * @param cigar the original cigar
     * @return the inverted cigar
     */
    private Cigar invertCigar(Cigar cigar) {
        Stack<CigarElement> stack = new Stack<CigarElement>();
        for (CigarElement e : cigar.getCigarElements())
            stack.push(e);
        Cigar inverted = new Cigar();
        while (!stack.empty()) {
            inverted.add(stack.pop());
        }
        return inverted;
    }

    /**
     * Quality control procedure that checks if the consensus reads contains too many
     * mismatches with the reference. This should never happen and is a good trigger for
     * errors with the algorithm.
     *
     * @param read any read
     */
    private void checkForHighMismatch(GATKSAMRecord read) {
        final int start = read.getAlignmentStart();
        final int stop = read.getAlignmentEnd();
        final byte[] ref = getToolkit().getReferenceDataSource().getReference().getSubsequenceAt(read.getReferenceName(), start, stop).getBases();
        final int nm = SequenceUtil.countMismatches(read, ref, start - 1);
        final int readLen = read.getReadLength();
        final double nmFraction = nm / (1.0 * readLen);
        // only consensus reads are subject to this check, and only when they are long enough to make the fraction meaningful
        if (nmFraction > 0.4 && readLen > 20 && read.getAttribute(GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG) != null && read.getReadName().startsWith("Consensus"))
            throw new ReviewedStingException("BUG: High mismatch fraction found in read " + read.getReadName() + " position: " + read.getReferenceName() + ":" + read.getAlignmentStart() + "-" + read.getAlignmentEnd());
    }

    /**
     * Quality control procedure that fails hard if an output read carries an invalid cigar.
     *
     * @param read any read
     */
    private void checkCigar (GATKSAMRecord read) {
        if (read.getCigar().isValid(null, -1) != null) {
            throw new ReviewedStingException("BUG: cigar string is not valid: " + read.getCigarString());
        }
    }

    /**
     * Compresses the read name using the readNameHash if we have already compressed
     * this read name before.
     *
     * @param read any read
     */
    private void compressReadName(GATKSAMRecord read) {
        String name = read.getReadName();
        String compressedName = read.isReducedRead() ? "C" : "";   // consensus reads are prefixed with "C"
        if (readNameHash.containsKey(name))
            compressedName += readNameHash.get(name).toString();   // mates share the original name, so they get the same number
        else {
            readNameHash.put(name, nextReadNumber);
            compressedName += nextReadNumber.toString();
            nextReadNumber++;
        }
        read.setReadName(compressedName);
    }

    /**
     * Returns true if the read is the original read that went through map().
     *
     * This is important to know so we can decide what reads to pull from the stash. Only reads that came before the original read should be pulled.
     *
     * @param list the list
     * @param read the read
     * @return Returns true if the read is the original read that went through map().
     */
    private boolean isOriginalRead(LinkedList<GATKSAMRecord> list, GATKSAMRecord read) {
        // in WGS mode map() never splits reads, so the read is always "original"; otherwise it must be the
        // first of the list AND fully contained in the current head interval
        return isWholeGenome() || (list.getFirst().equals(read) && ReadUtils.getReadAndIntervalOverlapType(read, intervalList.first()) == ReadUtils.ReadAndIntervalOverlap.OVERLAP_CONTAINED);
    }

    /**
     * Checks whether or not the intervalList is empty, meaning we're running in WGS mode.
     *
     * @return whether or not we're running in WGS mode.
     */
    private boolean isWholeGenome() {
        return intervalList.isEmpty();
    }
}

View File

@ -0,0 +1,83 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.sam.AlignmentStartWithNoTiesComparator;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.TreeSet;
/**
*
* @author depristo
* @version 0.1
*/
public class SingleSampleCompressor implements Compressor {
    protected static final Logger logger = Logger.getLogger(SingleSampleCompressor.class);

    protected final int contextSize;
    protected final int downsampleCoverage;
    protected int minMappingQuality;
    protected int slidingWindowCounter;              // sequential id handed to each new SlidingWindow
    protected final String sampleName;
    protected SlidingWindow slidingWindow;           // the currently open window, or null
    protected double minAltProportionToTriggerVariant;
    protected double minIndelProportionToTriggerVariant;
    protected int minBaseQual;
    protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy;

    /**
     * Builds a compressor for one sample; the thresholds are forwarded verbatim to every
     * SlidingWindow this compressor creates.
     */
    public SingleSampleCompressor(final String sampleName,
                                  final int contextSize,
                                  final int downsampleCoverage,
                                  final int minMappingQuality,
                                  final double minAltProportionToTriggerVariant,
                                  final double minIndelProportionToTriggerVariant,
                                  final int minBaseQual,
                                  final ReduceReadsWalker.DownsampleStrategy downsampleStrategy) {
        this.sampleName = sampleName;
        this.contextSize = contextSize;
        this.downsampleCoverage = downsampleCoverage;
        this.minMappingQuality = minMappingQuality;
        this.minAltProportionToTriggerVariant = minAltProportionToTriggerVariant;
        this.minIndelProportionToTriggerVariant = minIndelProportionToTriggerVariant;
        this.minBaseQual = minBaseQual;
        this.downsampleStrategy = downsampleStrategy;
        this.slidingWindowCounter = 0;               // no window created yet
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable<GATKSAMRecord> addAlignment( GATKSAMRecord read ) {
        final TreeSet<GATKSAMRecord> finalizedReads = new TreeSet<GATKSAMRecord>(new AlignmentStartWithNoTiesComparator());
        final int unclippedStart = read.getUnclippedStart();

        // Close the current window when this read can no longer belong to it: it maps to a
        // different contig, or it starts beyond the window's reach (stop location + context size).
        if (slidingWindow != null) {
            final boolean differentContig = read.getReferenceIndex() != slidingWindow.getContigIndex();
            final boolean beyondWindow = unclippedStart - contextSize > slidingWindow.getStopLocation();
            if (differentContig || beyondWindow) {
                finalizedReads.addAll(slidingWindow.close());
                slidingWindow = null;                // forces a fresh window below
            }
        }

        // No open window (very first read, or we just closed one) -- start a new one here.
        if (slidingWindow == null) {
            slidingWindow = new SlidingWindow(read.getReferenceName(), read.getReferenceIndex(), contextSize, read.getHeader(), read.getReadGroup(), slidingWindowCounter, minAltProportionToTriggerVariant, minIndelProportionToTriggerVariant, minBaseQual, minMappingQuality, downsampleCoverage, downsampleStrategy, read.hasBaseIndelQualities());
            slidingWindowCounter++;
        }

        finalizedReads.addAll(slidingWindow.addRead(read));
        return finalizedReads;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Iterable<GATKSAMRecord> close() {
        if (slidingWindow == null)
            return new TreeSet<GATKSAMRecord>();     // nothing was ever added
        return slidingWindow.close();
    }
}

View File

@ -0,0 +1,713 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.downsampling.FractionalDownsampler;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.broadinstitute.sting.utils.sam.ReadUtils;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.ListIterator;
/**
* Created by IntelliJ IDEA.
* User: roger
* Date: 8/3/11
* Time: 2:24 PM
*/
public class SlidingWindow {
// Sliding Window data
final private LinkedList<GATKSAMRecord> readsInWindow;    // the reads currently held by the window
final private LinkedList<HeaderElement> windowHeader;     // per-position elements of the window (first element = window start)
protected int contextSize;           // the largest context size (between mismatches and indels)
protected int stopLocation;          // -1 until set; presumably the last genomic position covered by the window -- confirm in addRead/updateHeaderCounts
protected String contig;             // name of the contig this window is on
protected int contigIndex;           // index of the contig this window is on
protected SAMFileHeader header;      // SAM header handed to the window at construction
protected GATKSAMReadGroupRecord readGroupAttribute;   // read group assigned to this window's reads
protected int downsampleCoverage;    // downsampling target passed in from the walker (0 = off, per walker docs)
// Running consensus data
protected SyntheticRead runningConsensus;
protected int consensusCounter;
protected String consensusReadName;  // name prefix: "Consensus-<windowNumber>-"
// Filtered Data Consensus data
protected SyntheticRead filteredDataConsensus;
protected int filteredDataConsensusCounter;
protected String filteredDataReadName;   // name prefix: "Filtered-<windowNumber>-"
// Additional parameters
protected double MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT;   // proportion has to be greater than this value to trigger variant region due to mismatches
protected double MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT; // proportion has to be greater than this value to trigger variant region due to deletions
protected int MIN_BASE_QUAL_TO_COUNT;    // qual has to be greater than or equal to this value
protected int MIN_MAPPING_QUALITY;       // mapping-quality threshold forwarded from the walker
protected ReduceReadsWalker.DownsampleStrategy downsampleStrategy;
private boolean hasIndelQualities;       // whether the incoming reads carry base insertion/deletion qualities
/**
 * The types of synthetic reads to use in the finalizeAndAdd method
 */
private enum ConsensusType {
    CONSENSUS,
    FILTERED,
    BOTH
}
/**
 * @return the window's stop location, or -1 if it has not been set yet
 */
public int getStopLocation() {
    return stopLocation;
}
/**
 * @return the name of the contig this window is on
 */
public String getContig() {
    return contig;
}
/**
 * @return the index of the contig this window is on
 */
public int getContigIndex() {
    return contigIndex;
}
/**
 * @return the location of the first header element of the window, or -1 if the window is empty
 */
public int getStartLocation() {
    final HeaderElement firstElement = windowHeader.peek();   // LinkedList.peek() returns null exactly when the list is empty
    return (firstElement == null) ? -1 : firstElement.getLocation();
}
public SlidingWindow(String contig, int contigIndex, int contextSize, SAMFileHeader header, GATKSAMReadGroupRecord readGroupAttribute, int windowNumber, final double minAltProportionToTriggerVariant, final double minIndelProportionToTriggerVariant, int minBaseQual, int minMappingQuality, int downsampleCoverage, final ReduceReadsWalker.DownsampleStrategy downsampleStrategy, boolean hasIndelQualities) {
this.stopLocation = -1;
this.contextSize = contextSize;
this.downsampleCoverage = downsampleCoverage;
this.MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT = minAltProportionToTriggerVariant;
this.MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT = minIndelProportionToTriggerVariant;
this.MIN_BASE_QUAL_TO_COUNT = minBaseQual;
this.MIN_MAPPING_QUALITY = minMappingQuality;
this.windowHeader = new LinkedList<HeaderElement>();
this.readsInWindow = new LinkedList<GATKSAMRecord>();
this.contig = contig;
this.contigIndex = contigIndex;
this.header = header;
this.readGroupAttribute = readGroupAttribute;
this.consensusCounter = 0;
this.consensusReadName = "Consensus-" + windowNumber + "-";
this.filteredDataConsensusCounter = 0;
this.filteredDataReadName = "Filtered-" + windowNumber + "-";
this.runningConsensus = null;
this.filteredDataConsensus = null;
this.downsampleStrategy = downsampleStrategy;
this.hasIndelQualities = hasIndelQualities;
}
/**
* Add a read to the sliding window and slides the window accordingly.
*
* Reads are assumed to be in order, therefore, when a read is added the sliding window can
* assume that no more reads will affect read.getUnclippedStart() - contextSizeMismatches. The window
* slides forward to that position and returns all reads that may have been finalized in the
* sliding process.
*
* @param read the read
* @return a list of reads that have been finished by sliding the window.
*/
public List<GATKSAMRecord> addRead(GATKSAMRecord read) {
updateHeaderCounts(read, false); // update the window header counts
readsInWindow.add(read); // add read to sliding reads
return slideWindow(read.getUnclippedStart());
}
/**
* returns the next complete or incomplete variant region between 'from' (inclusive) and 'to' (exclusive)
*
* @param from beginning window header index of the search window (inclusive)
* @param to end window header index of the search window (exclusive)
* @param variantSite boolean array with true marking variant regions
* @return null if nothing is variant, start/stop if there is a complete variant region, start/-1 if there is an incomplete variant region.
*/
private Pair<Integer, Integer> getNextVariantRegion(int from, int to, boolean[] variantSite) {
boolean foundStart = false;
int variantRegionStartIndex = 0;
for (int i=from; i<to; i++) {
if (variantSite[i] && !foundStart) {
variantRegionStartIndex = i;
foundStart = true;
}
else if(!variantSite[i] && foundStart) {
return(new Pair<Integer, Integer>(variantRegionStartIndex, i-1));
}
}
return (foundStart) ? new Pair<Integer, Integer>(variantRegionStartIndex, -1) : null;
}
/**
* Creates a list with all the complete and incomplete variant regions within 'from' (inclusive) and 'to' (exclusive)
*
* @param from beginning window header index of the search window (inclusive)
* @param to end window header index of the search window (exclusive)
* @param variantSite boolean array with true marking variant regions
* @return a list with start/stops of variant regions following getNextVariantRegion description
*/
private List<Pair<Integer, Integer>> getAllVariantRegions(int from, int to, boolean[] variantSite) {
List<Pair<Integer,Integer>> regions = new LinkedList<Pair<Integer, Integer>>();
int index = from;
while(index < to) {
Pair<Integer,Integer> result = getNextVariantRegion(index, to, variantSite);
if (result == null)
break;
regions.add(result);
if (result.getSecond() < 0)
break;
index = result.getSecond() + 1;
}
return regions;
}
/**
* Determines if the window can be slid given the new incoming read.
*
* We check from the start of the window to the (unclipped) start of the new incoming read if there
* is any variant.
* If there are variant sites, we check if it's time to close the variant region.
*
* @param incomingReadUnclippedStart the incoming read's start position. Must be the unclipped start!
* @return all reads that have fallen to the left of the sliding window after the slide
*/
protected List<GATKSAMRecord> slideWindow(int incomingReadUnclippedStart) {
List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
if (incomingReadUnclippedStart - contextSize > getStartLocation()) {
int readStartHeaderIndex = incomingReadUnclippedStart - getStartLocation();
boolean[] variantSite = markSites(getStartLocation() + readStartHeaderIndex);
int breakpoint = Math.max(readStartHeaderIndex - contextSize - 1, 0); // this is the limit of what we can close/send to consensus (non-inclusive)
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, breakpoint, variantSite);
finalizedReads = closeVariantRegions(regions, false);
List<GATKSAMRecord> readsToRemove = new LinkedList<GATKSAMRecord>();
for (GATKSAMRecord read : readsInWindow) { // todo -- unnecessarily going through all reads in the window !! Optimize this (But remember reads are not sorted by alignment end!)
if (read.getAlignmentEnd() < getStartLocation()) {
readsToRemove.add(read);
}
}
for (GATKSAMRecord read : readsToRemove) {
readsInWindow.remove(read);
}
}
return finalizedReads;
}
/**
* returns an array marked with variant and non-variant regions (it uses
* markVariantRegions to make the marks)
*
* @param stop check the window from start to stop (not-inclusive)
* @return a boolean array with 'true' marking variant regions and false marking consensus sites
*/
protected boolean[] markSites(int stop) {
boolean[] markedSites = new boolean[stop - getStartLocation() + contextSize + 1];
Iterator<HeaderElement> headerElementIterator = windowHeader.iterator();
for (int i = getStartLocation(); i < stop; i++) {
if (headerElementIterator.hasNext()) {
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.isVariant(MIN_ALT_BASE_PROPORTION_TO_TRIGGER_VARIANT, MIN_INDEL_BASE_PROPORTION_TO_TRIGGER_VARIANT))
markVariantRegion(markedSites, i - getStartLocation());
} else
break;
}
return markedSites;
}
/**
* Marks the sites around the variant site (as true)
*
* @param markedSites the boolean array to bear the marks
* @param variantSiteLocation the location where a variant site was found
*/
protected void markVariantRegion(boolean[] markedSites, int variantSiteLocation) {
int from = (variantSiteLocation < contextSize) ? 0 : variantSiteLocation - contextSize;
int to = (variantSiteLocation + contextSize + 1 > markedSites.length) ? markedSites.length : variantSiteLocation + contextSize + 1;
for (int i = from; i < to; i++)
markedSites[i] = true;
}
/**
* Adds bases to the running consensus or filtered data accordingly
*
* If adding a sequence with gaps, it will finalize multiple consensus reads and keep the last running consensus
*
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
* @return a list of consensus reads generated by this call. Empty list if no consensus was generated.
*/
protected List<GATKSAMRecord> addToSyntheticReads(int start, int end) {
LinkedList<GATKSAMRecord> reads = new LinkedList<GATKSAMRecord>();
if (start < end) {
ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
if (!headerElementIterator.hasNext())
throw new ReviewedStingException(String.format("Requested to add to synthetic reads a region that contains no header element at index: %d - %d / %d", start, windowHeader.size(), end));
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData()) {
reads.addAll(finalizeAndAdd(ConsensusType.FILTERED));
int endOfConsensus = findNextNonConsensusElement(start, end);
addToRunningConsensus(start, endOfConsensus);
if (endOfConsensus <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfConsensus, start));
reads.addAll(addToSyntheticReads(endOfConsensus, end));
} else if (headerElement.hasFilteredData()) {
reads.addAll(finalizeAndAdd(ConsensusType.CONSENSUS));
int endOfFilteredData = findNextNonFilteredDataElement(start, end);
addToFilteredData(start, endOfFilteredData);
if (endOfFilteredData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfFilteredData, start));
reads.addAll(addToSyntheticReads(endOfFilteredData, end));
} else if (headerElement.isEmpty()) {
reads.addAll(finalizeAndAdd(ConsensusType.BOTH));
int endOfEmptyData = findNextNonEmptyElement(start, end);
if (endOfEmptyData <= start)
throw new ReviewedStingException(String.format("next start is <= current start: (%d <= %d)", endOfEmptyData, start));
reads.addAll(addToSyntheticReads(endOfEmptyData, end));
} else
throw new ReviewedStingException(String.format("Header Element %d is neither Consensus, Data or Empty. Something is wrong.", start));
}
return reads;
}
/**
* Finalizes one or more synthetic reads.
*
* @param type the synthetic reads you want to close
* @return the GATKSAMRecords generated by finalizing the synthetic reads
*/
private List<GATKSAMRecord> finalizeAndAdd(ConsensusType type) {
GATKSAMRecord read = null;
List<GATKSAMRecord> list = new LinkedList<GATKSAMRecord>();
switch (type) {
case CONSENSUS:
read = finalizeRunningConsensus();
break;
case FILTERED:
read = finalizeFilteredDataConsensus();
break;
case BOTH:
read = finalizeRunningConsensus();
if (read != null) list.add(read);
read = finalizeFilteredDataConsensus();
}
if (read != null)
list.add(read);
return list;
}
/**
* Looks for the next position without consensus data
*
* @param start beginning of the filtered region
* @param upTo limit to search for another consensus element
* @return next position with consensus data or empty
*/
private int findNextNonConsensusElement(int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData())
break;
index++;
}
return index;
}
/**
* Looks for the next position without filtered data
*
* @param start beginning of the region
* @param upTo limit to search for
* @return next position with no filtered data
*/
private int findNextNonFilteredDataElement(int start, int upTo) {
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasFilteredData() || headerElement.hasConsensusData())
break;
index++;
}
return index;
}
/**
* Looks for the next non-empty header element
*
* @param start beginning of the region
* @param upTo limit to search for
* @return next position with non-empty element
*/
private int findNextNonEmptyElement(int start, int upTo) {
ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
int index = start;
while (index < upTo) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("There are no more header elements in this window");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.isEmpty())
break;
index++;
}
return index;
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
* bases.
*
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
*/
private void addToFilteredData(int start, int end) {
if (filteredDataConsensus == null)
filteredDataConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, filteredDataReadName + filteredDataConsensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
ListIterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a filtered data synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next();
if (headerElement.hasConsensusData())
throw new ReviewedStingException("Found consensus data inside region to add to filtered data.");
if (!headerElement.hasFilteredData())
throw new ReviewedStingException("No filtered data in " + index);
genericAddBaseToConsensus(filteredDataConsensus, headerElement.getFilteredBaseCounts(), headerElement.getRMS());
}
}
/**
* Adds bases to the filtered data synthetic read.
*
* Different from the addToConsensus method, this method assumes a contiguous sequence of filteredData
* bases.
*
* @param start the first header index to add to consensus
* @param end the first header index NOT TO add to consensus
*/
private void addToRunningConsensus(int start, int end) {
if (runningConsensus == null)
runningConsensus = new SyntheticRead(header, readGroupAttribute, contig, contigIndex, consensusReadName + consensusCounter++, windowHeader.get(start).getLocation(), GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, hasIndelQualities);
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(start);
for (int index = start; index < end; index++) {
if (!headerElementIterator.hasNext())
throw new ReviewedStingException("Requested to create a running consensus synthetic read from " + start + " to " + end + " but " + index + " does not exist");
HeaderElement headerElement = headerElementIterator.next();
if (!headerElement.hasConsensusData())
throw new ReviewedStingException("No CONSENSUS data in " + index);
genericAddBaseToConsensus(runningConsensus, headerElement.getConsensusBaseCounts(), headerElement.getRMS());
}
}
/**
* Generic accessor to add base and qualities to a synthetic read
*
* @param syntheticRead the synthetic read to add to
* @param baseCounts the base counts object in the header element
* @param rms the rms mapping quality in the header element
*/
private void genericAddBaseToConsensus(SyntheticRead syntheticRead, BaseAndQualsCounts baseCounts, double rms) {
BaseIndex base = baseCounts.baseIndexWithMostCounts();
byte count = (byte) Math.min(baseCounts.countOfMostCommonBase(), Byte.MAX_VALUE);
byte qual = baseCounts.averageQualsOfMostCommonBase();
byte insQual = baseCounts.averageInsertionQualsOfMostCommonBase();
byte delQual = baseCounts.averageDeletionQualsOfMostCommonBase();
syntheticRead.add(base, count, qual, insQual, delQual, rms);
}
/**
* Finalizes a variant region, any adjacent synthetic reads.
*
* @param start the first window header index in the variant region (inclusive)
* @param stop the last window header index of the variant region (inclusive)
* @return all reads contained in the variant region plus any adjacent synthetic reads
*/
@Requires("start <= stop")
protected List<GATKSAMRecord> closeVariantRegion(int start, int stop) {
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
int refStart = windowHeader.get(start).getLocation(); // All operations are reference based, not read based
int refStop = windowHeader.get(stop).getLocation();
for (GATKSAMRecord read : readsInWindow) { // Keep all reads that overlap the variant region
if (read.getSoftStart() <= refStop && read.getAlignmentEnd() >= refStart) {
allReads.add(read);
updateHeaderCounts(read, true); // Remove this read from the window header entirely
}
}
List<GATKSAMRecord> result = (downsampleCoverage > 0) ? downsampleVariantRegion(allReads) : allReads;
result.addAll(addToSyntheticReads(0, start));
result.addAll(finalizeAndAdd(ConsensusType.BOTH));
for (GATKSAMRecord read : result) {
readsInWindow.remove(read); // todo -- not optimal, but needs to be done so the next region doesn't try to remove the same reads from the header counts.
}
return result; // finalized reads will be downsampled if necessary
}
private List<GATKSAMRecord> closeVariantRegions(List<Pair<Integer, Integer>> regions, boolean forceClose) {
List<GATKSAMRecord> allReads = new LinkedList<GATKSAMRecord>();
if (!regions.isEmpty()) {
int lastStop = -1;
for (Pair<Integer, Integer> region : regions) {
int start = region.getFirst();
int stop = region.getSecond();
if (stop < 0 && forceClose)
stop = windowHeader.size() - 1;
if (stop >= 0) {
allReads.addAll(closeVariantRegion(start, stop));
lastStop = stop;
}
}
for (int i = 0; i < lastStop; i++) // clean up the window header elements up until the end of the variant region. (we keep the last element in case the following element had a read that started with insertion)
windowHeader.remove(); // todo -- can't believe java doesn't allow me to just do windowHeader = windowHeader.get(stop). Should be more efficient here!
}
return allReads;
}
/**
* Downsamples a variant region to the downsample coverage of the sliding window.
*
* It will use the downsampling strategy defined by the SlidingWindow
*
* @param allReads the reads to select from (all reads that cover the window)
* @return a list of reads selected by the downsampler to cover the window to at least the desired coverage
*/
protected List<GATKSAMRecord> downsampleVariantRegion(final List<GATKSAMRecord> allReads) {
double fraction = 100 / allReads.size();
if (fraction >= 1)
return allReads;
FractionalDownsampler <GATKSAMRecord> downsampler = new FractionalDownsampler<GATKSAMRecord>(fraction);
downsampler.submit(allReads);
return downsampler.consumeDownsampledItems();
}
/**
* Properly closes a Sliding Window, finalizing all consensus and variant
* regions that still exist regardless of being able to fulfill the
* context size requirement in the end.
*
* @return All reads generated
*/
public List<GATKSAMRecord> close() {
// mark variant regions
List<GATKSAMRecord> finalizedReads = new LinkedList<GATKSAMRecord>();
if (!windowHeader.isEmpty()) {
boolean[] variantSite = markSites(stopLocation + 1);
List<Pair<Integer,Integer>> regions = getAllVariantRegions(0, windowHeader.size(), variantSite);
finalizedReads = closeVariantRegions(regions, true);
if (!windowHeader.isEmpty()) {
finalizedReads.addAll(addToSyntheticReads(0, windowHeader.size() - 1));
finalizedReads.addAll(finalizeAndAdd(ConsensusType.BOTH)); // if it ended in running consensus, finish it up
}
}
return finalizedReads;
}
/**
* generates the SAM record for the running consensus read and resets it (to null)
*
* @return the read contained in the running consensus
*/
protected GATKSAMRecord finalizeRunningConsensus() {
GATKSAMRecord finalizedRead = null;
if (runningConsensus != null) {
if (runningConsensus.size() > 0)
finalizedRead = runningConsensus.close();
else
consensusCounter--;
runningConsensus = null;
}
return finalizedRead;
}
/**
* generates the SAM record for the filtered data consensus and resets it (to null)
*
* @return the read contained in the running consensus
*/
protected GATKSAMRecord finalizeFilteredDataConsensus() {
GATKSAMRecord finalizedRead = null;
if (filteredDataConsensus != null) {
if (filteredDataConsensus.size() > 0)
finalizedRead = filteredDataConsensus.close();
else
filteredDataConsensusCounter--;
filteredDataConsensus = null;
}
return finalizedRead;
}
/**
* Updates the sliding window's header counts with the incoming read bases, insertions
* and deletions.
*
* @param read the incoming read to be added to the sliding window
*/
protected void updateHeaderCounts(GATKSAMRecord read, boolean removeRead) {
byte[] bases = read.getReadBases();
byte[] quals = read.getBaseQualities();
byte[] insQuals = read.getExistingBaseInsertionQualities();
byte[] delQuals = read.getExistingBaseDeletionQualities();
int readStart = read.getSoftStart();
int readEnd = read.getSoftEnd();
Cigar cigar = read.getCigar();
int readBaseIndex = 0;
int startLocation = getStartLocation();
int locationIndex = startLocation < 0 ? 0 : readStart - startLocation;
if (removeRead && locationIndex < 0)
throw new ReviewedStingException("read is behind the Sliding Window. read: " + read + " cigar: " + read.getCigarString() + " window: " + startLocation + "," + stopLocation);
if (!removeRead) { // we only need to create new header elements if we are adding the read, not when we're removing it
if (locationIndex < 0) { // Do we need to add extra elements before the start of the header? -- this may happen if the previous read was clipped and this alignment starts before the beginning of the window
for (int i = 1; i <= -locationIndex; i++)
windowHeader.addFirst(new HeaderElement(startLocation - i));
startLocation = readStart; // update start location accordingly
locationIndex = 0;
}
if (stopLocation < readEnd) { // Do we need to add extra elements to the header?
int elementsToAdd = (stopLocation < 0) ? readEnd - readStart + 1 : readEnd - stopLocation;
while (elementsToAdd-- > 0)
windowHeader.addLast(new HeaderElement(readEnd - elementsToAdd));
stopLocation = readEnd; // update stopLocation accordingly
}
// Special case for leading insertions before the beginning of the sliding read
if (ReadUtils.readStartsWithInsertion(read).getFirst() && (readStart == startLocation || startLocation < 0)) {
windowHeader.addFirst(new HeaderElement(readStart - 1)); // create a new first element to the window header with no bases added
locationIndex = 1; // This allows the first element (I) to look at locationIndex - 1 in the subsequent switch and do the right thing.
}
}
Iterator<HeaderElement> headerElementIterator = windowHeader.listIterator(locationIndex);
HeaderElement headerElement;
for (CigarElement cigarElement : cigar.getCigarElements()) {
switch (cigarElement.getOperator()) {
case H:
break;
case I:
if (removeRead && locationIndex == 0) { // special case, if we are removing a read that starts in insertion and we don't have the previous header element anymore, don't worry about it.
break;
}
headerElement = windowHeader.get(locationIndex - 1); // insertions are added to the base to the left (previous element)
if (removeRead) {
headerElement.removeInsertionToTheRight();
}
else {
headerElement.addInsertionToTheRight();
}
readBaseIndex += cigarElement.getLength();
break; // just ignore the insertions at the beginning of the read
case D:
int nDeletions = cigarElement.getLength();
while (nDeletions-- > 0) { // deletions are added to the baseCounts with the read mapping quality as it's quality score
headerElement = headerElementIterator.next();
byte mq = (byte) read.getMappingQuality();
if (removeRead)
headerElement.removeBase((byte) 'D', mq, mq, mq, mq, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
else
headerElement.addBase((byte) 'D', mq, mq, mq, mq, MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, false);
locationIndex++;
}
break;
case S:
case M:
case P:
case EQ:
case X:
int nBasesToAdd = cigarElement.getLength();
while (nBasesToAdd-- > 0) {
headerElement = headerElementIterator.next();
byte insertionQuality = insQuals == null ? -1 : insQuals[readBaseIndex]; // if the read doesn't have indel qualities, use -1 (doesn't matter the value because it won't be used for anything)
byte deletionQuality = delQuals == null ? -1 : delQuals[readBaseIndex];
if (removeRead)
headerElement.removeBase(bases[readBaseIndex], quals[readBaseIndex], insertionQuality, deletionQuality, read.getMappingQuality(), MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, cigarElement.getOperator() == CigarOperator.S);
else
headerElement.addBase(bases[readBaseIndex], quals[readBaseIndex], insertionQuality, deletionQuality, read.getMappingQuality(), MIN_BASE_QUAL_TO_COUNT, MIN_MAPPING_QUALITY, cigarElement.getOperator() == CigarOperator.S);
readBaseIndex++;
locationIndex++;
}
break;
}
}
}
}

View File

@ -0,0 +1,285 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import com.google.java.contract.Requires;
import net.sf.samtools.Cigar;
import net.sf.samtools.CigarElement;
import net.sf.samtools.CigarOperator;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.gatk.walkers.bqsr.EventType;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
* Running Consensus is a read that is compressed as a sliding window travels over the reads
* and keeps track of all the bases that are outside of variant regions.
*
* Consensus reads have qual fields that correspond to the number of reads that had the base
* and passed the minimum quality threshold.
*
* The mapping quality of a consensus read is the average RMS of the mapping qualities of all reads
* that compose the consensus
*
* @author Mauricio Carneiro
* @since 8/26/11
*/
public class SyntheticRead {
private List<BaseIndex> bases;
private List<Byte> counts;
private List<Byte> quals;
private List<Byte> insertionQuals;
private List<Byte> deletionQuals;
private double mappingQuality; // the average of the rms of the mapping qualities of all the reads that contributed to this consensus
private String readTag;
// Information to produce a GATKSAMRecord
private SAMFileHeader header;
private GATKSAMReadGroupRecord readGroupRecord;
private String contig;
private int contigIndex;
private String readName;
private Integer refStart;
private boolean hasIndelQualities = false;
/**
* Full initialization of the running consensus if you have all the information and are ready to
* start adding to the running consensus.
*
* @param header GATKSAMRecord file header
* @param readGroupRecord Read Group for the GATKSAMRecord
* @param contig the read's contig name
* @param contigIndex the read's contig index
* @param readName the read's name
* @param refStart the alignment start (reference based)
* @param readTag the reduce reads tag for the synthetic read
*/
public SyntheticRead(SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, String readTag, boolean hasIndelQualities) {
final int initialCapacity = 10000;
bases = new ArrayList<BaseIndex>(initialCapacity);
counts = new ArrayList<Byte>(initialCapacity);
quals = new ArrayList<Byte>(initialCapacity);
insertionQuals = new ArrayList<Byte>(initialCapacity);
deletionQuals = new ArrayList<Byte>(initialCapacity);
mappingQuality = 0.0;
this.readTag = readTag;
this.header = header;
this.readGroupRecord = readGroupRecord;
this.contig = contig;
this.contigIndex = contigIndex;
this.readName = readName;
this.refStart = refStart;
this.hasIndelQualities = hasIndelQualities;
}
public SyntheticRead(List<BaseIndex> bases, List<Byte> counts, List<Byte> quals, List<Byte> insertionQuals, List<Byte> deletionQuals, double mappingQuality, String readTag, SAMFileHeader header, GATKSAMReadGroupRecord readGroupRecord, String contig, int contigIndex, String readName, Integer refStart, boolean hasIndelQualities) {
this.bases = bases;
this.counts = counts;
this.quals = quals;
this.insertionQuals = insertionQuals;
this.deletionQuals = deletionQuals;
this.mappingQuality = mappingQuality;
this.readTag = readTag;
this.header = header;
this.readGroupRecord = readGroupRecord;
this.contig = contig;
this.contigIndex = contigIndex;
this.readName = readName;
this.refStart = refStart;
this.hasIndelQualities = hasIndelQualities;
}
/**
* Easy access to keep adding to a running consensus that has already been
* initialized with the correct read name and refStart
*
* @param base the base to add
* @param count number of reads with this base
*/
@Requires("count < Byte.MAX_VALUE")
public void add(BaseIndex base, byte count, byte qual, byte insQual, byte delQual, double mappingQuality) {
counts.add(count);
bases.add(base);
quals.add(qual);
insertionQuals.add(insQual);
deletionQuals.add(delQual);
this.mappingQuality += mappingQuality;
}
public BaseIndex getBase(int readCoordinate) {
return bases.get(readCoordinate);
}
/**
* Creates a GATKSAMRecord of the synthetic read. Will return null if the read is invalid.
*
* Invalid reads are :
* - exclusively composed of deletions
*
* @return a GATKSAMRecord or null
*/
public GATKSAMRecord close () {
if (isAllDeletions())
return null;
GATKSAMRecord read = new GATKSAMRecord(header);
read.setReferenceName(contig);
read.setReferenceIndex(contigIndex);
read.setReadPairedFlag(false);
read.setReadUnmappedFlag(false);
read.setCigar(buildCigar()); // the alignment start may change while building the cigar (leading deletions)
read.setAlignmentStart(refStart);
read.setReadName(readName);
read.setBaseQualities(convertBaseQualities(), EventType.BASE_SUBSTITUTION);
read.setReadBases(convertReadBases());
read.setMappingQuality((int) Math.ceil(mappingQuality / bases.size()));
read.setReadGroup(readGroupRecord);
read.setAttribute(readTag, convertBaseCounts());
if (hasIndelQualities) {
read.setBaseQualities(convertInsertionQualities(), EventType.BASE_INSERTION);
read.setBaseQualities(convertDeletionQualities(), EventType.BASE_DELETION);
}
return read;
}
/**
* Checks if the synthetic read is composed exclusively of deletions
*
* @return true if it is, false if it isn't.
*/
private boolean isAllDeletions() {
for (BaseIndex b : bases)
if (b != BaseIndex.D)
return false;
return true;
}
public int size () {
return bases.size();
}
private byte [] convertBaseQualities() {
return convertVariableGivenBases(bases, quals);
}
private byte [] convertInsertionQualities() {
return convertVariableGivenBases(bases, insertionQuals);
}
private byte [] convertDeletionQualities() {
return convertVariableGivenBases(bases, deletionQuals);
}
protected byte [] convertBaseCounts() {
byte[] countsArray = convertVariableGivenBases(bases, counts);
if (countsArray.length == 0)
throw new ReviewedStingException("Reduced read has counts array of length 0");
byte[] compressedCountsArray = new byte [countsArray.length];
compressedCountsArray[0] = countsArray[0];
for (int i = 1; i < countsArray.length; i++)
compressedCountsArray[i] = (byte) MathUtils.bound(countsArray[i] - compressedCountsArray[0], Byte.MIN_VALUE, Byte.MAX_VALUE);
return compressedCountsArray;
}
private byte [] convertReadBases() {
byte [] readArray = new byte[getReadLengthWithNoDeletions(bases)];
int i = 0;
for (BaseIndex baseIndex : bases)
if (baseIndex != BaseIndex.D)
readArray[i++] = baseIndex.getByte();
return readArray;
}
/**
* Builds the cigar string for the synthetic read
*
* Warning: if the synthetic read has leading deletions, it will shift the refStart (alignment start) of the read.
*
* @return the cigar string for the synthetic read
*/
private Cigar buildCigar() {
LinkedList<CigarElement> cigarElements = new LinkedList<CigarElement>();
CigarOperator cigarOperator = null;
int length = 0;
for (BaseIndex b : bases) {
CigarOperator op;
switch (b) {
case D:
op = CigarOperator.DELETION;
break;
case I:
throw new ReviewedStingException("Trying to create an insertion in a synthetic read. This operation is currently unsupported.");
default:
op = CigarOperator.MATCH_OR_MISMATCH;
break;
}
if (cigarOperator == null) {
if (op == CigarOperator.D) // read cannot start with a deletion
refStart++; // if it does, we need to move the reference start forward
else
cigarOperator = op;
}
else if (cigarOperator != op) { // if this is a new operator, we need to close the previous one
cigarElements.add(new CigarElement(length, cigarOperator)); // close previous operator
cigarOperator = op;
length = 0;
}
if (cigarOperator != null) // only increment the length of the cigar element if we really added it to the read (no leading deletions)
length++;
}
if (length > 0 && cigarOperator != CigarOperator.D) // read cannot end with a deletion
cigarElements.add(new CigarElement(length, cigarOperator)); // add the last cigar element
return new Cigar(cigarElements);
}
/**
 * Shared functionality for all conversion utilities
 *
 * Walks the bases and the per-base values in lockstep, keeping only the values whose
 * base is not a deletion. Note that {@code variable} is expected to hold one entry per
 * base, including the deleted ones, since the iterator advances on every base.
 *
 * @param bases the read bases
 * @param variable the list to convert
 * @return a converted variable given the bases and skipping deletions
 */
private static byte [] convertVariableGivenBases (List<BaseIndex> bases, List<Byte> variable) {
    final byte[] result = new byte[getReadLengthWithNoDeletions(bases)];
    final Iterator<Byte> valueIterator = variable.iterator();
    int outIndex = 0;
    for (final BaseIndex base : bases) {
        final byte value = valueIterator.next();
        if (base != BaseIndex.D) {
            result[outIndex++] = value;
        }
    }
    return result;
}
/**
 * Shared functionality for all conversion utilities
 *
 * Counts the bases that are not deletion markers (equivalently, the read length minus
 * the number of deletions).
 *
 * @param bases the read bases
 * @return the length of the read with no deletions
 */
private static int getReadLengthWithNoDeletions(List<BaseIndex> bases) {
    int length = 0;
    for (final BaseIndex base : bases) {
        if (base != BaseIndex.D) {
            length++;
        }
    }
    return length;
}
}

View File

@ -0,0 +1,69 @@
// our package
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
// the imports for unit testing.
import org.broadinstitute.sting.BaseTest;
import org.testng.Assert;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import java.util.ArrayList;
import java.util.List;
/**
 * Basic unit test for BaseCounts in reduced reads
 *
 * Each test case feeds a string of bases into a fresh BaseCounts and checks the total
 * count, the most common base, and that base's count.
 */
public class BaseCountsUnitTest extends BaseTest {

    /**
     * Immutable test case: a string of bases plus the expected most-common base and its count.
     *
     * Declared static so it does not hold a hidden reference to the enclosing test instance,
     * with final fields since a case is never mutated after construction.
     */
    private static class SingleTest {
        public final String bases;
        public final byte mostCountBase;
        public final int mostCommonCount;

        private SingleTest(String bases, char mostCountBase, int mostCommonCount) {
            this.mostCommonCount = mostCommonCount;
            this.mostCountBase = (byte) mostCountBase;
            this.bases = bases;
        }
    }

    @DataProvider(name = "data")
    public Object[][] createData1() {
        List<SingleTest> params = new ArrayList<SingleTest>();
        params.add(new SingleTest("A", 'A', 1));
        params.add(new SingleTest("AA", 'A', 2));
        params.add(new SingleTest("AC", 'A', 1));
        params.add(new SingleTest("AAC", 'A', 2));
        params.add(new SingleTest("AAA", 'A', 3));
        params.add(new SingleTest("AAAN", 'A', 3));
        params.add(new SingleTest("AAANNNN", 'N', 4));
        params.add(new SingleTest("AACTG", 'A', 2));
        params.add(new SingleTest("D", 'D', 1));
        params.add(new SingleTest("DDAAD", 'D', 3));
        params.add(new SingleTest("", (char) BaseCounts.MAX_BASE_WITH_NO_COUNTS, 0)); // empty input: sentinel base, zero count
        params.add(new SingleTest("AAIIIAI", 'I', 4));

        // TestNG data providers require one Object[] row per test invocation
        List<Object[]> rows = new ArrayList<Object[]>(params.size());
        for (SingleTest x : params)
            rows.add(new Object[]{x});
        return rows.toArray(new Object[][]{});
    }

    @Test(dataProvider = "data", enabled = true)
    public void testCounting(SingleTest params) {
        BaseCounts counts = new BaseCounts();
        for (byte base : params.bases.getBytes())
            counts.incr(base);

        String name = String.format("Test-%s", params.bases);
        Assert.assertEquals(counts.totalCount(), params.bases.length(), name);
        Assert.assertEquals(counts.countOfMostCommonBase(), params.mostCommonCount, name);
        Assert.assertEquals((char) counts.baseWithMostCounts(), (char) params.mostCountBase, name);
    }
}

View File

@ -0,0 +1,68 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import org.broadinstitute.sting.WalkerTest;
import org.testng.annotations.Test;
import java.util.Arrays;
/**
 * Integration tests for the ReduceReads walker: each test runs the walker with a fixed
 * command line and compares the output against a known-good MD5.
 */
public class ReduceReadsIntegrationTest extends WalkerTest {
    final static String REF = b37KGReference;
    final String BAM = validationDataLocation + "NA12878.HiSeq.b37.chr20.10_11mb.bam";
    final String DELETION_BAM = validationDataLocation + "filtered_deletion_for_reduce_reads.bam";
    final String STASH_BAM = validationDataLocation + "ReduceReadsStashBug.bam";
    final String STASH_L = " -L 14:73718184-73718284 -L 14:73718294-73718330 -L 14:73718360-73718556";
    final String L = " -L 20:10,100,000-10,120,000 ";

    /**
     * Runs ReduceReads on the standard BAM with the given extra arguments and verifies
     * the output MD5.
     */
    private void RRTest(String testName, String args, String md5) {
        final String commandLine = String.format("-T ReduceReads -npt -R %s -I %s ", REF, BAM) + " -o %s " + args;
        executeTest(testName, new WalkerTestSpec(commandLine, Arrays.asList(md5)));
    }

    @Test(enabled = true)
    public void testDefaultCompression() {
        RRTest("testDefaultCompression ", L, "4c92d59d4a5292af1f968dc922c2c63e");
    }

    @Test(enabled = true)
    public void testMultipleIntervals() {
        final String intervals = "-L 20:10,100,000-10,100,500 -L 20:10,200,000-10,200,500 -L 20:10,300,000-10,300,500 -L 20:10,400,000-10,500,000 -L 20:10,500,050-10,500,060 -L 20:10,600,000-10,600,015 -L 20:10,700,000-10,700,110";
        RRTest("testMultipleIntervals ", intervals, "97d5c3fda5551741676793ba325ec7ed");
    }

    @Test(enabled = true)
    public void testHighCompression() {
        RRTest("testHighCompression ", " -cs 10 -minvar 0.3 -mindel 0.3 " + L, "e6bc1cd0e9de961cf0fb1789bf6ab108");
    }

    @Test(enabled = true)
    public void testLowCompression() {
        RRTest("testLowCompression ", " -cs 30 -minvar 0.01 -mindel 0.01 -minmap 5 -minqual 5 " + L, "f33ec7cd0b98eebd73d1025ca656cd7e");
    }

    @Test(enabled = true)
    public void testIndelCompression() {
        RRTest("testIndelCompression ", " -cs 50 -L 20:10,100,500-10,100,600 ", "e6e2bc889e4f342a7fedc5d38b391d20");
    }

    @Test(enabled = true)
    public void testFilteredDeletionCompression() {
        final String commandLine = String.format("-T ReduceReads -npt -R %s -I %s ", REF, DELETION_BAM) + " -o %s ";
        executeTest("testFilteredDeletionCompression", new WalkerTestSpec(commandLine, Arrays.asList("891bd6dcda66611f343e8ff25f34aaeb")));
    }

    /**
     * Bug reported by Adam where a read that got clipped before actually belongs 2 intervals ahead
     * and a subsequent tail leaves only this read in the stash. The next read to come in is in fact
     * before (alignment start) than this read, so the TreeSet breaks with a Key out of Range error
     * that was freaking hard to catch.
     *
     * This bam is simplified to replicate the exact bug with the three provided intervals.
     */
    @Test(enabled = true)
    public void testAddingReadAfterTailingTheStash() {
        final String commandLine = String.format("-T ReduceReads %s -npt -R %s -I %s", STASH_L, REF, STASH_BAM) + " -o %s ";
        executeTest("testAddingReadAfterTailingTheStash", new WalkerTestSpec(commandLine, Arrays.asList("022931f032a4122cfe41e58e74d0aede")));
    }
}

View File

@ -0,0 +1,85 @@
package org.broadinstitute.sting.gatk.walkers.compression.reducereads;
import net.sf.samtools.SAMFileHeader;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMReadGroupRecord;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.Test;
import java.util.Arrays;
import java.util.Random;
/**
 * Unit test for SyntheticRead: verifies that per-base counts are compressed into the
 * delta-encoded byte representation (each count stored as an offset from the first
 * count, bounded to the byte range).
 */
public class SyntheticReadUnitTest extends BaseTest {
    final SAMFileHeader artificialSAMHeader = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1);
    final GATKSAMReadGroupRecord artificialGATKRG = new GATKSAMReadGroupRecord("synthetic");
    final String artificialContig = "1";
    final int artificialContigIndex = 0;
    final String artificialReadName = "synth";
    final int artificialRefStart = 1;
    final double artificialMappingQuality = 60;
    final Random random = new Random(8854875); // fixed seed for reproducibility

    @Test
    public void testBaseCounts() {
        BaseIndex [] bases = new BaseIndex[] {BaseIndex.A, BaseIndex.A, BaseIndex.A, BaseIndex.A};
        Byte[] quals = new Byte[] {20, 20, 20, 20};
        // expectedCounts[i] = counts[i] - counts[0], clamped to the byte range
        TestRead [] testReads = new TestRead [] {
                new TestRead(bases, quals, new Byte[] {100, 100, 100, 101}, new byte [] {100, 0, 0, 1}),
                new TestRead(bases, quals, new Byte[] {1, 100, 100, 0}, new byte [] {1, 99, 99, -1}),
                new TestRead(bases, quals, new Byte[] {127, 100, 0, 1}, new byte [] {127, -27, -127, -126}),
                new TestRead(bases, quals, new Byte[] {1, 127, 51, 126}, new byte [] {1, 126, 50, 125})};

        for (TestRead testRead : testReads) {
            SyntheticRead syntheticRead = new SyntheticRead(Arrays.asList(testRead.getBases()), Arrays.asList(testRead.getCounts()), Arrays.asList(testRead.getQuals()), Arrays.asList(testRead.getInsQuals()), Arrays.asList(testRead.getDelQuals()), artificialMappingQuality, GATKSAMRecord.REDUCED_READ_CONSENSUS_TAG, artificialSAMHeader, artificialGATKRG, artificialContig, artificialContigIndex, artificialReadName, artificialRefStart, false);
            Assert.assertEquals(syntheticRead.convertBaseCounts(), testRead.getExpectedCounts());
        }
    }

    /**
     * Immutable fixture for one synthetic read: bases, qualities, raw counts, and the
     * expected compressed counts.
     *
     * Declared static so it does not hold a hidden reference to the enclosing test
     * instance, with final fields since a fixture is never mutated after construction.
     * Note: insertion and deletion qualities deliberately reuse the base quals array.
     */
    private static class TestRead {
        private final BaseIndex[] bases;
        private final Byte[] quals;
        private final Byte[] insQuals;
        private final Byte[] delQuals;
        private final Byte[] counts;
        private final byte [] expectedCounts;

        private TestRead(BaseIndex[] bases, Byte[] quals, Byte[] counts, byte[] expectedCounts) {
            this.bases = bases;
            this.quals = quals;
            this.insQuals = quals;
            this.delQuals = quals;
            this.counts = counts;
            this.expectedCounts = expectedCounts;
        }

        public BaseIndex[] getBases() {
            return bases;
        }

        public Byte[] getQuals() {
            return quals;
        }

        public Byte[] getInsQuals() {
            return insQuals;
        }

        public Byte[] getDelQuals() {
            return delQuals;
        }

        public Byte[] getCounts() {
            return counts;
        }

        public byte[] getExpectedCounts() {
            return expectedCounts;
        }
    }
}