Merge with master

-- A few bug fixes
Mark DePristo 2011-11-19 09:56:06 -05:00
commit 73119c8e3c
81 changed files with 5126 additions and 2935 deletions

View File

@@ -76,7 +76,7 @@
<dependency org="org.apache.poi" name="poi-ooxml" rev="3.8-beta3" />
<!-- snpEff annotator for pipelines -->
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.2" />
<dependency org="net.sf.snpeff" name="snpeff" rev="2.0.4rc3" />
<!-- Exclude dependencies on Sun libraries whose downloads aren't available but which are already included in the JVM. -->
<exclude org="javax.servlet" />

View File

@@ -0,0 +1,762 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools;
import net.sf.samtools.util.*;
import net.sf.samtools.SAMFileReader.ValidationStringency;
import java.io.*;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Internal class for reading and querying BAM files.
*/
class BAMFileReader extends SAMFileReader.ReaderImplementation {
// True if reading from a File rather than an InputStream
private boolean mIsSeekable = false;
// For converting bytes into other primitive types
private BinaryCodec mStream = null;
// Underlying compressed data stream.
private final BAMInputStream mInputStream;
private SAMFileHeader mFileHeader = null;
// Populated if the file is seekable and an index exists
private File mIndexFile;
private BAMIndex mIndex = null;
private long mFirstRecordPointer = 0;
private CloseableIterator<SAMRecord> mCurrentIterator = null;
// If true, all SAMRecords are fully decoded as they are read.
private final boolean eagerDecode;
// For error-checking.
private ValidationStringency mValidationStringency;
// For creating BAMRecords
private SAMRecordFactory samRecordFactory;
/**
* Use the caching index reader implementation rather than the disk-hit-per-file model.
*/
private boolean mEnableIndexCaching = false;
/**
* Use the traditional memory-mapped implementation for BAM file indexes rather than regular I/O.
*/
private boolean mEnableIndexMemoryMapping = true;
/**
* Add information about the origin (reader and position) to SAM records.
*/
private SAMFileReader mFileReader = null;
/**
* Prepare to read BAM from a stream (not seekable)
* @param stream source of bytes.
* @param eagerDecode if true, decode all BAM fields while reading rather than lazily.
* @param validationStringency Controls how to handle invalid reads or header lines.
*/
BAMFileReader(final InputStream stream,
final File indexFile,
final boolean eagerDecode,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
mIndexFile = indexFile;
mIsSeekable = false;
mInputStream = stream instanceof BAMInputStream ? (BAMInputStream)stream : new BlockCompressedInputStream(stream);
mStream = new BinaryCodec(new DataInputStream((InputStream)mInputStream));
this.eagerDecode = eagerDecode;
this.mValidationStringency = validationStringency;
this.samRecordFactory = factory;
readHeader(null);
}
/**
* Prepare to read BAM from a file (seekable)
* @param file source of bytes.
* @param eagerDecode if true, decode all BAM fields while reading rather than lazily.
* @param validationStringency Controls how to handle invalid reads or header lines.
*/
BAMFileReader(final File file,
final File indexFile,
final boolean eagerDecode,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
this(new BlockCompressedInputStream(file), indexFile!=null ? indexFile : findIndexFile(file), eagerDecode, file.getAbsolutePath(), validationStringency, factory);
if (mIndexFile != null && mIndexFile.lastModified() < file.lastModified()) {
System.err.println("WARNING: BAM index file " + mIndexFile.getAbsolutePath() +
" is older than BAM " + file.getAbsolutePath());
}
}
BAMFileReader(final SeekableStream strm,
final File indexFile,
final boolean eagerDecode,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
this(strm instanceof BAMInputStream ? (BAMInputStream)strm : new BlockCompressedInputStream(strm),
indexFile,
eagerDecode,
strm.getSource(),
validationStringency,
factory);
}
private BAMFileReader(final BAMInputStream inputStream,
final File indexFile,
final boolean eagerDecode,
final String source,
final ValidationStringency validationStringency,
final SAMRecordFactory factory)
throws IOException {
mIndexFile = indexFile;
mIsSeekable = true;
mInputStream = inputStream;
mStream = new BinaryCodec(new DataInputStream((InputStream)inputStream));
this.eagerDecode = eagerDecode;
this.mValidationStringency = validationStringency;
this.samRecordFactory = factory;
readHeader(source);
mFirstRecordPointer = inputStream.getFilePointer();
}
/**
* If enabled, writes the source (reader and position) of every read into each SAMRecord.
* @param enabled true to write source information into each SAMRecord.
*/
void enableFileSource(final SAMFileReader reader, final boolean enabled) {
this.mFileReader = enabled ? reader : null;
}
/**
* If true, uses the caching version of the index reader.
* @param enabled true to use the caching index reader implementation.
*/
public void enableIndexCaching(final boolean enabled) {
if(mIndex != null)
throw new SAMException("Unable to turn on index caching; index file has already been loaded.");
this.mEnableIndexCaching = enabled;
}
/**
* If false, disable the use of memory mapping for accessing index files (default behavior is to use memory mapping).
* This is slower but more scalable when accessing large numbers of BAM files sequentially.
* @param enabled True to use memory mapping, false to use regular I/O.
*/
public void enableIndexMemoryMapping(final boolean enabled) {
if (mIndex != null) {
throw new SAMException("Unable to change index memory mapping; index file has already been loaded.");
}
this.mEnableIndexMemoryMapping = enabled;
}
@Override void enableCrcChecking(final boolean enabled) {
this.mInputStream.setCheckCrcs(enabled);
}
@Override void setSAMRecordFactory(final SAMRecordFactory factory) { this.samRecordFactory = factory; }
/**
* @return true if this is a BAM file and has an index
*/
public boolean hasIndex() {
return (mIndexFile != null);
}
/**
* Retrieves the index for the given file type. Ensure that the index is of the specified type.
* @return An index of the given type.
*/
public BAMIndex getIndex() {
if(mIndexFile == null)
throw new SAMException("No index is available for this BAM file.");
if(mIndex == null)
mIndex = mEnableIndexCaching ? new CachingBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping)
: new DiskBasedBAMFileIndex(mIndexFile, getFileHeader().getSequenceDictionary(), mEnableIndexMemoryMapping);
return mIndex;
}
void close() {
if (mStream != null) {
mStream.close();
}
if (mIndex != null) {
mIndex.close();
}
mStream = null;
mFileHeader = null;
mIndex = null;
}
SAMFileHeader getFileHeader() {
return mFileHeader;
}
/**
* Set error-checking level for subsequent SAMRecord reads.
*/
void setValidationStringency(final SAMFileReader.ValidationStringency validationStringency) {
this.mValidationStringency = validationStringency;
}
SAMFileReader.ValidationStringency getValidationStringency() {
return this.mValidationStringency;
}
/**
* Prepare to iterate through the SAMRecords in file order.
* Only a single iterator on a BAM file can be extant at a time. If getIterator() or a query method has been called once,
* that iterator must be closed before getIterator() can be called again.
* A somewhat peculiar aspect of this method is that if the file is not seekable, a second call to
* getIterator() begins its iteration where the last one left off. That is the best that can be
* done in that situation.
*/
CloseableIterator<SAMRecord> getIterator() {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
}
if (mCurrentIterator != null) {
throw new IllegalStateException("Iteration in progress");
}
if (mIsSeekable) {
try {
mInputStream.seek(mFirstRecordPointer);
} catch (IOException exc) {
throw new RuntimeException(exc.getMessage(), exc);
}
}
mCurrentIterator = new BAMFileIterator();
return mCurrentIterator;
}
@Override
CloseableIterator<SAMRecord> getIterator(final SAMFileSpan chunks) {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
}
if (mCurrentIterator != null) {
throw new IllegalStateException("Iteration in progress");
}
if (!(chunks instanceof BAMFileSpan)) {
throw new IllegalStateException("BAMFileReader cannot handle this type of file span.");
}
// Create an iterator over the given chunk boundaries.
mCurrentIterator = new BAMFileIndexIterator(((BAMFileSpan)chunks).toCoordinateArray());
return mCurrentIterator;
}
/**
* Gets an unbounded pointer to the first record in the BAM file. Because the reader doesn't necessarily know
* when the file ends, the rightmost bound of the file pointer will not end exactly where the file ends. However,
* the rightmost bound is guaranteed to be after the last read in the file.
* @return An unbounded pointer to the first record in the BAM file.
*/
@Override
SAMFileSpan getFilePointerSpanningReads() {
return new BAMFileSpan(new Chunk(mFirstRecordPointer,Long.MAX_VALUE));
}
/**
* Prepare to iterate through the SAMRecords that match the given interval.
* Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
* before calling any of the methods that return an iterator.
*
* Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
* purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
* matches the specified interval.
*
* Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect
* resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
*
* @param sequence Reference sequence sought.
* @param start Desired SAMRecords must overlap or be contained in the interval specified by start and end.
* A value of zero implies the start of the reference sequence.
* @param end A value of zero implies the end of the reference sequence.
* @param contained If true, the alignments for the SAMRecords must be completely contained in the interval
* specified by start and end. If false, the SAMRecords need only overlap the interval.
* @return Iterator for the matching SAMRecords
*/
CloseableIterator<SAMRecord> query(final String sequence, final int start, final int end, final boolean contained) {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
}
if (mCurrentIterator != null) {
throw new IllegalStateException("Iteration in progress");
}
if (!mIsSeekable) {
throw new UnsupportedOperationException("Cannot query stream-based BAM file");
}
mCurrentIterator = createIndexIterator(sequence, start, end, contained? QueryType.CONTAINED: QueryType.OVERLAPPING);
return mCurrentIterator;
}
/**
* Prepare to iterate through the SAMRecords with the given alignment start.
* Only a single iterator on a BAMFile can be extant at a time. The previous one must be closed
* before calling any of the methods that return an iterator.
*
* Note that an unmapped SAMRecord may still have a reference name and an alignment start for sorting
* purposes (typically this is the coordinate of its mate), and will be found by this method if the coordinate
* matches the specified interval.
*
* Note that this method is not necessarily efficient in terms of disk I/O. The index does not have perfect
* resolution, so some SAMRecords may be read and then discarded because they do not match the specified interval.
*
* @param sequence Reference sequence sought.
* @param start Alignment start sought.
* @return Iterator for the matching SAMRecords.
*/
CloseableIterator<SAMRecord> queryAlignmentStart(final String sequence, final int start) {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
}
if (mCurrentIterator != null) {
throw new IllegalStateException("Iteration in progress");
}
if (!mIsSeekable) {
throw new UnsupportedOperationException("Cannot query stream-based BAM file");
}
mCurrentIterator = createIndexIterator(sequence, start, -1, QueryType.STARTING_AT);
return mCurrentIterator;
}
public CloseableIterator<SAMRecord> queryUnmapped() {
if (mStream == null) {
throw new IllegalStateException("File reader is closed");
}
if (mCurrentIterator != null) {
throw new IllegalStateException("Iteration in progress");
}
if (!mIsSeekable) {
throw new UnsupportedOperationException("Cannot query stream-based BAM file");
}
try {
final long startOfLastLinearBin = getIndex().getStartOfLastLinearBin();
if (startOfLastLinearBin != -1) {
mInputStream.seek(startOfLastLinearBin);
} else {
// No mapped reads in file, just start at the first read in file.
mInputStream.seek(mFirstRecordPointer);
}
mCurrentIterator = new BAMFileIndexUnmappedIterator();
return mCurrentIterator;
} catch (IOException e) {
throw new RuntimeException("IOException seeking to unmapped reads", e);
}
}
/**
* Reads the header from the file or stream
* @param source Note that this is used only for reporting errors.
*/
private void readHeader(final String source)
throws IOException {
final byte[] buffer = new byte[4];
mStream.readBytes(buffer);
if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) {
throw new IOException("Invalid BAM file header");
}
final int headerTextLength = mStream.readInt();
final String textHeader = mStream.readString(headerTextLength);
final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
headerCodec.setValidationStringency(mValidationStringency);
mFileHeader = headerCodec.decode(new StringLineReader(textHeader),
source);
final int sequenceCount = mStream.readInt();
if (mFileHeader.getSequenceDictionary().size() > 0) {
// It is allowed to have binary sequences but no text sequences, so only validate if both are present
if (sequenceCount != mFileHeader.getSequenceDictionary().size()) {
throw new SAMFormatException("Number of sequences in text header (" +
mFileHeader.getSequenceDictionary().size() +
") != number of sequences in binary header (" + sequenceCount + ") for file " + source);
}
for (int i = 0; i < sequenceCount; i++) {
final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(source);
final SAMSequenceRecord sequenceRecord = mFileHeader.getSequence(i);
if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " +
source);
}
if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " +
source);
}
}
} else {
// If only binary sequences are present, copy them into mFileHeader
final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
for (int i = 0; i < sequenceCount; i++) {
sequences.add(readSequenceRecord(source));
}
mFileHeader.setSequenceDictionary(new SAMSequenceDictionary(sequences));
}
}
/**
* Reads a single binary sequence record from the file or stream
* @param source Note that this is used only for reporting errors.
*/
private SAMSequenceRecord readSequenceRecord(final String source) {
final int nameLength = mStream.readInt();
if (nameLength <= 1) {
throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + source);
}
final String sequenceName = mStream.readString(nameLength - 1);
// Skip the null terminator
mStream.readByte();
final int sequenceLength = mStream.readInt();
return new SAMSequenceRecord(SAMSequenceRecord.truncateSequenceName(sequenceName), sequenceLength);
}
/**
* Iterator for non-indexed sequential iteration through all SAMRecords in file.
* Starting point of iteration is wherever current file position is when the iterator is constructed.
*/
private class BAMFileIterator implements CloseableIterator<SAMRecord> {
private SAMRecord mNextRecord = null;
private final BAMRecordCodec bamRecordCodec;
private long samRecordIndex = 0; // Current position in the file, counted in records
BAMFileIterator() {
this(true);
}
/**
* @param advance Trick to enable subclass to do more setup before advancing
*/
BAMFileIterator(final boolean advance) {
this.bamRecordCodec = new BAMRecordCodec(getFileHeader(), samRecordFactory);
this.bamRecordCodec.setInputStream(BAMFileReader.this.mStream.getInputStream());
if (advance) {
advance();
}
}
public void close() {
if (mCurrentIterator != null && this != mCurrentIterator) {
throw new IllegalStateException("Attempt to close non-current iterator");
}
mCurrentIterator = null;
}
public boolean hasNext() {
return (mNextRecord != null);
}
public SAMRecord next() {
final SAMRecord result = mNextRecord;
advance();
return result;
}
public void remove() {
throw new UnsupportedOperationException("Not supported: remove");
}
void advance() {
try {
mNextRecord = getNextRecord();
if (mNextRecord != null) {
++this.samRecordIndex;
// Because some decoding is done lazily, the record needs to remember the validation stringency.
mNextRecord.setValidationStringency(mValidationStringency);
if (mValidationStringency != ValidationStringency.SILENT) {
final List<SAMValidationError> validationErrors = mNextRecord.isValid();
SAMUtils.processValidationErrors(validationErrors,
this.samRecordIndex, BAMFileReader.this.getValidationStringency());
}
}
if (eagerDecode && mNextRecord != null) {
mNextRecord.eagerDecode();
}
} catch (IOException exc) {
throw new RuntimeException(exc.getMessage(), exc);
}
}
/**
* Read the next record from the input stream.
*/
SAMRecord getNextRecord() throws IOException {
final long startCoordinate = mInputStream.getFilePointer();
final SAMRecord next = bamRecordCodec.decode();
final long stopCoordinate = mInputStream.getFilePointer();
if(mFileReader != null && next != null)
next.setFileSource(new SAMFileSource(mFileReader,new BAMFileSpan(new Chunk(startCoordinate,stopCoordinate))));
return next;
}
/**
* @return The record that will be returned by the next call to next()
*/
protected SAMRecord peek() {
return mNextRecord;
}
}
/**
* Prepare to iterate through SAMRecords matching the target interval.
* @param sequence Desired reference sequence.
* @param start 1-based start of target interval, inclusive.
* @param end 1-based end of target interval, inclusive.
* @param queryType contained, overlapping, or starting-at query.
*/
private CloseableIterator<SAMRecord> createIndexIterator(final String sequence,
final int start,
final int end,
final QueryType queryType) {
long[] filePointers = null;
// Hit the index to determine the chunk boundaries for the required data.
final SAMFileHeader fileHeader = getFileHeader();
final int referenceIndex = fileHeader.getSequenceIndex(sequence);
if (referenceIndex != -1) {
final BAMIndex fileIndex = getIndex();
final BAMFileSpan fileSpan = fileIndex.getSpanOverlapping(referenceIndex, start, end);
filePointers = fileSpan != null ? fileSpan.toCoordinateArray() : null;
}
// Create an iterator over the above chunk boundaries.
final BAMFileIndexIterator iterator = new BAMFileIndexIterator(filePointers);
// Add some preprocessing filters for edge-case reads that don't fit into this
// query type.
return new BAMQueryFilteringIterator(iterator,sequence,start,end,queryType);
}
enum QueryType {CONTAINED, OVERLAPPING, STARTING_AT}
/**
* Look for BAM index file according to standard naming convention.
*
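* For /path/foo.bam, /path/foo.bai is tried first, then /path/foo.bam.bai.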
* @param dataFile BAM file name.
* @return Index file name, or null if not found.
*/
private static File findIndexFile(final File dataFile) {
// If input is foo.bam, look for foo.bai
final String bamExtension = ".bam";
File indexFile;
final String fileName = dataFile.getName();
if (fileName.endsWith(bamExtension)) {
final String bai = fileName.substring(0, fileName.length() - bamExtension.length()) + BAMIndex.BAMIndexSuffix;
indexFile = new File(dataFile.getParent(), bai);
if (indexFile.exists()) {
return indexFile;
}
}
// If foo.bai doesn't exist look for foo.bam.bai
indexFile = new File(dataFile.getParent(), dataFile.getName() + ".bai");
if (indexFile.exists()) {
return indexFile;
} else {
return null;
}
}
private class BAMFileIndexIterator extends BAMFileIterator {
private long[] mFilePointers = null;
private int mFilePointerIndex = 0;
private long mFilePointerLimit = -1;
/**
* Prepare to iterate through SAMRecords stored in the specified compressed blocks at the given offset.
* @param filePointers the block / offset combination, stored in chunk format.
*/
BAMFileIndexIterator(final long[] filePointers) {
super(false); // delay advance() until after construction
mFilePointers = filePointers;
advance();
}
SAMRecord getNextRecord()
throws IOException {
// Advance to next file block if necessary
while (mInputStream.getFilePointer() >= mFilePointerLimit) {
if (mFilePointers == null ||
mFilePointerIndex >= mFilePointers.length) {
return null;
}
final long startOffset = mFilePointers[mFilePointerIndex++];
final long endOffset = mFilePointers[mFilePointerIndex++];
mInputStream.seek(startOffset);
mFilePointerLimit = endOffset;
}
// Pull next record from stream
return super.getNextRecord();
}
}
/**
* A decorating iterator that filters out records that are outside the bounds of the
* given query parameters.
*/
private class BAMQueryFilteringIterator implements CloseableIterator<SAMRecord> {
/**
* The wrapped iterator.
*/
private final CloseableIterator<SAMRecord> wrappedIterator;
/**
* The next record to be returned. Will be null if no such record exists.
*/
private SAMRecord mNextRecord;
private final int mReferenceIndex;
private final int mRegionStart;
private final int mRegionEnd;
private final QueryType mQueryType;
public BAMQueryFilteringIterator(final CloseableIterator<SAMRecord> iterator,final String sequence, final int start, final int end, final QueryType queryType) {
this.wrappedIterator = iterator;
final SAMFileHeader fileHeader = getFileHeader();
mReferenceIndex = fileHeader.getSequenceIndex(sequence);
mRegionStart = start;
if (queryType == QueryType.STARTING_AT) {
mRegionEnd = mRegionStart;
} else {
mRegionEnd = (end <= 0) ? Integer.MAX_VALUE : end;
}
mQueryType = queryType;
mNextRecord = advance();
}
/**
* Returns true if a next element exists; false otherwise.
*/
public boolean hasNext() {
return mNextRecord != null;
}
/**
* Gets the next record from the given iterator.
* @return The next SAM record in the iterator.
*/
public SAMRecord next() {
if(!hasNext())
throw new NoSuchElementException("BAMQueryFilteringIterator: no next element available");
final SAMRecord currentRead = mNextRecord;
mNextRecord = advance();
return currentRead;
}
/**
* Closes down the existing iterator.
*/
public void close() {
if (this != mCurrentIterator) {
throw new IllegalStateException("Attempt to close non-current iterator");
}
mCurrentIterator = null;
}
/**
* @throws UnsupportedOperationException always.
*/
public void remove() {
throw new UnsupportedOperationException("Not supported: remove");
}
SAMRecord advance() {
while (true) {
// Pull next record from stream
if(!wrappedIterator.hasNext())
return null;
final SAMRecord record = wrappedIterator.next();
// If beyond the end of this reference sequence, end iteration
final int referenceIndex = record.getReferenceIndex();
if (referenceIndex != mReferenceIndex) {
if (referenceIndex < 0 ||
referenceIndex > mReferenceIndex) {
return null;
}
// If before this reference sequence, continue
continue;
}
if (mRegionStart == 0 && mRegionEnd == Integer.MAX_VALUE) {
// Quick exit to avoid expensive alignment end calculation
return record;
}
final int alignmentStart = record.getAlignmentStart();
// If read is unmapped but has a coordinate, return it if the coordinate is within
// the query region, regardless of whether the mapped mate will be returned.
final int alignmentEnd;
if (mQueryType == QueryType.STARTING_AT) {
alignmentEnd = -1;
} else {
alignmentEnd = (record.getAlignmentEnd() != SAMRecord.NO_ALIGNMENT_START?
record.getAlignmentEnd(): alignmentStart);
}
if (alignmentStart > mRegionEnd) {
// If scanned beyond target region, end iteration
return null;
}
// Filter for overlap with region
if (mQueryType == QueryType.CONTAINED) {
if (alignmentStart >= mRegionStart && alignmentEnd <= mRegionEnd) {
return record;
}
} else if (mQueryType == QueryType.OVERLAPPING) {
if (alignmentEnd >= mRegionStart && alignmentStart <= mRegionEnd) {
return record;
}
} else {
if (alignmentStart == mRegionStart) {
return record;
}
}
}
}
}
private class BAMFileIndexUnmappedIterator extends BAMFileIterator {
private BAMFileIndexUnmappedIterator() {
while (this.hasNext() && peek().getReferenceIndex() != SAMRecord.NO_ALIGNMENT_REFERENCE_INDEX) {
advance();
}
}
}
}
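The single-iterator contract described in the javadoc above is easiest to see through the public SAMFileReader wrapper. A minimal usage sketch, assuming an indexed BAM at the placeholder path example.bam with a contig named chr1:

import java.io.File;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import net.sf.samtools.util.CloseableIterator;

public class QueryExample {
    public static void main(final String[] args) {
        final SAMFileReader reader = new SAMFileReader(new File("example.bam"));
        // Overlapping query: reads need only intersect [10000, 20000] on chr1.
        final CloseableIterator<SAMRecord> it = reader.query("chr1", 10000, 20000, false);
        while (it.hasNext()) {
            System.out.println(it.next().getReadName());
        }
        it.close(); // must close before issuing another query or iterator() call
        reader.close();
    }
}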

View File

@@ -25,6 +25,7 @@
package net.sf.samtools;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.ArrayList;
import java.util.Arrays;
@@ -47,6 +48,18 @@ public class GATKBAMFileSpan extends BAMFileSpan {
super();
}
/**
* Create a new GATKBAMFileSpan from an existing BAMFileSpan.
* @param sourceFileSpan the source span to copy; must be a BAMFileSpan
*/
public GATKBAMFileSpan(SAMFileSpan sourceFileSpan) {
if(!(sourceFileSpan instanceof BAMFileSpan))
throw new SAMException("Unable to create GATKBAMFileSpan from a SAMFileSpan. Please pass a BAMFileSpan instead");
BAMFileSpan sourceBAMFileSpan = (BAMFileSpan)sourceFileSpan;
for(Chunk chunk: sourceBAMFileSpan.getChunks())
add(chunk instanceof GATKChunk ? chunk : new GATKChunk(chunk));
}
/**
* Convenience constructor to construct a BAM file span from
* a single chunk.

View File

@@ -69,6 +69,22 @@ public class GATKChunk extends Chunk {
super.setChunkEnd(value);
}
public long getBlockStart() {
return getChunkStart() >>> 16;
}
public int getBlockOffsetStart() {
return (int)(getChunkStart() & 0xFFFF);
}
public long getBlockEnd() {
return getChunkEnd() >>> 16;
}
public int getBlockOffsetEnd() {
return ((int)getChunkEnd() & 0xFFFF);
}
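// Illustrative example of the accessors above: a BGZF virtual offset packs
// the compressed block address into the upper 48 bits and the offset within
// the uncompressed block into the low 16 bits.
//   final long virtualOffset = (123456L << 16) | 789L;
//   virtualOffset >>> 16          == 123456L  // getBlockStart()
//   (int)(virtualOffset & 0xFFFF) == 789      // getBlockOffsetStart()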
/**
* Computes an approximation of the uncompressed size of the
* chunk, in bytes. Can be used to determine relative weights

View File

@@ -0,0 +1,72 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package net.sf.samtools.util;
import java.io.IOException;
/**
* An input stream formulated for reading BAM files. Supports seeking to
* virtual file pointers and control of CRC checking.
*/
public interface BAMInputStream {
/**
* Seek to the given position in the file. Note that pos is a special virtual file pointer,
* not an actual byte offset.
*
* @param pos virtual file pointer
*/
public void seek(final long pos) throws IOException;
/**
* @return virtual file pointer that can be passed to seek() to return to the current position. This is
* not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
* the two.
*/
public long getFilePointer();
/**
* Determines whether or not the inflater will re-calculate the CRC on the decompressed data
* and check it against the value stored in the GZIP header. CRC checking is an expensive
* operation and should be used accordingly.
*/
public void setCheckCrcs(final boolean check);
public int read() throws java.io.IOException;
public int read(byte[] bytes) throws java.io.IOException;
public int read(byte[] bytes, int offset, int length) throws java.io.IOException;
public long skip(long count) throws java.io.IOException;
public int available() throws java.io.IOException;
public void close() throws java.io.IOException;
public void mark(int readLimit);
public void reset() throws java.io.IOException;
public boolean markSupported();
}
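A minimal sketch of how the interface is meant to be consumed, assuming a BGZF-compressed BAM at the placeholder path example.bam (BlockCompressedInputStream, the next file in this commit, is the implementation BAMFileReader uses):

import java.io.File;
import java.io.IOException;
import net.sf.samtools.util.BAMInputStream;
import net.sf.samtools.util.BlockCompressedInputStream;

public class VirtualSeekExample {
    public static void main(final String[] args) throws IOException {
        // Hold the stream as the interface type; callers only need virtual-pointer seeking.
        final BAMInputStream in = new BlockCompressedInputStream(new File("example.bam"));
        final long pointer = in.getFilePointer(); // a virtual pointer, not a byte offset
        in.read();                                // consume one byte...
        in.seek(pointer);                         // ...and rewind to the saved position
        in.close();
    }
}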

View File

@@ -0,0 +1,483 @@
/*
* The MIT License
*
* Copyright (c) 2009 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
package net.sf.samtools.util;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.RandomAccessFile;
import java.net.URL;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import net.sf.samtools.FileTruncatedException;
/*
* Utility class for reading BGZF block compressed files. The caller can treat this file like any other InputStream.
* It probably is not necessary to wrap this stream in a buffering stream, because there is internal buffering.
* The advantage of BGZF over conventional GZip format is that BGZF allows for seeking without having to read the
* entire file up to the location being sought. Note that seeking is only possible if the ctor(File) is used.
*
* c.f. http://samtools.sourceforge.net/SAM1.pdf for details of BGZF format
*/
public class BlockCompressedInputStream extends InputStream implements BAMInputStream {
private InputStream mStream = null;
private SeekableStream mFile = null;
private byte[] mFileBuffer = null;
private byte[] mCurrentBlock = null;
private int mCurrentOffset = 0;
private long mBlockAddress = 0;
private int mLastBlockLength = 0;
private final BlockGunzipper blockGunzipper = new BlockGunzipper();
/**
* Note that seek() is not supported if this ctor is used.
*/
public BlockCompressedInputStream(final InputStream stream) {
mStream = IOUtil.toBufferedStream(stream);
mFile = null;
}
/**
* Use this ctor if you wish to call seek()
*/
public BlockCompressedInputStream(final File file)
throws IOException {
mFile = new SeekableFileStream(file);
mStream = null;
}
public BlockCompressedInputStream(final URL url) {
mFile = new SeekableBufferedStream(new SeekableHTTPStream(url));
mStream = null;
}
/**
* For providing some arbitrary data source. No additional buffering is
* provided, so if the underlying source is not buffered, wrap it in a
* SeekableBufferedStream before passing to this ctor.
*/
public BlockCompressedInputStream(final SeekableStream strm) {
mFile = strm;
mStream = null;
}
/**
* Determines whether or not the inflater will re-calculate the CRC on the decompressed data
* and check it against the value stored in the GZIP header. CRC checking is an expensive
* operation and should be used accordingly.
*/
public void setCheckCrcs(final boolean check) {
this.blockGunzipper.setCheckCrcs(check);
}
/**
* @return the number of bytes that can be read (or skipped over) from this input stream without blocking by the
* next caller of a method for this input stream. The next caller might be the same thread or another thread.
* Note that although the next caller can read this many bytes without blocking, the available() method call itself
* may block in order to fill an internal buffer if it has been exhausted.
*/
public int available()
throws IOException {
if (mCurrentBlock == null || mCurrentOffset == mCurrentBlock.length) {
readBlock();
}
if (mCurrentBlock == null) {
return 0;
}
return mCurrentBlock.length - mCurrentOffset;
}
/**
* Closes the underlying InputStream or RandomAccessFile
*/
public void close()
throws IOException {
if (mFile != null) {
mFile.close();
mFile = null;
} else if (mStream != null) {
mStream.close();
mStream = null;
}
// Encourage garbage collection
mFileBuffer = null;
mCurrentBlock = null;
}
/**
* Reads the next byte of data from the input stream. The value byte is returned as an int in the range 0 to 255.
* If no byte is available because the end of the stream has been reached, the value -1 is returned.
* This method blocks until input data is available, the end of the stream is detected, or an exception is thrown.
* @return the next byte of data, or -1 if the end of the stream is reached.
*/
public int read()
throws IOException {
return (available() > 0) ? (mCurrentBlock[mCurrentOffset++] & 0xFF) : -1; // mask so bytes 128-255 aren't sign-extended
}
/**
* Reads some number of bytes from the input stream and stores them into the buffer array b. The number of bytes
* actually read is returned as an integer. This method blocks until input data is available, end of file is detected,
* or an exception is thrown.
*
* read(buf) has the same effect as read(buf, 0, buf.length).
*
* @param buffer the buffer into which the data is read.
* @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
* the stream has been reached.
*/
public int read(final byte[] buffer)
throws IOException {
return read(buffer, 0, buffer.length);
}
private volatile ByteArrayOutputStream buf = null;
private static final byte eol = '\n';
private static final byte eolCr = '\r';
/**
* Reads a whole line. A line is considered to be terminated by either a line feed ('\n'),
* carriage return ('\r') or carriage return followed by a line feed ("\r\n").
*
* @return A String containing the contents of the line, excluding the line terminating
* character, or null if the end of the stream has been reached
*
* @exception IOException If an I/O error occurs
*/
public String readLine() throws IOException {
int available = available();
if (available == 0) {
return null;
}
if(null == buf){ // lazy initialisation
buf = new ByteArrayOutputStream(8192);
}
buf.reset();
boolean done = false;
boolean foundCr = false; // \r found flag
while (!done) {
int linetmpPos = mCurrentOffset;
int bCnt = 0;
while((available-- > 0)){
final byte c = mCurrentBlock[linetmpPos++];
if(c == eol){ // found \n
done = true;
break;
} else if(foundCr){ // previous char was \r
--linetmpPos; // current char is not \n so put it back
done = true;
break;
} else if(c == eolCr){ // found \r
foundCr = true;
continue; // no ++bCnt
}
++bCnt;
}
if(mCurrentOffset < linetmpPos){
buf.write(mCurrentBlock, mCurrentOffset, bCnt);
mCurrentOffset = linetmpPos;
}
available = available();
if(available == 0){
// EOF
done = true;
}
}
return buf.toString();
}
/**
* Reads up to len bytes of data from the input stream into an array of bytes. An attempt is made to read
* as many as len bytes, but a smaller number may be read. The number of bytes actually read is returned as an integer.
*
* This method blocks until input data is available, end of file is detected, or an exception is thrown.
*
* @param buffer buffer into which data is read.
* @param offset the start offset in the buffer at which the data is written.
* @param length the maximum number of bytes to read.
* @return the total number of bytes read into the buffer, or -1 if there is no more data because the end of
* the stream has been reached.
*/
public int read(final byte[] buffer, int offset, int length)
throws IOException {
final int originalLength = length;
while (length > 0) {
final int available = available();
if (available == 0) {
// Signal EOF to caller
if (originalLength == length) {
return -1;
}
break;
}
final int copyLength = Math.min(length, available);
System.arraycopy(mCurrentBlock, mCurrentOffset, buffer, offset, copyLength);
mCurrentOffset += copyLength;
offset += copyLength;
length -= copyLength;
}
return originalLength - length;
}
/**
* Seek to the given position in the file. Note that pos is a special virtual file pointer,
* not an actual byte offset.
*
* @param pos virtual file pointer
*/
public void seek(final long pos)
throws IOException {
if (mFile == null) {
throw new IOException("Cannot seek on stream based file");
}
// Decode virtual file pointer
// Upper 48 bits is the byte offset into the compressed stream of a block.
// Lower 16 bits is the byte offset into the uncompressed stream inside the block.
final long compressedOffset = BlockCompressedFilePointerUtil.getBlockAddress(pos);
final int uncompressedOffset = BlockCompressedFilePointerUtil.getBlockOffset(pos);
final int available;
if (mBlockAddress == compressedOffset && mCurrentBlock != null) {
available = mCurrentBlock.length;
} else {
mFile.seek(compressedOffset);
mBlockAddress = compressedOffset;
mLastBlockLength = 0;
readBlock();
available = available();
}
if (uncompressedOffset > available ||
(uncompressedOffset == available && !eof())) {
throw new IOException("Invalid file pointer: " + pos);
}
mCurrentOffset = uncompressedOffset;
}
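// Worked example: a virtual pointer of (8192L << 16) | 5 decodes to
// compressedOffset = 8192 and uncompressedOffset = 5, i.e. the BGZF block
// whose gzip header starts at compressed byte 8192, positioned 5 bytes into
// its uncompressed contents.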
private boolean eof() throws IOException {
if (mFile.eof()) {
return true;
}
// If the last remaining block is the size of the EMPTY_GZIP_BLOCK, this is the same as being at EOF.
return (mFile.length() - (mBlockAddress + mLastBlockLength) == BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
}
/**
* @return virtual file pointer that can be passed to seek() to return to the current position. This is
* not an actual byte offset, so arithmetic on file pointers cannot be done to determine the distance between
* the two.
*/
public long getFilePointer() {
if (mCurrentOffset == mCurrentBlock.length) {
// If current offset is at the end of the current block, file pointer should point
// to the beginning of the next block.
return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress + mLastBlockLength, 0);
}
return BlockCompressedFilePointerUtil.makeFilePointer(mBlockAddress, mCurrentOffset);
}
public static long getFileBlock(final long bgzfOffset) {
return BlockCompressedFilePointerUtil.getBlockAddress(bgzfOffset);
}
/**
* @param stream Must be at start of file. Throws RuntimeException if !stream.markSupported().
* @return true if the given file looks like a valid BGZF file.
*/
public static boolean isValidFile(final InputStream stream)
throws IOException {
if (!stream.markSupported()) {
throw new RuntimeException("Cannot test non-buffered stream");
}
stream.mark(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
final byte[] buffer = new byte[BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH];
final int count = readBytes(stream, buffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
stream.reset();
return count == BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH && isValidBlockHeader(buffer);
}
private static boolean isValidBlockHeader(final byte[] buffer) {
return (buffer[0] == BlockCompressedStreamConstants.GZIP_ID1 &&
(buffer[1] & 0xFF) == BlockCompressedStreamConstants.GZIP_ID2 &&
(buffer[3] & BlockCompressedStreamConstants.GZIP_FLG) != 0 &&
buffer[10] == BlockCompressedStreamConstants.GZIP_XLEN &&
buffer[12] == BlockCompressedStreamConstants.BGZF_ID1 &&
buffer[13] == BlockCompressedStreamConstants.BGZF_ID2);
}
private void readBlock()
throws IOException {
if (mFileBuffer == null) {
mFileBuffer = new byte[BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE];
}
int count = readBytes(mFileBuffer, 0, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
if (count == 0) {
// Handle case where there is no empty gzip block at end.
mCurrentOffset = 0;
mBlockAddress += mLastBlockLength;
mCurrentBlock = new byte[0];
return;
}
if (count != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH) {
throw new IOException("Premature end of file");
}
final int blockLength = unpackInt16(mFileBuffer, BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET) + 1;
if (blockLength < BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH || blockLength > mFileBuffer.length) {
throw new IOException("Unexpected compressed block length: " + blockLength);
}
final int remaining = blockLength - BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH;
count = readBytes(mFileBuffer, BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH, remaining);
if (count != remaining) {
throw new FileTruncatedException("Premature end of file");
}
inflateBlock(mFileBuffer, blockLength);
mCurrentOffset = 0;
mBlockAddress += mLastBlockLength;
mLastBlockLength = blockLength;
}
private void inflateBlock(final byte[] compressedBlock, final int compressedLength)
throws IOException {
final int uncompressedLength = unpackInt32(compressedBlock, compressedLength-4);
byte[] buffer = mCurrentBlock;
mCurrentBlock = null;
if (buffer == null || buffer.length != uncompressedLength) {
try {
buffer = new byte[uncompressedLength];
} catch (NegativeArraySizeException e) {
throw new RuntimeException("BGZF file has invalid uncompressedLength: " + uncompressedLength, e);
}
}
blockGunzipper.unzipBlock(buffer, compressedBlock, compressedLength);
mCurrentBlock = buffer;
}
private int readBytes(final byte[] buffer, final int offset, final int length)
throws IOException {
if (mFile != null) {
return readBytes(mFile, buffer, offset, length);
} else if (mStream != null) {
return readBytes(mStream, buffer, offset, length);
} else {
return 0;
}
}
private static int readBytes(final SeekableStream file, final byte[] buffer, final int offset, final int length)
throws IOException {
int bytesRead = 0;
while (bytesRead < length) {
final int count = file.read(buffer, offset + bytesRead, length - bytesRead);
if (count <= 0) {
break;
}
bytesRead += count;
}
return bytesRead;
}
private static int readBytes(final InputStream stream, final byte[] buffer, final int offset, final int length)
throws IOException {
int bytesRead = 0;
while (bytesRead < length) {
final int count = stream.read(buffer, offset + bytesRead, length - bytesRead);
if (count <= 0) {
break;
}
bytesRead += count;
}
return bytesRead;
}
private int unpackInt16(final byte[] buffer, final int offset) {
return ((buffer[offset] & 0xFF) |
((buffer[offset+1] & 0xFF) << 8));
}
private int unpackInt32(final byte[] buffer, final int offset) {
return ((buffer[offset] & 0xFF) |
((buffer[offset+1] & 0xFF) << 8) |
((buffer[offset+2] & 0xFF) << 16) |
((buffer[offset+3] & 0xFF) << 24));
}
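// Example: bytes {0x34, 0x12} unpack little-endian to 0x1234 (4660); readBlock()
// then adds 1 because the BGZF BSIZE field stores (total block size - 1).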
public enum FileTermination {HAS_TERMINATOR_BLOCK, HAS_HEALTHY_LAST_BLOCK, DEFECTIVE}
public static FileTermination checkTermination(final File file)
throws IOException {
final long fileSize = file.length();
if (fileSize < BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length) {
return FileTermination.DEFECTIVE;
}
final RandomAccessFile raFile = new RandomAccessFile(file, "r");
try {
raFile.seek(fileSize - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
byte[] buf = new byte[BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length];
raFile.readFully(buf);
if (Arrays.equals(buf, BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK)) {
return FileTermination.HAS_TERMINATOR_BLOCK;
}
final int bufsize = (int)Math.min(fileSize, BlockCompressedStreamConstants.MAX_COMPRESSED_BLOCK_SIZE);
buf = new byte[bufsize];
raFile.seek(fileSize - bufsize);
raFile.read(buf);
for (int i = buf.length - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length;
i >= 0; --i) {
if (!preambleEqual(BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE,
buf, i, BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length)) {
continue;
}
final ByteBuffer byteBuffer = ByteBuffer.wrap(buf, i + BlockCompressedStreamConstants.GZIP_BLOCK_PREAMBLE.length, 4);
byteBuffer.order(ByteOrder.LITTLE_ENDIAN);
final int totalBlockSizeMinusOne = byteBuffer.getShort() & 0xFFFF;
if (buf.length - i == totalBlockSizeMinusOne + 1) {
return FileTermination.HAS_HEALTHY_LAST_BLOCK;
} else {
return FileTermination.DEFECTIVE;
}
}
return FileTermination.DEFECTIVE;
} finally {
raFile.close();
}
}
private static boolean preambleEqual(final byte[] preamble, final byte[] buf, final int startOffset, final int length) {
for (int i = 0; i < length; ++i) {
if (preamble[i] != buf[i + startOffset]) {
return false;
}
}
return true;
}
}
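checkTermination() above gives a cheap way to detect truncated BGZF output without decompressing the whole file. A minimal sketch, again using a placeholder path:

import java.io.File;
import java.io.IOException;
import net.sf.samtools.util.BlockCompressedInputStream;

public class TerminationCheck {
    public static void main(final String[] args) throws IOException {
        final BlockCompressedInputStream.FileTermination termination =
                BlockCompressedInputStream.checkTermination(new File("example.bam"));
        if (termination == BlockCompressedInputStream.FileTermination.DEFECTIVE) {
            System.err.println("example.bam looks truncated or is not valid BGZF");
        }
    }
}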

View File

@@ -331,12 +331,12 @@ public abstract class CommandLineProgram {
* used to indicate an error occurred
*
* @param msg the message
* @param e the error
* @param t the error
*/
public static void exitSystemWithError(String msg, final Exception e) {
public static void exitSystemWithError(String msg, final Throwable t) {
errorPrintf("------------------------------------------------------------------------------------------%n");
errorPrintf("stack trace %n");
e.printStackTrace();
t.printStackTrace();
errorPrintf("------------------------------------------------------------------------------------------%n");
errorPrintf("A GATK RUNTIME ERROR has occurred (version %s):%n", CommandLineGATK.getVersionNumber());
@@ -394,8 +394,8 @@ public abstract class CommandLineProgram {
*
* @param t the exception that occurred
*/
public static void exitSystemWithError(Exception e) {
exitSystemWithError(e.getMessage(), e);
public static void exitSystemWithError(Throwable t) {
exitSystemWithError(t.getMessage(), t);
}
/**

View File

@@ -99,8 +99,8 @@ public class CommandLineGATK extends CommandLineExecutable {
} catch (net.sf.samtools.SAMException e) {
// Let's try this out and see how it is received by our users
exitSystemWithSamError(e);
} catch (Exception e) {
exitSystemWithError(e);
} catch (Throwable t) {
exitSystemWithError(t);
}
}

View File

@@ -35,6 +35,7 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.datasources.reads.*;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.samples.SampleDB;
import org.broadinstitute.sting.gatk.executive.MicroScheduler;
import org.broadinstitute.sting.gatk.filters.FilterManager;
@@ -126,6 +127,11 @@ public class GenomeAnalysisEngine {
*/
private Collection<ReadFilter> filters;
/**
* Controls the allocation of threads between CPU and I/O.
*/
private ThreadAllocation threadAllocation;
/**
* A currently hacky unique name for this GATK instance
*/
@@ -199,6 +205,9 @@ public class GenomeAnalysisEngine {
if (this.getArguments().nonDeterministicRandomSeed)
resetRandomGenerator(System.currentTimeMillis());
// Determine how the threads should be divided between CPU and I/O.
determineThreadAllocation();
// Prepare the data for traversal.
initializeDataSources();
@@ -218,7 +227,7 @@
// create the output streams
initializeOutputStreams(microScheduler.getOutputTracker());
ShardStrategy shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
Iterable<Shard> shardStrategy = getShardStrategy(readsDataSource,microScheduler.getReference(),intervals);
// execute the microscheduler, storing the results
return microScheduler.execute(this.walker, shardStrategy);
@@ -266,6 +275,16 @@
return Collections.unmodifiableList(filters);
}
/**
* Parse out the thread allocation from the given command-line argument.
*/
private void determineThreadAllocation() {
Tags tags = parsingEngine.getTags(argCollection.numberOfThreads);
Integer numCPUThreads = tags.containsKey("cpu") ? Integer.parseInt(tags.getValue("cpu")) : null;
Integer numIOThreads = tags.containsKey("io") ? Integer.parseInt(tags.getValue("io")) : null;
this.threadAllocation = new ThreadAllocation(argCollection.numberOfThreads,numCPUThreads,numIOThreads);
}
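// With a plain "-nt N" and no cpu/io tags supplied, both lookups above yield
// null, leaving the CPU/IO split to ThreadAllocation's defaults.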
/**
* Allow subclasses and others within this package direct access to the walker manager.
* @return The walker manager used by this package.
@@ -286,7 +305,7 @@ public class GenomeAnalysisEngine {
throw new UserException.CommandLineException("Read-based traversals require a reference file but none was given");
}
return MicroScheduler.create(this,walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),this.getArguments().numberOfThreads);
return MicroScheduler.create(this,walker,this.getReadsDataSource(),this.getReferenceDataSource().getReference(),this.getRodDataSources(),threadAllocation);
}
protected DownsamplingMethod getDownsamplingMethod() {
@@ -397,103 +416,49 @@
* @param intervals intervals
* @return the sharding strategy
*/
protected ShardStrategy getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
protected Iterable<Shard> getShardStrategy(SAMDataSource readsDataSource, ReferenceSequenceFile drivingDataSource, GenomeLocSortedSet intervals) {
ValidationExclusion exclusions = (readsDataSource != null ? readsDataSource.getReadsInfo().getValidationExclusionList() : null);
ReferenceDataSource referenceDataSource = this.getReferenceDataSource();
// Use monolithic sharding if no index is present. Monolithic sharding is always required for the original
// sharding system; it's required with the new sharding system only for locus walkers.
if(readsDataSource != null && !readsDataSource.hasIndex() ) {
if(!exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM))
// If reads are present, assume that accessing the reads is always the dominant factor and shard based on that supposition.
if(!readsDataSource.isEmpty()) {
if(!readsDataSource.hasIndex() && !exclusions.contains(ValidationExclusion.TYPE.ALLOW_UNINDEXED_BAM))
throw new UserException.CommandLineException("Cannot process the provided BAM file(s) because they were not indexed. The GATK does offer limited processing of unindexed BAMs in --unsafe mode, but this GATK feature is currently unsupported.");
if(intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
if(!readsDataSource.hasIndex() && intervals != null && !argCollection.allowIntervalsWithUnindexedBAM)
throw new UserException.CommandLineException("Cannot perform interval processing when reads are present but no index is available.");
Shard.ShardType shardType;
if(walker instanceof LocusWalker) {
if (readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
shardType = Shard.ShardType.LOCUS;
if(intervals == null)
return readsDataSource.createShardIteratorOverMappedReads(referenceDataSource.getReference().getSequenceDictionary(),new LocusShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(intervals,new LocusShardBalancer());
}
else if(walker instanceof ReadWalker || walker instanceof ReadPairWalker || walker instanceof DuplicateWalker) {
// Apply special validation to read pair walkers.
if(walker instanceof ReadPairWalker) {
if(readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers are exceptions in that they cannot be run on coordinate-sorted BAMs but instead require query name-sorted files. You will need to resort your input BAM file in query name order to use this walker.");
if(intervals != null && !intervals.isEmpty())
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
}
if(intervals == null)
return readsDataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
else
return readsDataSource.createShardIteratorOverIntervals(intervals,new ReadShardBalancer());
}
else if(walker instanceof ReadWalker || walker instanceof DuplicateWalker || walker instanceof ReadPairWalker)
shardType = Shard.ShardType.READ;
else
throw new UserException.CommandLineException("The GATK cannot currently process unindexed BAM files");
List<GenomeLoc> region;
if(intervals != null)
region = intervals.toList();
else {
region = new ArrayList<GenomeLoc>();
for(SAMSequenceRecord sequenceRecord: drivingDataSource.getSequenceDictionary().getSequences())
region.add(getGenomeLocParser().createGenomeLoc(sequenceRecord.getSequenceName(),1,sequenceRecord.getSequenceLength()));
}
return new MonolithicShardStrategy(getGenomeLocParser(), readsDataSource,shardType,region);
throw new ReviewedStingException("Unable to determine walker type for walker " + walker.getClass().getName());
}
else {
final int SHARD_SIZE = walker instanceof RodWalker ? 100000000 : 100000;
if(intervals == null)
return referenceDataSource.createShardsOverEntireReference(readsDataSource,genomeLocParser,SHARD_SIZE);
else
return referenceDataSource.createShardsOverIntervals(readsDataSource,intervals,SHARD_SIZE);
}
ShardStrategy shardStrategy;
ShardStrategyFactory.SHATTER_STRATEGY shardType;
long SHARD_SIZE = 100000L;
if (walker instanceof LocusWalker) {
if (walker instanceof RodWalker) SHARD_SIZE *= 1000;
if (intervals != null && !intervals.isEmpty()) {
if (readsDataSource == null)
throw new IllegalArgumentException("readsDataSource is null");
if(!readsDataSource.isEmpty() && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.coordinate)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.coordinate, "Locus walkers can only traverse coordinate-sorted data. Please resort your input BAM file(s) or set the Sort Order tag in the header appropriately.");
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser(),
intervals);
} else
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,getGenomeLocParser());
} else if (walker instanceof ReadWalker ||
walker instanceof DuplicateWalker) {
shardType = ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL;
if (intervals != null && !intervals.isEmpty()) {
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser(),
intervals);
} else {
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
shardType,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser());
}
} else if (walker instanceof ReadPairWalker) {
if(readsDataSource != null && readsDataSource.getSortOrder() != SAMFileHeader.SortOrder.queryname)
throw new UserException.MissortedBAM(SAMFileHeader.SortOrder.queryname, "Read pair walkers can only walk over query name-sorted data. Please resort your input BAM file.");
if(intervals != null && !intervals.isEmpty())
throw new UserException.CommandLineException("Pairs traversal cannot be used in conjunction with intervals.");
shardStrategy = ShardStrategyFactory.shatter(readsDataSource,
referenceDataSource.getReference(),
ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
drivingDataSource.getSequenceDictionary(),
SHARD_SIZE,
getGenomeLocParser());
} else
throw new ReviewedStingException("Unable to support walker of type" + walker.getClass().getName());
return shardStrategy;
}
protected boolean flashbackData() {
@ -751,6 +716,8 @@ public class GenomeAnalysisEngine {
return new SAMDataSource(
samReaderIDs,
threadAllocation,
argCollection.numberOfBAMFileHandles,
genomeLocParser,
argCollection.useOriginalBaseQualities,
argCollection.strictnessLevel,
@ -763,8 +730,7 @@ public class GenomeAnalysisEngine {
getWalkerBAQApplicationTime() == BAQ.ApplicationTime.ON_INPUT ? argCollection.BAQMode : BAQ.CalculationMode.OFF,
getWalkerBAQQualityMode(),
refReader,
argCollection.defaultBaseQualities,
!argCollection.disableLowMemorySharding);
argCollection.defaultBaseQualities);
}
/**

View File

@ -194,10 +194,14 @@ public class GATKArgumentCollection {
@Argument(fullName = "unsafe", shortName = "U", doc = "If set, enables unsafe operations: nothing will be checked at runtime. For expert users only who know what they are doing. We do not support usage of this argument.", required = false)
public ValidationExclusion.TYPE unsafe;
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis", required = false)
public int numberOfThreads = 1;
/** How many threads should be allocated to this analysis. */
@Argument(fullName = "num_threads", shortName = "nt", doc = "How many threads should be allocated to running this analysis.", required = false)
public Integer numberOfThreads = 1;
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line", required = false)
@Argument(fullName = "num_bam_file_handles", shortName = "bfh", doc="The total number of BAM file handles to keep open simultaneously", required=false)
public Integer numberOfBAMFileHandles = null;
@Input(fullName = "read_group_black_list", shortName="rgbl", doc="Filters out read groups matching <TAG>:<STRING> or a .txt file containing the filter strings one per line.", required = false)
public List<String> readGroupBlackList = null;
// --------------------------------------------------------------------------------------------------------------
@ -292,9 +296,6 @@ public class GATKArgumentCollection {
@Hidden
public boolean allowIntervalsWithUnindexedBAM = false;
@Argument(fullName="disable_experimental_low_memory_sharding",doc="Disable experimental low-memory sharding functionality",required=false)
public boolean disableLowMemorySharding = false;
// --------------------------------------------------------------------------------------------------------------
//
// methods
@ -365,7 +366,11 @@ public class GATKArgumentCollection {
(other.downsampleCoverage != null && !other.downsampleCoverage.equals(this.downsampleCoverage))) {
return false;
}
if (other.numberOfThreads != this.numberOfThreads) {
if (!other.numberOfThreads.equals(this.numberOfThreads)) {
return false;
}
if ((other.numberOfBAMFileHandles == null && this.numberOfBAMFileHandles != null) ||
(other.numberOfBAMFileHandles != null && !other.numberOfBAMFileHandles.equals(this.numberOfBAMFileHandles))) {
return false;
}
if (other.intervalMerging != this.intervalMerging) {
@ -389,9 +394,6 @@ public class GATKArgumentCollection {
if (allowIntervalsWithUnindexedBAM != other.allowIntervalsWithUnindexedBAM)
return false;
if (disableLowMemorySharding != other.disableLowMemorySharding)
return false;
return true;
}
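As an aside, the pairwise null checks above for numberOfBAMFileHandles are exactly the pattern that JDK 7's java.util.Objects.equals encapsulates. A minimal illustrative sketch (the class and method names here are hypothetical, not part of this changeset):

import java.util.Objects;

// Null-safe comparison equivalent to the explicit two-clause check above:
// true if both arguments are null, false if exactly one is null,
// otherwise delegates to equals().
class NullSafeCompareSketch {
    static boolean sameHandleCount(Integer mine, Integer theirs) {
        return Objects.equals(mine, theirs);
    }

    public static void main(String[] args) {
        System.out.println(sameHandleCount(null, null)); // true
        System.out.println(sameHandleCount(4, null));    // false
        System.out.println(sameHandleCount(4, 4));       // true
    }
}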

View File

@ -1,128 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.Iterator;
/**
* Iterates over the file offsets at which each BGZF block in a BAM file begins.
*
* @author mhanna
*/
public class BAMBlockStartIterator implements Iterator<Long> {
/**
* How large is a BGZF header?
*/
private static final int BGZF_HEADER_SIZE = 18;
/**
* Where within the header does the BLOCKSIZE actually live?
*/
private static final int BLOCK_SIZE_HEADER_POSITION = BGZF_HEADER_SIZE - 2;
private FileChannel bamInputChannel;
private ByteBuffer headerByteBuffer;
private long nextLocation = 0;
public BAMBlockStartIterator(File bamFile) {
try {
FileInputStream bamInputStream = new FileInputStream(bamFile);
bamInputChannel = bamInputStream.getChannel();
headerByteBuffer = ByteBuffer.allocate(BGZF_HEADER_SIZE);
headerByteBuffer.order(ByteOrder.LITTLE_ENDIAN);
}
catch(IOException ex) {
throw new StingException("Could not open file",ex);
}
}
public boolean hasNext() {
return nextLocation != -1;
}
public Long next() {
long currentLocation = nextLocation;
advance();
return currentLocation;
}
public void remove() {
throw new UnsupportedOperationException("Cannot remove from a BAMBlockStartIterator");
}
private void advance() {
int readStatus;
headerByteBuffer.clear();
try {
readStatus = bamInputChannel.read(headerByteBuffer);
}
catch(IOException ex) {
throw new StingException("Could not read header data",ex);
}
if(readStatus == -1) {
nextLocation = -1;
try {
bamInputChannel.close();
}
catch(IOException ex) {
throw new StingException("Could not close input file",ex);
}
return;
}
headerByteBuffer.position(BLOCK_SIZE_HEADER_POSITION);
// BSIZE is an unsigned 16-bit value holding (total block size - 1); mask to avoid sign extension.
int blockSize = headerByteBuffer.getShort() & 0xFFFF;
try {
bamInputChannel.position(bamInputChannel.position()+blockSize-BGZF_HEADER_SIZE+1);
nextLocation = bamInputChannel.position();
}
catch(IOException ex) {
throw new StingException("Could not reposition input stream",ex);
}
}
public static void main(String argv[]) throws IOException {
BAMBlockStartIterator blockStartIterator = new BAMBlockStartIterator(new File("/Users/mhanna/testdata/reads/MV1994.bam"));
int i = 0;
while(blockStartIterator.hasNext())
System.out.printf("%d -> %d%n",i++,blockStartIterator.next());
}
}

View File

@ -1,195 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.LinearIndex;
import java.util.*;
/**
* Represents the contents of a bam index file for one reference.
* A BAM index (.bai) file contains information for all references in the bam file.
* This class describes the data present in the index file for one of these references;
* including the bins, chunks, and linear index.
*/
class BAMIndexContent {
/**
* The reference sequence for the data currently loaded.
*/
private final int mReferenceSequence;
/**
* A list of all bins in the above reference sequence.
*/
private final BinList mBinList;
/**
* The linear index for the reference sequence above.
*/
private final LinearIndex mLinearIndex;
/**
* @param referenceSequence Content corresponds to this reference.
* @param bins Array of bins represented by this content, possibly sparse
* @param numberOfBins Number of non-null bins
* @param linearIndex Additional index used to optimize queries
*/
BAMIndexContent(final int referenceSequence, final GATKBin[] bins, final int numberOfBins, final LinearIndex linearIndex) {
this.mReferenceSequence = referenceSequence;
this.mBinList = new BinList(bins, numberOfBins);
this.mLinearIndex = linearIndex;
}
/**
* The reference sequence index to which this content corresponds.
*/
public int getReferenceSequence() {
return mReferenceSequence;
}
/**
* Does this content have anything in this bin?
*/
public boolean containsBin(final GATKBin bin) {
return mBinList.getBin(bin.getBinNumber()) != null;
}
/**
* @return iterable list of bins represented by this content
*/
public BinList getBins() {
return mBinList;
}
/**
* @return the number of non-null bins represented by this content
*/
int getNumberOfNonNullBins() {
return mBinList.getNumberOfNonNullBins();
}
/**
* @return all chunks associated with all bins in this content
*/
public List<GATKChunk> getAllChunks() {
List<GATKChunk> allChunks = new ArrayList<GATKChunk>();
for (GATKBin b : mBinList)
if (b.getChunkList() != null) {
allChunks.addAll(Arrays.asList(b.getChunkList()));
}
return Collections.unmodifiableList(allChunks);
}
/**
* @return the linear index represented by this content
*/
public LinearIndex getLinearIndex() {
return mLinearIndex;
}
/**
* This class encapsulates the list of bins stored in the BAMIndexContent.
* While it is currently represented as an array, we may decide to change it to an ArrayList or other structure.
*/
class BinList implements Iterable<GATKBin> {
private final GATKBin[] mBinArray;
public final int numberOfNonNullBins;
public final int maxBinNumber; // invariant: maxBinNumber = mBinArray.length -1 since array is 0 based
/**
* @param binArray a sparse array representation of the bins. The index into the array is the bin number.
* @param numberOfNonNullBins Number of non-null bins in the array.
*/
BinList(GATKBin[] binArray, int numberOfNonNullBins) {
this.mBinArray = binArray;
this.numberOfNonNullBins = numberOfNonNullBins;
this.maxBinNumber = mBinArray.length - 1;
}
GATKBin getBin(int binNumber) {
if (binNumber > maxBinNumber) return null;
return mBinArray[binNumber];
}
int getNumberOfNonNullBins() {
return numberOfNonNullBins;
}
/**
* Gets an iterator over all non-null bins.
*
* @return An iterator over all non-null bins.
*/
public Iterator<GATKBin> iterator() {
return new BinIterator();
}
private class BinIterator implements Iterator<GATKBin> {
/**
* Stores the bin number of the next bin to be examined.
*/
private int nextBin;
public BinIterator() {
nextBin = 0;
}
/**
* Are there more bins in this set, waiting to be returned?
*
* @return True if more bins are remaining.
*/
public boolean hasNext() {
while (nextBin <= maxBinNumber) {
if (getBin(nextBin) != null) return true;
nextBin++;
}
return false;
}
/**
* Gets the next bin in the provided BinList.
*
* @return the next available bin in the BinList.
*/
public GATKBin next() {
if (!hasNext())
throw new NoSuchElementException("This BinIterator is currently empty");
GATKBin result = getBin(nextBin);
nextBin++;
return result;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from a bin iterator");
}
}
}
}
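For orientation, a hypothetical usage sketch for the class above (BAMIndexContent is package-private, so such code would live in the same package; countChunks is an invented helper, not part of this changeset):

import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKChunk;

// Iterates the non-null bins of a BAMIndexContent and tallies their chunks.
class BAMIndexContentSketch {
    static int countChunks(BAMIndexContent content) {
        int chunkCount = 0;
        for (GATKBin bin : content.getBins()) {      // BinList iterates non-null bins only
            GATKChunk[] chunks = bin.getChunkList();
            if (chunks != null)
                chunkCount += chunks.length;
        }
        return chunkCount;
    }
}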

View File

@ -1,29 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin;
import java.util.HashMap;
import java.util.Map;
/**
* Models a bin at which all BAM files in the merged input stream overlap.
*/
class BAMOverlap {
public final int start;
public final int stop;
private final Map<SAMReaderID,Bin> bins = new HashMap<SAMReaderID,Bin>();
public BAMOverlap(final int start, final int stop) {
this.start = start;
this.stop = stop;
}
public void addBin(final SAMReaderID id, final Bin bin) {
bins.put(id,bin);
}
public Bin getBin(final SAMReaderID id) {
return bins.get(id);
}
}

View File

@ -84,21 +84,21 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
/**
* Create a new BAM schedule based on the given index.
* @param indexFiles Index files.
* @param dataSource The SAM data source to use.
* @param intervals List of intervals for which to build the schedule; must be non-empty.
*/
public BAMSchedule(final Map<SAMReaderID,GATKBAMIndex> indexFiles, final List<GenomeLoc> intervals) {
public BAMSchedule(final SAMDataSource dataSource, final List<GenomeLoc> intervals) {
if(intervals.isEmpty())
throw new ReviewedStingException("Tried to write schedule for empty interval list.");
referenceSequence = intervals.get(0).getContigIndex();
referenceSequence = dataSource.getHeader().getSequence(intervals.get(0).getContig()).getSequenceIndex();
createScheduleFile();
readerIDs.addAll(indexFiles.keySet());
readerIDs.addAll(dataSource.getReaderIDs());
for(final SAMReaderID reader: readerIDs) {
final GATKBAMIndex index = indexFiles.get(reader);
final GATKBAMIndex index = dataSource.getIndex(reader);
final GATKBAMIndexData indexData = index.readReferenceSequence(referenceSequence);
int currentBinInLowestLevel = GATKBAMIndex.getFirstBinInLevel(GATKBAMIndex.getNumIndexLevels()-1);
@ -237,7 +237,10 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
if(selectedIterators.isEmpty())
return;
// Create the target schedule entry
BAMScheduleEntry mergedScheduleEntry = new BAMScheduleEntry(currentStart,currentStop);
// For each schedule entry with data, load the data into the merged schedule.
for (int reader = selectedIterators.nextSetBit(0); reader >= 0; reader = selectedIterators.nextSetBit(reader+1)) {
PeekableIterator<BAMScheduleEntry> scheduleIterator = scheduleIterators.get(reader);
BAMScheduleEntry individualScheduleEntry = scheduleIterator.peek();
@ -248,6 +251,11 @@ public class BAMSchedule implements CloseableIterator<BAMScheduleEntry> {
scheduleIterator.next();
}
// For each schedule entry without data, add a blank entry.
for (int reader = selectedIterators.nextClearBit(0); reader < readerIDs.size(); reader = selectedIterators.nextClearBit(reader+1)) {
mergedScheduleEntry.addFileSpan(readerIDs.get(reader),new GATKBAMFileSpan());
}
nextScheduleEntry = mergedScheduleEntry;
}

View File

@ -27,7 +27,12 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.*;
@ -42,21 +47,86 @@ public class BAMScheduler implements Iterator<FilePointer> {
private FilePointer nextFilePointer = null;
private final GenomeLocSortedSet loci;
private GenomeLocSortedSet loci;
private PeekableIterator<GenomeLoc> locusIterator;
private GenomeLoc currentLocus;
private final PeekableIterator<GenomeLoc> locusIterator;
public static BAMScheduler createOverMappedReads(final SAMDataSource dataSource, final SAMSequenceDictionary referenceSequenceDictionary, final GenomeLocParser parser) {
BAMScheduler scheduler = new BAMScheduler(dataSource);
GenomeLocSortedSet intervals = new GenomeLocSortedSet(parser);
for(SAMSequenceRecord sequence: referenceSequenceDictionary.getSequences()) {
// Match only on sequence name; trust startup validation to make sure all the sequences match.
if(dataSource.getHeader().getSequenceDictionary().getSequence(sequence.getSequenceName()) != null)
intervals.add(parser.createOverEntireContig(sequence.getSequenceName()));
}
scheduler.populateFilteredIntervalList(intervals);
return scheduler;
}
private GenomeLoc currentLocus;
public static BAMScheduler createOverAllReads(final SAMDataSource dataSource, final GenomeLocParser parser) {
BAMScheduler scheduler = new BAMScheduler(dataSource);
scheduler.populateUnfilteredIntervalList(parser);
return scheduler;
}
public BAMScheduler(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
public static BAMScheduler createOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
BAMScheduler scheduler = new BAMScheduler(dataSource);
scheduler.populateFilteredIntervalList(loci);
return scheduler;
}
private BAMScheduler(final SAMDataSource dataSource) {
this.dataSource = dataSource;
for(SAMReaderID reader: dataSource.getReaderIDs())
indexFiles.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
for(SAMReaderID reader: dataSource.getReaderIDs()) {
GATKBAMIndex index = dataSource.getIndex(reader);
if(index != null)
indexFiles.put(reader,dataSource.getIndex(reader));
}
}
/**
* The consumer has asked for a bounded set of locations. Prepare an iterator over those locations.
* @param loci The list of locations to search and iterate over.
*/
private void populateFilteredIntervalList(final GenomeLocSortedSet loci) {
this.loci = loci;
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
if(locusIterator.hasNext())
currentLocus = locusIterator.next();
advance();
if(!indexFiles.isEmpty()) {
// If index data is available, start up the iterator.
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
if(locusIterator.hasNext())
currentLocus = locusIterator.next();
advance();
}
else {
// Otherwise, seed the iterator with a single file pointer over the entire region.
nextFilePointer = generatePointerOverEntireFileset();
for(GenomeLoc locus: loci)
nextFilePointer.addLocation(locus);
locusIterator = new PeekableIterator<GenomeLoc>(Collections.<GenomeLoc>emptyList().iterator());
}
}
/**
* The consumer has provided null, meaning to iterate over all available data. Create a file pointer stretching
* from just before the start of the region to the end of the region.
*/
private void populateUnfilteredIntervalList(final GenomeLocParser parser) {
this.loci = new GenomeLocSortedSet(parser);
locusIterator = new PeekableIterator<GenomeLoc>(Collections.<GenomeLoc>emptyList().iterator());
nextFilePointer = generatePointerOverEntireFileset();
}
/**
* Generate a span that runs from the end of the BAM header to the end of the file.
* @return A file pointer over the specified region.
*/
private FilePointer generatePointerOverEntireFileset() {
FilePointer filePointer = new FilePointer();
Map<SAMReaderID,GATKBAMFileSpan> currentPosition = dataSource.getCurrentPosition();
for(SAMReaderID reader: dataSource.getReaderIDs())
filePointer.addFileSpans(reader,createSpanToEndOfFile(currentPosition.get(reader).getGATKChunks().get(0).getChunkStart()));
return filePointer;
}
public boolean hasNext() {
@ -67,7 +137,9 @@ public class BAMScheduler implements Iterator<FilePointer> {
if(!hasNext())
throw new NoSuchElementException("No next element available in interval sharder");
FilePointer currentFilePointer = nextFilePointer;
nextFilePointer = null;
advance();
return currentFilePointer;
}
@ -79,13 +151,12 @@ public class BAMScheduler implements Iterator<FilePointer> {
if(loci.isEmpty())
return;
nextFilePointer = null;
while(nextFilePointer == null && currentLocus != null) {
// special case handling of the unmapped shard.
if(currentLocus == GenomeLoc.UNMAPPED) {
nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
nextFilePointer.addFileSpans(id,new GATKBAMFileSpan(new GATKChunk(indexFiles.get(id).getStartOfLastLinearBin(),Long.MAX_VALUE)));
nextFilePointer.addFileSpans(id,createSpanToEndOfFile(indexFiles.get(id).getStartOfLastLinearBin()));
currentLocus = null;
continue;
}
@ -96,7 +167,7 @@ public class BAMScheduler implements Iterator<FilePointer> {
int coveredRegionStop = Integer.MAX_VALUE;
GenomeLoc coveredRegion = null;
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indexFiles,currentLocus);
BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(currentLocus);
// No overlapping data at all.
if(scheduleEntry != null) {
@ -108,7 +179,6 @@ public class BAMScheduler implements Iterator<FilePointer> {
}
else {
// Always create a file span, whether there was covered data or not. If there was no covered data, then the binTree is empty.
//System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
for(SAMReaderID reader: indexFiles.keySet())
nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
}
@ -116,21 +186,13 @@ public class BAMScheduler implements Iterator<FilePointer> {
// Early exit if no bins were found.
if(coveredRegion == null) {
// for debugging only: maximum split is 16384.
if(currentLocus.size() > 16384) {
GenomeLoc[] splitContigs = currentLocus.split(currentLocus.getStart()+16384);
nextFilePointer.addLocation(splitContigs[0]);
currentLocus = splitContigs[1];
}
else {
nextFilePointer.addLocation(currentLocus);
currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
}
nextFilePointer.addLocation(currentLocus);
currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
continue;
}
// Early exit if only part of the first interval was found.
if(currentLocus.startsBefore(coveredRegion)) {
// for debugging only: maximum split is 16384.
int splitPoint = Math.min(coveredRegion.getStart()-currentLocus.getStart(),16384)+currentLocus.getStart();
GenomeLoc[] splitContigs = currentLocus.split(splitPoint);
nextFilePointer.addLocation(splitContigs[0]);
@ -175,25 +237,30 @@ public class BAMScheduler implements Iterator<FilePointer> {
/**
* Get the next overlapping tree of bins associated with the given BAM file.
* @param indices BAM indices.
* @param currentLocus The actual locus for which to check overlap.
* @return The next schedule entry overlapping with the given list of loci.
*/
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final GenomeLoc currentLocus) {
// Make sure that we consult the BAM header to ensure that we're using the correct contig index for this contig name.
// This will ensure that if the two sets of contigs don't quite match (b36 male vs female ref, hg19 Epstein-Barr), then
// we'll be using the correct contig index for the BAMs.
// TODO: Warning: assumes all BAMs use the same sequence dictionary! Get around this with contig aliasing.
final int currentContigIndex = dataSource.getHeader().getSequence(currentLocus.getContig()).getSequenceIndex();
// Stale reference sequence or first invocation. (Re)create the binTreeIterator.
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentContigIndex) {
if(bamScheduleIterator != null)
bamScheduleIterator.close();
lastReferenceSequenceLoaded = currentLocus.getContigIndex();
lastReferenceSequenceLoaded = currentContigIndex;
// Naive algorithm: find all elements in current contig for proper schedule creation.
List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
for(GenomeLoc locus: loci) {
if(locus.getContigIndex() == lastReferenceSequenceLoaded)
if(dataSource.getHeader().getSequence(locus.getContig()).getSequenceIndex() == lastReferenceSequenceLoaded)
lociInContig.add(locus);
}
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(dataSource,lociInContig));
}
if(!bamScheduleIterator.hasNext())
@ -209,4 +276,13 @@ public class BAMScheduler implements Iterator<FilePointer> {
return (bamScheduleEntry != null && bamScheduleEntry.overlaps(currentLocus)) ? bamScheduleEntry : null;
}
/**
* Create a span from the given start point to the end of the file.
* @param startOfRegion Start of the region, in encoded coordinates (block start << 16 | block offset).
* @return A file span from the given point to the end of the file.
*/
private GATKBAMFileSpan createSpanToEndOfFile(final long startOfRegion) {
return new GATKBAMFileSpan(new GATKChunk(startOfRegion,Long.MAX_VALUE));
}
}
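The encoded coordinates used by createSpanToEndOfFile are standard BGZF virtual file offsets: the compressed block's byte address occupies the upper 48 bits and the offset within the uncompressed block the lower 16. Picard's BlockCompressedFilePointerUtil supplies the real helpers; the sketch below just restates the arithmetic with illustrative names:

// Encode/decode of a BGZF virtual file offset, matching the
// "block start << 16 | block offset" convention described above.
class VirtualOffsetSketch {
    static long encode(final long blockAddress, final int blockOffset) {
        return (blockAddress << 16) | (blockOffset & 0xFFFF);
    }

    static long blockAddress(final long virtualOffset) {
        return virtualOffset >>> 16;           // upper 48 bits
    }

    static int blockOffset(final long virtualOffset) {
        return (int) (virtualOffset & 0xFFFF); // lower 16 bits
    }

    public static void main(String[] args) {
        final long vfp = encode(123456L, 42);
        System.out.printf("block=%d offset=%d%n", blockAddress(vfp), blockOffset(vfp));
    }
}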

View File

@ -0,0 +1,85 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.LinkedList;
import java.util.Queue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
/**
* Preloads BGZF blocks in preparation for unzipping and data processing.
* TODO: Right now, the block loader has all threads blocked waiting for a work request. Ultimately this should
* TODO: be replaced with a central thread management strategy.
*/
public class BGZFBlockLoadingDispatcher {
/**
* The file handle cache, used when allocating blocks from the dispatcher.
*/
private final FileHandleCache fileHandleCache;
private final ExecutorService threadPool;
private final Queue<SAMReaderPosition> inputQueue;
public BGZFBlockLoadingDispatcher(final int numThreads, final int numFileHandles) {
threadPool = Executors.newFixedThreadPool(numThreads);
fileHandleCache = new FileHandleCache(numFileHandles);
inputQueue = new LinkedList<SAMReaderPosition>();
threadPool.execute(new BlockLoader(this,fileHandleCache,true));
}
/**
* Initiates a request for a new block load.
* @param readerPosition Position at which to load.
*/
void queueBlockLoad(final SAMReaderPosition readerPosition) {
synchronized(inputQueue) {
inputQueue.add(readerPosition);
inputQueue.notify();
}
}
/**
* Claims the next work request from the queue.
* @return The next work request, or null if none is available.
*/
SAMReaderPosition claimNextWorkRequest() {
synchronized(inputQueue) {
while(inputQueue.isEmpty()) {
try {
inputQueue.wait();
}
catch(InterruptedException ex) {
throw new ReviewedStingException("Interrupt occurred waiting for next block reader work item");
}
}
return inputQueue.poll();
}
}
}
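Design note: the synchronized/wait/notify choreography in queueBlockLoad() and claimNextWorkRequest() is the hand-rolled equivalent of a blocking queue. A sketch of the same contract expressed with java.util.concurrent (illustrative class, not the actual implementation):

import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// BlockingQueue internalizes the lock/wait/notify pattern used above.
class BlockingDispatcherSketch<T> {
    private final BlockingQueue<T> inputQueue = new LinkedBlockingQueue<T>();

    void queueWork(final T request) {
        inputQueue.add(request);      // enqueue and wake one waiting consumer
    }

    T claimNextWorkRequest() throws InterruptedException {
        return inputQueue.take();     // blocks until a request is available
    }
}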

View File

@ -0,0 +1,436 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import net.sf.samtools.util.BAMInputStream;
import net.sf.samtools.util.BlockCompressedFilePointerUtil;
import net.sf.samtools.util.BlockCompressedInputStream;
import net.sf.samtools.util.RuntimeEOFException;
import net.sf.samtools.util.SeekableStream;
import org.broad.tribble.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.LinkedList;
/**
* Presents decompressed blocks to the SAMFileReader.
*/
public class BlockInputStream extends SeekableStream implements BAMInputStream {
/**
* Mechanism for triggering block loads.
*/
private final BGZFBlockLoadingDispatcher dispatcher;
/**
* The reader whose data is supplied by this input stream.
*/
private final SAMReaderID reader;
/**
* Length of the input stream.
*/
private final long length;
/**
* The latest error reported by an asynchronous block load.
*/
private Throwable error;
/**
* Current position.
*/
private SAMReaderPosition position;
/**
* A stream of compressed data blocks.
*/
private final ByteBuffer buffer;
/**
* Offsets of the given blocks in the buffer.
*/
private LinkedList<Integer> blockOffsets = new LinkedList<Integer>();
/**
* Source positions of the given blocks in the buffer.
*/
private LinkedList<Long> blockPositions = new LinkedList<Long>();
/**
* Provides a lock to wait for more data to arrive.
*/
private final Object lock = new Object();
/**
* An input stream to use when comparing data back to what it should look like.
*/
private final BlockCompressedInputStream validatingInputStream;
/**
* Has the buffer been filled since last request?
*/
private boolean bufferFilled = false;
/**
* Create a new block presenting input stream with a dedicated buffer.
* @param dispatcher the block loading messenger.
* @param reader the reader for which to load data.
* @param validate validates the contents read into the buffer against the contents of a Picard BlockCompressedInputStream.
*/
BlockInputStream(final BGZFBlockLoadingDispatcher dispatcher, final SAMReaderID reader, final boolean validate) {
this.reader = reader;
this.length = reader.samFile.length();
buffer = ByteBuffer.wrap(new byte[64*1024]);
buffer.order(ByteOrder.LITTLE_ENDIAN);
// The state of the buffer assumes that the range of data written into the buffer appears in the range
// [position,limit), while extra capacity exists in the range [limit,capacity)
buffer.limit(0);
this.dispatcher = dispatcher;
// TODO: Kill the region when all we want to do is start at the beginning of the stream and run to the end of the stream.
this.position = new SAMReaderPosition(reader,this,new GATKBAMFileSpan(new GATKChunk(0,Long.MAX_VALUE)));
try {
if(validate) {
System.out.printf("BlockInputStream %s: BGZF block validation mode activated%n",this);
validatingInputStream = new BlockCompressedInputStream(reader.samFile);
// A bug in ValidatingInputStream means that calling getFilePointer() immediately after initialization will result in an NPE.
// Poke the stream to start reading data.
validatingInputStream.available();
}
else
validatingInputStream = null;
}
catch(IOException ex) {
throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
}
}
public long length() {
return length;
}
public long getFilePointer() {
long filePointer;
synchronized(lock) {
if(buffer.remaining() > 0) {
// If there's data in the buffer, figure out where it came from.
final long blockAddress = blockPositions.size() > 0 ? blockPositions.get(0) : 0;
final int blockOffset = buffer.position();
filePointer = blockAddress << 16 | blockOffset;
}
else {
// Otherwise, find the next position to load.
filePointer = position.getBlockAddress() << 16;
}
}
if(validatingInputStream != null && filePointer != validatingInputStream.getFilePointer())
throw new ReviewedStingException(String.format("Position of input stream is invalid; expected (block address, block offset) = (%d,%d), got (%d,%d)",
BlockCompressedFilePointerUtil.getBlockAddress(filePointer),BlockCompressedFilePointerUtil.getBlockOffset(filePointer),
BlockCompressedFilePointerUtil.getBlockAddress(validatingInputStream.getFilePointer()),BlockCompressedFilePointerUtil.getBlockOffset(validatingInputStream.getFilePointer())));
return filePointer;
}
public void seek(long target) {
// TODO: Validate the seek point.
//System.out.printf("Thread %s, BlockInputStream %s: seeking to block %d, offset %d%n",Thread.currentThread().getId(),this,BlockCompressedFilePointerUtil.getBlockAddress(target),BlockCompressedFilePointerUtil.getBlockOffset(target));
synchronized(lock) {
clearBuffers();
position.advancePosition(BlockCompressedFilePointerUtil.getBlockAddress(target));
waitForBufferFill();
buffer.position(BlockCompressedFilePointerUtil.getBlockOffset(target));
if(validatingInputStream != null) {
try {
validatingInputStream.seek(target);
}
catch(IOException ex) {
throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
}
}
}
}
private void clearBuffers() {
this.position.reset();
// Buffer semantics say that outside of a lock, buffer should always be prepared for reading.
// Indicate no data to be read.
buffer.clear();
buffer.limit(0);
blockOffsets.clear();
blockPositions.clear();
}
public boolean eof() {
synchronized(lock) {
// TODO: Handle multiple empty BGZF blocks at end of the file.
return position != null && position.getBlockAddress() >= length;
}
}
public void setCheckCrcs(final boolean check) {
// TODO: Implement
}
/**
* Submits a new access plan for the given dataset.
* @param position The next seek point for BAM data in this reader.
*/
public void submitAccessPlan(final SAMReaderPosition position) {
//System.out.printf("Thread %s: submitting access plan for block at position: %d%n",Thread.currentThread().getId(),position.getBlockAddress());
synchronized(lock) {
// Assume that the access plan is going to tell us to start where we are and move forward.
// If this isn't the case, we'll soon receive a seek request and the buffer will be forced to reset.
if(this.position != null && position.getBlockAddress() < this.position.getBlockAddress())
position.advancePosition(this.position.getBlockAddress());
}
this.position = position;
}
private void compactBuffer() {
// Compact buffer to maximize storage space.
int bytesToRemove = 0;
// Look ahead to see if we can compact away the first block in the series.
while(blockOffsets.size() > 1 && buffer.position() < blockOffsets.get(1)) {
bytesToRemove += blockOffsets.remove();
blockPositions.remove();
}
// If we end up with an empty block at the end of the series, compact this as well.
if(buffer.remaining() == 0 && !blockOffsets.isEmpty() && buffer.position() >= blockOffsets.peek()) {
bytesToRemove += buffer.position();
blockOffsets.remove();
blockPositions.remove();
}
int finalBufferStart = buffer.position() - bytesToRemove;
int finalBufferSize = buffer.remaining();
buffer.position(bytesToRemove);
buffer.compact();
buffer.position(finalBufferStart);
buffer.limit(finalBufferStart+finalBufferSize);
}
/**
* Push contents of incomingBuffer into the end of this buffer.
* MUST be called from a thread that is NOT the reader thread.
* @param incomingBuffer The data being pushed into this input stream.
* @param position target position for the data.
* @param filePosition file address of the first block following the data just loaded.
*/
public void copyIntoBuffer(final ByteBuffer incomingBuffer, final SAMReaderPosition position, final long filePosition) {
synchronized(lock) {
try {
compactBuffer();
// Open up the buffer for more reading.
buffer.limit(buffer.capacity());
// Advance the position to take the most recent read into account.
long lastReadPosition = position.getBlockAddress();
byte[] validBytes = null;
if(validatingInputStream != null) {
validBytes = new byte[incomingBuffer.remaining()];
byte[] currentBytes = new byte[incomingBuffer.remaining()];
int pos = incomingBuffer.position();
int lim = incomingBuffer.limit();
incomingBuffer.get(currentBytes);
incomingBuffer.limit(lim);
incomingBuffer.position(pos);
long currentFilePointer = validatingInputStream.getFilePointer();
validatingInputStream.seek(lastReadPosition << 16);
validatingInputStream.read(validBytes);
validatingInputStream.seek(currentFilePointer);
if(!Arrays.equals(validBytes,currentBytes))
throw new ReviewedStingException(String.format("Bytes being inserted into BlockInputStream %s are incorrect",this));
}
this.position = position;
position.advancePosition(filePosition);
if(buffer.remaining() < incomingBuffer.remaining()) {
//System.out.printf("Thread %s: waiting for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n",Thread.currentThread().getId(),buffer.remaining(),incomingBuffer.remaining());
lock.wait();
//System.out.printf("Thread %s: waited for available space in buffer; buffer remaining = %d, incoming buffer remaining = %d%n", Thread.currentThread().getId(), buffer.remaining(), incomingBuffer.remaining());
}
// Queue list of block offsets / block positions.
blockOffsets.add(buffer.position());
blockPositions.add(lastReadPosition);
buffer.put(incomingBuffer);
// Set up the buffer for reading.
buffer.flip();
bufferFilled = true;
lock.notify();
}
catch(Exception ex) {
reportException(ex);
lock.notify();
}
}
}
void reportException(Throwable t) {
synchronized(lock) {
this.error = t;
lock.notify();
}
}
private void checkForErrors() {
synchronized(lock) {
if(error != null) {
ReviewedStingException toThrow = new ReviewedStingException(String.format("Thread %s, BlockInputStream %s: Unable to retrieve BAM data from disk",Thread.currentThread().getId(),this),error);
toThrow.setStackTrace(error.getStackTrace());
throw toThrow;
}
}
}
/**
* Reads the next byte of data from the input stream.
* @return Next byte of data, from 0->255, as an int.
*/
@Override
public int read() {
byte[] singleByte = new byte[1];
final int bytesRead = read(singleByte);
// Mask so bytes >= 0x80 come back in [128,255] rather than as negative ints; -1 signals no data.
return bytesRead > 0 ? (singleByte[0] & 0xFF) : -1;
}
/**
* Fills the given byte array to the extent possible.
* @param bytes byte array to be filled.
* @return The number of bytes actually read.
*/
@Override
public int read(byte[] bytes) {
return read(bytes,0,bytes.length);
}
@Override
public int read(byte[] bytes, final int offset, final int length) {
int remaining = length;
synchronized(lock) {
while(remaining > 0) {
// Check for error conditions during last read.
checkForErrors();
// If completely out of space, queue up another buffer fill.
waitForBufferFill();
// Couldn't manage to load any data at all; abort and return what's available.
if(buffer.remaining() == 0)
break;
int numBytesToCopy = Math.min(buffer.remaining(),remaining);
buffer.get(bytes,length-remaining+offset,numBytesToCopy);
remaining -= numBytesToCopy;
//if(remaining > 0)
// System.out.printf("Thread %s: read the first %d bytes of a %d byte request%n",Thread.currentThread().getId(),length-remaining,length);
// TODO: Assert that we don't copy across a block boundary
}
// Notify any waiting threads that some of the contents of the buffer were removed.
if(length-remaining > 0)
lock.notify();
}
if(validatingInputStream != null) {
byte[] validBytes = new byte[length];
try {
validatingInputStream.read(validBytes,offset,length);
for(int i = offset; i < offset+length; i++) {
if(bytes[i] != validBytes[i]) {
System.out.printf("Thread %s: preparing to throw an exception because contents don't match%n",Thread.currentThread().getId());
throw new ReviewedStingException(String.format("Thread %s: blockInputStream %s attempting to return wrong set of bytes; mismatch at offset %d",Thread.currentThread().getId(),this,i));
}
}
}
catch(IOException ex) {
throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
}
}
return length - remaining;
}
public void close() {
if(validatingInputStream != null) {
try {
validatingInputStream.close();
}
catch(IOException ex) {
throw new ReviewedStingException("Unable to validate against Picard input stream",ex);
}
}
}
public String getSource() {
return reader.getSamFilePath();
}
private void waitForBufferFill() {
synchronized(lock) {
bufferFilled = false;
if(buffer.remaining() == 0 && !eof()) {
//System.out.printf("Thread %s is waiting for a buffer fill from position %d to buffer %s%n",Thread.currentThread().getId(),position.getBlockAddress(),this);
dispatcher.queueBlockLoad(position);
try {
lock.wait();
}
catch(InterruptedException ex) {
// TODO: handle me.
throw new ReviewedStingException("Interrupt occurred waiting for buffer to fill",ex);
}
if(bufferFilled && buffer.remaining() == 0)
throw new RuntimeEOFException("No more data left in InputStream");
}
}
}
}
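The buffer discipline noted in the constructor, with readable data in [position,limit) and free space in [limit,capacity), is what compactBuffer() and copyIntoBuffer() maintain. A small self-contained demonstration of that flip/compact cycle (values are illustrative):

import java.nio.ByteBuffer;

// After flip(), [position,limit) is readable; compact() moves unread
// bytes to the front and reopens the tail of the buffer for writing.
class BufferCycleSketch {
    public static void main(String[] args) {
        final ByteBuffer buffer = ByteBuffer.allocate(8);
        buffer.put(new byte[]{1, 2, 3, 4});     // write four bytes
        buffer.flip();                          // readable range is now [0,4)
        buffer.get();                           // consume one byte
        buffer.compact();                       // unread bytes {2,3,4} move to the front
        buffer.put((byte) 5);                   // append more data
        buffer.flip();                          // readable range is now {2,3,4,5}
        System.out.println(buffer.remaining()); // prints 4
    }
}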

View File

@ -0,0 +1,188 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broad.tribble.util.BlockCompressedStreamConstants;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.FileInputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
import java.util.zip.DataFormatException;
import java.util.zip.Inflater;
/**
* An engine for loading blocks.
*/
class BlockLoader implements Runnable {
/**
* Coordinates the input queue.
*/
private BGZFBlockLoadingDispatcher dispatcher;
/**
* A cache from which to retrieve open file handles.
*/
private final FileHandleCache fileHandleCache;
/**
* Whether asynchronous decompression should happen.
*/
private final boolean decompress;
/**
* A direct input buffer for incoming data from disk.
*/
private final ByteBuffer inputBuffer;
public BlockLoader(final BGZFBlockLoadingDispatcher dispatcher, final FileHandleCache fileHandleCache, final boolean decompress) {
this.dispatcher = dispatcher;
this.fileHandleCache = fileHandleCache;
this.decompress = decompress;
this.inputBuffer = ByteBuffer.allocateDirect(64*1024 + BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length);
inputBuffer.order(ByteOrder.LITTLE_ENDIAN);
}
public void run() {
for(;;) {
SAMReaderPosition readerPosition = null;
try {
readerPosition = dispatcher.claimNextWorkRequest();
FileInputStream inputStream = fileHandleCache.claimFileInputStream(readerPosition.getReader());
long blockAddress = readerPosition.getBlockAddress();
//System.out.printf("Thread %s: BlockLoader: copying bytes from %s at position %d into %s%n",Thread.currentThread().getId(),inputStream,blockAddress,readerPosition.getInputStream());
ByteBuffer compressedBlock = readBGZFBlock(inputStream,readerPosition.getBlockAddress());
long nextBlockAddress = position(inputStream);
fileHandleCache.releaseFileInputStream(readerPosition.getReader(),inputStream);
ByteBuffer block = decompress ? decompressBGZFBlock(compressedBlock) : compressedBlock;
int bytesCopied = block.remaining();
BlockInputStream bamInputStream = readerPosition.getInputStream();
bamInputStream.copyIntoBuffer(block,readerPosition,nextBlockAddress);
//System.out.printf("Thread %s: BlockLoader: copied %d bytes from %s at position %d into %s%n",Thread.currentThread().getId(),bytesCopied,inputStream,blockAddress,readerPosition.getInputStream());
}
catch(Throwable error) {
if(readerPosition != null && readerPosition.getInputStream() != null)
readerPosition.getInputStream().reportException(error);
}
}
}
private ByteBuffer readBGZFBlock(final FileInputStream inputStream, final long blockAddress) throws IOException {
FileChannel channel = inputStream.getChannel();
// Read the block header
channel.position(blockAddress);
int uncompressedDataSize = 0;
int bufferSize = 0;
do {
inputBuffer.clear();
inputBuffer.limit(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
channel.read(inputBuffer);
// Read out the size of the full BGZF block as a two-byte little-endian value,
// then mask it into an int so the bitwise contents are interpreted as unsigned.
inputBuffer.flip();
if(inputBuffer.remaining() != BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH)
throw new ReviewedStingException("BUG: unable to read a the complete block header in one pass.");
// Verify that the file was read at a valid point.
if(unpackUByte8(inputBuffer,0) != BlockCompressedStreamConstants.GZIP_ID1 ||
unpackUByte8(inputBuffer,1) != BlockCompressedStreamConstants.GZIP_ID2 ||
unpackUByte8(inputBuffer,3) != BlockCompressedStreamConstants.GZIP_FLG ||
unpackUInt16(inputBuffer,10) != BlockCompressedStreamConstants.GZIP_XLEN ||
unpackUByte8(inputBuffer,12) != BlockCompressedStreamConstants.BGZF_ID1 ||
unpackUByte8(inputBuffer,13) != BlockCompressedStreamConstants.BGZF_ID2) {
throw new ReviewedStingException("BUG: Started reading compressed block at incorrect position");
}
inputBuffer.position(BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET);
bufferSize = unpackUInt16(inputBuffer,BlockCompressedStreamConstants.BLOCK_LENGTH_OFFSET)+1;
// Adjust buffer limits and finish reading the block. Also read the next header, just in case there's a 0-byte block.
inputBuffer.limit(bufferSize);
inputBuffer.position(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
channel.read(inputBuffer);
// Check the uncompressed length. If 0 and not at EOF, we'll want to check the next block.
uncompressedDataSize = inputBuffer.getInt(inputBuffer.limit()-4);
//System.out.printf("Uncompressed block size of the current block (at position %d) is %d%n",channel.position()-inputBuffer.limit(),uncompressedDataSize);
}
while(uncompressedDataSize == 0 && channel.position() < channel.size());
// Prepare the buffer for reading.
inputBuffer.flip();
return inputBuffer;
}
private ByteBuffer decompressBGZFBlock(final ByteBuffer bgzfBlock) throws DataFormatException {
final int compressedBufferSize = bgzfBlock.remaining();
// Determine the uncompressed buffer size (stored as ISIZE in the last four bytes of the block).
bgzfBlock.position(bgzfBlock.limit()-4);
int uncompressedBufferSize = bgzfBlock.getInt();
byte[] uncompressedContent = new byte[uncompressedBufferSize];
// Bound the CDATA section of the buffer.
bgzfBlock.limit(compressedBufferSize-BlockCompressedStreamConstants.BLOCK_FOOTER_LENGTH);
bgzfBlock.position(BlockCompressedStreamConstants.BLOCK_HEADER_LENGTH);
byte[] compressedContent = new byte[bgzfBlock.remaining()];
ByteBuffer.wrap(compressedContent).put(bgzfBlock);
// Decompress the buffer.
final Inflater inflater = new Inflater(true);
inflater.setInput(compressedContent);
int bytesUncompressed = inflater.inflate(uncompressedContent);
if(bytesUncompressed != uncompressedBufferSize)
throw new ReviewedStingException("Error decompressing block");
return ByteBuffer.wrap(uncompressedContent);
}
private long position(final FileInputStream inputStream) throws IOException {
return inputStream.getChannel().position();
}
private int unpackUByte8(final ByteBuffer buffer,final int position) {
return buffer.get(position) & 0xFF;
}
private int unpackUInt16(final ByteBuffer buffer,final int position) {
// Read the two-byte value and mask it into an int so it is
// interpreted as an unsigned 16-bit integer.
return buffer.getShort(position) & 0xFFFF;
}
}
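A note on new Inflater(true) above: the boolean selects "nowrap" mode, i.e. raw DEFLATE with no zlib header or checksum, which is how the CDATA payload of a BGZF block is stored. A round-trip sketch under that assumption:

import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

// Round-trips a byte array through raw (nowrap) DEFLATE, the same mode
// decompressBGZFBlock() uses for the CDATA section of a BGZF block.
class RawDeflateSketch {
    public static void main(String[] args) throws DataFormatException {
        final byte[] original = "BGZF payload".getBytes();

        final Deflater deflater = new Deflater(Deflater.DEFAULT_COMPRESSION, true); // nowrap
        deflater.setInput(original);
        deflater.finish();
        final byte[] compressed = new byte[128];
        final int compressedLength = deflater.deflate(compressed);

        final Inflater inflater = new Inflater(true); // nowrap, as in decompressBGZFBlock()
        inflater.setInput(compressed, 0, compressedLength);
        final byte[] restored = new byte[original.length];
        final int restoredLength = inflater.inflate(restored);

        System.out.println(restoredLength == original.length ? new String(restored)
                                                             : "round trip failed");
    }
}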

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.StingException;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
/**
* Caches frequently used file handles, keeping at most a fixed number open at once.
* TODO: Generalize to support arbitrary file handle caches.
*/
public class FileHandleCache {
/**
* The underlying data structure storing file handles.
*/
private final FileHandleStorage fileHandleStorage;
/**
* How many file handles should be kept open at once.
*/
private final int cacheSize;
/**
* A uniquifier: assign a unique ID to every instance of a file handle.
*/
private final Map<SAMReaderID,Integer> keyCounter = new HashMap<SAMReaderID,Integer>();
/**
* A shared lock, private so that outside users cannot notify it.
*/
private final Object lock = new Object();
/**
* Indicates how many file handles are outstanding at this point.
*/
private int numOutstandingFileHandles = 0;
/**
* Create a new file handle cache of the given cache size.
* @param cacheSize how many readers to hold open at once.
*/
public FileHandleCache(final int cacheSize) {
this.cacheSize = cacheSize;
fileHandleStorage = new FileHandleStorage();
}
/**
* Retrieves or opens a file handle for the given reader ID.
* @param key The reader ID used as the cache key.
* @return A file input stream from the cache, if available, or otherwise newly opened.
*/
public FileInputStream claimFileInputStream(final SAMReaderID key) {
synchronized(lock) {
FileInputStream inputStream = findExistingEntry(key);
if(inputStream == null) {
try {
// If the cache is maxed out, wait for another file handle to emerge.
// Loop rather than test once, to guard against spurious wakeups.
while(numOutstandingFileHandles >= cacheSize)
lock.wait();
}
catch(InterruptedException ex) {
throw new ReviewedStingException("Interrupted while waiting for a file handle");
}
inputStream = openInputStream(key);
}
numOutstandingFileHandles++;
//System.out.printf("Handing input stream %s to thread %s%n",inputStream,Thread.currentThread().getId());
return inputStream;
}
}
/**
* Releases the current reader and returns it to the cache.
* @param key The reader.
* @param inputStream The stream being used.
*/
public void releaseFileInputStream(final SAMReaderID key, final FileInputStream inputStream) {
synchronized(lock) {
numOutstandingFileHandles--;
UniqueKey newID = allocateKey(key);
fileHandleStorage.put(newID,inputStream);
// Let any listeners know that another file handle has become available.
lock.notify();
}
}
/**
* Finds an existing entry in the storage mechanism.
* @param key Reader.
* @return a cached stream, if available; otherwise, null.
*/
private FileInputStream findExistingEntry(final SAMReaderID key) {
int existingHandles = getMostRecentUniquifier(key);
// See if any of the keys currently exist in the repository.
for(int i = 0; i <= existingHandles; i++) {
UniqueKey uniqueKey = new UniqueKey(key,i);
if(fileHandleStorage.containsKey(uniqueKey))
return fileHandleStorage.remove(uniqueKey);
}
return null;
}
/**
* Gets the most recent uniquifier used for the given reader.
* @param reader Reader for which to determine uniqueness.
* @return The most recent uniquifier for the reader, or -1 if none has been assigned.
*/
private int getMostRecentUniquifier(final SAMReaderID reader) {
if(keyCounter.containsKey(reader))
return keyCounter.get(reader);
else return -1;
}
private UniqueKey allocateKey(final SAMReaderID reader) {
int uniquifier = getMostRecentUniquifier(reader)+1;
keyCounter.put(reader,uniquifier);
return new UniqueKey(reader,uniquifier);
}
private FileInputStream openInputStream(final SAMReaderID reader) {
try {
return new FileInputStream(reader.getSamFilePath());
}
catch(IOException ex) {
throw new StingException("Unable to open input file");
}
}
private void closeInputStream(final FileInputStream inputStream) {
try {
inputStream.close();
}
catch(IOException ex) {
throw new StingException("Unable to open input file");
}
}
/**
* Actually contains the file handles, purging them as they get too old.
*/
private class FileHandleStorage extends LinkedHashMap<UniqueKey,FileInputStream> {
/**
* Remove the oldest entry
* @param entry Entry to consider removing.
* @return True if the cache size has been exceeded. False otherwise.
*/
@Override
protected boolean removeEldestEntry(Map.Entry<UniqueKey,FileInputStream> entry) {
synchronized (lock) {
if(size() > cacheSize) {
keyCounter.put(entry.getKey().key,keyCounter.get(entry.getKey().key)-1);
closeInputStream(entry.getValue());
return true;
}
}
return false;
}
}
/**
* Uniquifies a key by adding a numerical uniquifier.
*/
private class UniqueKey {
/**
* The file handle's key.
*/
private final SAMReaderID key;
/**
* A uniquifier, so that multiple of the same reader can exist in the cache.
*/
private final int uniqueID;
public UniqueKey(final SAMReaderID reader, final int uniqueID) {
this.key = reader;
this.uniqueID = uniqueID;
}
@Override
public boolean equals(Object other) {
if(!(other instanceof UniqueKey))
return false;
UniqueKey otherUniqueKey = (UniqueKey)other;
return key.equals(otherUniqueKey.key) && this.uniqueID == otherUniqueKey.uniqueID;
}
@Override
public int hashCode() {
return key.hashCode();
}
}
}
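The removeEldestEntry override in FileHandleStorage is the standard LinkedHashMap idiom for a size-bounded cache. Stripped of the file-handle bookkeeping, the core pattern looks like this (generic sketch, not this class's API):

import java.util.LinkedHashMap;
import java.util.Map;

// Minimal size-bounded cache: LinkedHashMap calls removeEldestEntry after
// each put() and evicts the eldest entry whenever the size limit is exceeded,
// the same mechanism FileHandleStorage uses to close stale file handles.
class BoundedCacheSketch<K, V> extends LinkedHashMap<K, V> {
    private final int maxEntries;

    BoundedCacheSketch(final int maxEntries) {
        this.maxEntries = maxEntries;
    }

    @Override
    protected boolean removeEldestEntry(final Map.Entry<K, V> eldest) {
        return size() > maxEntries; // evict when over capacity
    }
}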

View File

@ -29,6 +29,7 @@ import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
import org.broadinstitute.sting.utils.interval.IntervalUtils;
@ -40,28 +41,25 @@ import java.util.*;
*/
public class FilePointer {
protected final SortedMap<SAMReaderID,SAMFileSpan> fileSpans = new TreeMap<SAMReaderID,SAMFileSpan>();
protected final BAMOverlap overlap;
protected final List<GenomeLoc> locations;
protected final List<GenomeLoc> locations = new ArrayList<GenomeLoc>();
/**
* Does this file pointer point into an unmapped region?
*/
protected final boolean isRegionUnmapped;
public FilePointer() {
this((BAMOverlap)null);
}
public FilePointer(final GenomeLoc location) {
this.overlap = null;
this.locations = Collections.singletonList(location);
this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
}
public FilePointer(final BAMOverlap overlap) {
this.overlap = overlap;
this.locations = new ArrayList<GenomeLoc>();
this.isRegionUnmapped = false;
public FilePointer(final GenomeLoc... locations) {
this.locations.addAll(Arrays.asList(locations));
boolean foundMapped = false, foundUnmapped = false;
for(GenomeLoc location: locations) {
if(GenomeLoc.isUnmapped(location))
foundUnmapped = true;
else
foundMapped = true;
}
if(foundMapped && foundUnmapped)
throw new ReviewedStingException("BUG: File pointers cannot be mixed mapped/unmapped.");
this.isRegionUnmapped = foundUnmapped;
}
/**
@ -217,4 +215,20 @@ public class FilePointer {
fileSpan = fileSpan.union((GATKBAMFileSpan)iterators[i].next().getValue());
combined.addFileSpans(initialElement.getKey(),fileSpan);
}
@Override
public String toString() {
StringBuilder builder = new StringBuilder();
builder.append("FilePointer:%n");
builder.append("\tlocations = {");
builder.append(Utils.join(";",locations));
builder.append("}%n\tregions = %n");
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: fileSpans.entrySet()) {
builder.append(entry.getKey());
builder.append("= {");
builder.append(entry.getValue());
builder.append("}");
}
return builder.toString();
}
}

View File

@ -25,419 +25,58 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.AbstractBAMFileIndex;
import net.sf.samtools.Bin;
import net.sf.samtools.BrowseableBAMIndex;
import net.sf.samtools.SAMSequenceRecord;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.GenomeLoc;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.collections.Pair;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.*;
import java.util.Iterator;
/**
* Shard intervals based on position within the BAM file.
*
* @author mhanna
* @version 0.1
* Handles the process of aggregating BAM intervals into individual shards.
* TODO: The task performed by IntervalSharder is now better performed by LocusShardBalancer. Merge BAMScheduler and IntervalSharder.
*/
public class IntervalSharder {
private static Logger logger = Logger.getLogger(IntervalSharder.class);
public class IntervalSharder implements Iterator<FilePointer> {
/**
* The iterator actually laying out the data for BAM scheduling.
*/
private final PeekableIterator<FilePointer> wrappedIterator;
public static Iterator<FilePointer> shardIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
return new IntervalSharder.FilePointerIterator(dataSource,loci);
/**
* The parser, for interval manipulation.
*/
private final GenomeLocParser parser;
public static IntervalSharder shardOverAllReads(final SAMDataSource dataSource, final GenomeLocParser parser) {
return new IntervalSharder(BAMScheduler.createOverAllReads(dataSource,parser),parser);
}
public static IntervalSharder shardOverMappedReads(final SAMDataSource dataSource, final SAMSequenceDictionary sequenceDictionary, final GenomeLocParser parser) {
return new IntervalSharder(BAMScheduler.createOverMappedReads(dataSource,sequenceDictionary,parser),parser);
}
public static IntervalSharder shardOverIntervals(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
return new IntervalSharder(BAMScheduler.createOverIntervals(dataSource,loci),loci.getGenomeLocParser());
}
private IntervalSharder(final BAMScheduler scheduler, final GenomeLocParser parser) {
wrappedIterator = new PeekableIterator<FilePointer>(scheduler);
this.parser = parser;
}
public boolean hasNext() {
return wrappedIterator.hasNext();
}
/**
* A lazy-loading iterator over file pointers.
* Accumulate shards where there's no additional cost to processing the next shard in the sequence.
* @return The next file pointer to process.
*/
private static class FilePointerIterator implements Iterator<FilePointer> {
final SAMDataSource dataSource;
final GenomeLocSortedSet loci;
final PeekableIterator<GenomeLoc> locusIterator;
final Queue<FilePointer> cachedFilePointers = new LinkedList<FilePointer>();
public FilePointerIterator(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
this.dataSource = dataSource;
this.loci = loci;
locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
advance();
}
public boolean hasNext() {
return !cachedFilePointers.isEmpty();
}
public FilePointer next() {
if(!hasNext())
throw new NoSuchElementException("FilePointerIterator iteration is complete");
FilePointer filePointer = cachedFilePointers.remove();
if(cachedFilePointers.isEmpty())
advance();
return filePointer;
}
public void remove() {
throw new UnsupportedOperationException("Cannot remove from a FilePointerIterator");
}
private void advance() {
GenomeLocSortedSet nextBatch = new GenomeLocSortedSet(loci.getGenomeLocParser());
String contig = null;
// If the next section of the BAM to be processed is unmapped, handle this region separately.
while(locusIterator.hasNext() && nextBatch.isEmpty()) {
contig = null;
while(locusIterator.hasNext() && (contig == null || (!GenomeLoc.isUnmapped(locusIterator.peek()) && locusIterator.peek().getContig().equals(contig)))) {
GenomeLoc nextLocus = locusIterator.next();
contig = nextLocus.getContig();
nextBatch.add(nextLocus);
}
}
if(nextBatch.size() > 0) {
cachedFilePointers.addAll(shardIntervalsOnContig(dataSource,contig,nextBatch));
}
}
public FilePointer next() {
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}
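The loop in next() is a general coalescing pattern: buffer one element of lookahead, and keep merging the next element into the current one while merging is free (here, while minus(...) == 0 and the mapped/unmapped status matches). A hedged, generic sketch of the pattern over [start, stop] integer ranges; CoalescingIterator and its adjacency rule are illustrative stand-ins for FilePointer.minus()/combine():

import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;

class CoalescingIterator implements Iterator<int[]> {
    private final Iterator<int[]> source; // [start, stop] ranges, sorted by start
    private int[] buffered;               // one-element lookahead, like PeekableIterator

    CoalescingIterator(Iterator<int[]> source) {
        this.source = source;
        this.buffered = source.hasNext() ? source.next() : null;
    }

    public boolean hasNext() { return buffered != null; }

    public int[] next() {
        if (buffered == null) throw new NoSuchElementException();
        int[] current = buffered;
        buffered = source.hasNext() ? source.next() : null;
        // Merge while the next range is adjacent or overlapping ("no additional cost").
        while (buffered != null && buffered[0] <= current[1] + 1) {
            current = new int[] { current[0], Math.max(current[1], buffered[1]) };
            buffered = source.hasNext() ? source.next() : null;
        }
        return current;
    }

    public void remove() { throw new UnsupportedOperationException(); }

    public static void main(String[] args) {
        Iterator<int[]> in = Arrays.asList(
            new int[]{1, 5}, new int[]{6, 9}, new int[]{20, 25}).iterator();
        CoalescingIterator it = new CoalescingIterator(in);
        while (it.hasNext()) System.out.println(Arrays.toString(it.next()));
        // prints [1, 9] then [20, 25]
    }
}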
/**
* Merge / split intervals based on an awareness of the structure of the BAM file.
* @param dataSource Data source from which to draw index data.
* @param contig Contig against which to align the intervals. If null, create a file pointer across unmapped reads.
* @param loci Sorted set of intervals to shard, all within the given contig.
* @return A list of file pointers spanning the given intervals.
*/
private static List<FilePointer> shardIntervalsOnContig(final SAMDataSource dataSource, final String contig, final GenomeLocSortedSet loci) {
// If the contig is null, eliminate the chopping process and build out a file pointer consisting of the unmapped region of all BAMs.
if(contig == null) {
FilePointer filePointer = new FilePointer(GenomeLoc.UNMAPPED);
for(SAMReaderID id: dataSource.getReaderIDs())
filePointer.addFileSpans(id,null);
return Collections.singletonList(filePointer);
}
// Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
List<FilePointer> filePointers = new ArrayList<FilePointer>();
FilePointer lastFilePointer = null;
BAMOverlap lastBAMOverlap = null;
Map<SAMReaderID,BrowseableBAMIndex> readerToIndexMap = new HashMap<SAMReaderID,BrowseableBAMIndex>();
IntervalSharder.BinMergingIterator binMerger = new IntervalSharder.BinMergingIterator();
for(SAMReaderID id: dataSource.getReaderIDs()) {
final SAMSequenceRecord referenceSequence = dataSource.getHeader(id).getSequence(contig);
// If this contig can't be found in the reference, skip over it.
if(referenceSequence == null && contig != null)
continue;
final BrowseableBAMIndex index = (BrowseableBAMIndex)dataSource.getIndex(id);
binMerger.addReader(id,
index,
referenceSequence.getSequenceIndex(),
index.getBinsOverlapping(referenceSequence.getSequenceIndex(),1,referenceSequence.getSequenceLength()).iterator());
// Cache the reader for later data lookup.
readerToIndexMap.put(id,index);
}
PeekableIterator<BAMOverlap> binIterator = new PeekableIterator<BAMOverlap>(binMerger);
for(GenomeLoc location: loci) {
if(!location.getContig().equals(contig))
throw new ReviewedStingException("Location outside bounds of contig");
if(!binIterator.hasNext())
break;
int locationStart = location.getStart();
final int locationStop = location.getStop();
// Advance to first bin.
while(binIterator.peek().stop < locationStart)
binIterator.next();
// Add all relevant bins to a list. If the given bin extends beyond the end of the current interval, make
// sure the extending bin is not pruned from the list.
List<BAMOverlap> bamOverlaps = new ArrayList<BAMOverlap>();
while(binIterator.hasNext() && binIterator.peek().stop <= locationStop)
bamOverlaps.add(binIterator.next());
if(binIterator.hasNext() && binIterator.peek().start <= locationStop)
bamOverlaps.add(binIterator.peek());
// Bins found; try to match bins with locations.
Iterator<BAMOverlap> bamOverlapIterator = bamOverlaps.iterator();
while(locationStop >= locationStart) {
int binStart = lastFilePointer!=null ? lastFilePointer.overlap.start : 0;
int binStop = lastFilePointer!=null ? lastFilePointer.overlap.stop : 0;
while(binStop < locationStart && bamOverlapIterator.hasNext()) {
if(lastFilePointer != null && lastFilePointer.locations.size() > 0)
filePointers.add(lastFilePointer);
lastBAMOverlap = bamOverlapIterator.next();
lastFilePointer = new FilePointer(lastBAMOverlap);
binStart = lastFilePointer.overlap.start;
binStop = lastFilePointer.overlap.stop;
}
if(locationStart < binStart) {
// The region starts before the first bin in the sequence. Add the region occurring before the sequence.
if(lastFilePointer != null && lastFilePointer.locations.size() > 0) {
filePointers.add(lastFilePointer);
lastFilePointer = null;
lastBAMOverlap = null;
}
final int regionStop = Math.min(locationStop,binStart-1);
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop);
lastFilePointer = new FilePointer(subset);
locationStart = regionStop + 1;
}
else if(locationStart > binStop) {
// The region starts after the last bin in the sequence. Add the region occurring after the sequence.
if(lastFilePointer != null && lastFilePointer.locations.size() > 0) {
filePointers.add(lastFilePointer);
lastFilePointer = null;
lastBAMOverlap = null;
}
GenomeLoc subset = loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,locationStop);
filePointers.add(new FilePointer(subset));
locationStart = locationStop + 1;
}
else {
if(lastFilePointer == null)
throw new ReviewedStingException("Illegal state: initializer failed to create cached file pointer.");
// The start of the region overlaps the bin. Add the overlapping subset.
final int regionStop = Math.min(locationStop,binStop);
lastFilePointer.addLocation(loci.getGenomeLocParser().createGenomeLoc(location.getContig(),locationStart,regionStop));
locationStart = regionStop + 1;
}
}
}
if(lastFilePointer != null && lastFilePointer.locations.size() > 0)
filePointers.add(lastFilePointer);
// Lookup the locations for every file pointer in the index.
for(SAMReaderID id: readerToIndexMap.keySet()) {
BrowseableBAMIndex index = readerToIndexMap.get(id);
for(FilePointer filePointer: filePointers)
filePointer.addFileSpans(id,index.getSpanOverlapping(filePointer.overlap.getBin(id)));
}
return filePointers;
}
private static class BinMergingIterator implements Iterator<BAMOverlap> {
private PriorityQueue<BinQueueState> binQueue = new PriorityQueue<BinQueueState>();
private Queue<BAMOverlap> pendingOverlaps = new LinkedList<BAMOverlap>();
public void addReader(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, Iterator<Bin> bins) {
binQueue.add(new BinQueueState(id,index,referenceSequence,new IntervalSharder.LowestLevelBinFilteringIterator(index,bins)));
}
public boolean hasNext() {
return pendingOverlaps.size() > 0 || !binQueue.isEmpty();
}
public BAMOverlap next() {
if(!hasNext())
throw new NoSuchElementException("No elements left in merging iterator");
if(pendingOverlaps.isEmpty())
advance();
return pendingOverlaps.remove();
}
public void advance() {
List<ReaderBin> bins = new ArrayList<ReaderBin>();
int boundsStart, boundsStop;
// Prime the pump
if(binQueue.isEmpty())
return;
bins.add(getNextBin());
boundsStart = bins.get(0).getStart();
boundsStop = bins.get(0).getStop();
// Accumulate all the bins that overlap the current bin, in sorted order.
while(!binQueue.isEmpty() && peekNextBin().getStart() <= boundsStop) {
ReaderBin bin = getNextBin();
bins.add(bin);
boundsStart = Math.min(boundsStart,bin.getStart());
boundsStop = Math.max(boundsStop,bin.getStop());
}
List<Pair<Integer,Integer>> range = new ArrayList<Pair<Integer,Integer>>();
int start = bins.get(0).getStart();
int stop = bins.get(0).getStop();
while(start <= boundsStop) {
// Find the next stopping point.
for(ReaderBin bin: bins) {
stop = Math.min(stop,bin.getStop());
if(start < bin.getStart())
stop = Math.min(stop,bin.getStart()-1);
}
range.add(new Pair<Integer,Integer>(start,stop));
// If the last entry added included the last element, stop.
if(stop >= boundsStop)
break;
// Find the next start.
start = stop + 1;
for(ReaderBin bin: bins) {
if(start >= bin.getStart() && start <= bin.getStop())
break;
else if(start < bin.getStart()) {
start = bin.getStart();
break;
}
}
}
// Add the next series of BAM overlaps to the window.
for(Pair<Integer,Integer> window: range) {
BAMOverlap bamOverlap = new BAMOverlap(window.first,window.second);
for(ReaderBin bin: bins)
bamOverlap.addBin(bin.id,bin.bin);
pendingOverlaps.add(bamOverlap);
}
}
public void remove() { throw new UnsupportedOperationException("Cannot remove from a merging iterator."); }
private ReaderBin peekNextBin() {
if(binQueue.isEmpty())
throw new NoSuchElementException("No more bins are available");
BinQueueState current = binQueue.peek();
return new ReaderBin(current.getReaderID(),current.getIndex(),current.getReferenceSequence(),current.peekNextBin());
}
private ReaderBin getNextBin() {
if(binQueue.isEmpty())
throw new NoSuchElementException("No more bins are available");
BinQueueState current = binQueue.remove();
ReaderBin readerBin = new ReaderBin(current.getReaderID(),current.getIndex(),current.getReferenceSequence(),current.nextBin());
if(current.hasNextBin())
binQueue.add(current);
return readerBin;
}
}
/**
* Filters out bins not at the lowest level in the tree.
*/
private static class LowestLevelBinFilteringIterator implements Iterator<Bin> {
private BrowseableBAMIndex index;
private Iterator<Bin> wrappedIterator;
private Bin nextBin;
public LowestLevelBinFilteringIterator(final BrowseableBAMIndex index, Iterator<Bin> iterator) {
this.index = index;
this.wrappedIterator = iterator;
advance();
}
public boolean hasNext() {
return nextBin != null;
}
public Bin next() {
Bin bin = nextBin;
advance();
return bin;
}
public void remove() { throw new UnsupportedOperationException("Remove operation is not supported"); }
private void advance() {
nextBin = null;
while(wrappedIterator.hasNext() && nextBin == null) {
Bin bin = wrappedIterator.next();
if(index.getLevelForBin(bin) == AbstractBAMFileIndex.getNumIndexLevels()-1)
nextBin = bin;
}
}
}
public void remove() { throw new UnsupportedOperationException("Unable to remove from an interval sharder."); }
}
class BinQueueState implements Comparable<org.broadinstitute.sting.gatk.datasources.reads.BinQueueState> {
private final SAMReaderID id;
private final BrowseableBAMIndex index;
private final int referenceSequence;
private final PeekableIterator<Bin> bins;
private int firstLocusInCurrentBin;
private int lastLocusInCurrentBin;
public BinQueueState(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Iterator<Bin> bins) {
this.id = id;
this.index = index;
this.referenceSequence = referenceSequence;
this.bins = new PeekableIterator<Bin>(bins);
refreshLocusInBinCache();
}
public SAMReaderID getReaderID() {
return id;
}
public BrowseableBAMIndex getIndex() {
return index;
}
public int getReferenceSequence() {
return referenceSequence;
}
public boolean hasNextBin() {
return bins.hasNext();
}
public Bin peekNextBin() {
return bins.peek();
}
public Bin nextBin() {
Bin nextBin = bins.next();
refreshLocusInBinCache();
return nextBin;
}
public int compareTo(org.broadinstitute.sting.gatk.datasources.reads.BinQueueState other) {
if(!this.bins.hasNext() && !other.bins.hasNext()) return 0;
if(!this.bins.hasNext()) return -1;
if(!this.bins.hasNext()) return 1;
// Both BinQueueStates have next bins. Before proceeding, make sure the bin cache is valid.
if(this.firstLocusInCurrentBin <= 0 || this.lastLocusInCurrentBin <= 0 ||
other.firstLocusInCurrentBin <= 0 || other.lastLocusInCurrentBin <= 0) {
throw new ReviewedStingException("Sharding mechanism error - bin->locus cache is invalid.");
}
// Straight integer subtraction works here because lhsStart, rhsStart always positive.
if(this.firstLocusInCurrentBin != other.firstLocusInCurrentBin)
return this.firstLocusInCurrentBin - other.firstLocusInCurrentBin;
// Straight integer subtraction works here because lhsStop, rhsStop always positive.
return this.lastLocusInCurrentBin - other.lastLocusInCurrentBin;
}
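The "straight integer subtraction" comments above are worth keeping: subtracting arbitrary ints in a comparator can overflow and invert the result's sign, so subtraction is only safe when both operands are known to be non-negative, as they are here. A small demonstration of the hazard and the overflow-safe alternative (Integer.compare is available from Java 7 onward):

public class ComparatorOverflow {
    public static void main(String[] args) {
        int lhs = Integer.MIN_VALUE, rhs = 1;
        // Unsafe for unconstrained values: MIN_VALUE - 1 overflows to a
        // positive number, wrongly claiming lhs > rhs.
        System.out.println(lhs - rhs);                 // 2147483647
        // Overflow-safe comparison:
        System.out.println(Integer.compare(lhs, rhs)); // -1
    }
}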
private void refreshLocusInBinCache() {
firstLocusInCurrentBin = -1;
lastLocusInCurrentBin = -1;
if(bins.hasNext()) {
Bin bin = bins.peek();
firstLocusInCurrentBin = index.getFirstLocusInBin(bin);
lastLocusInCurrentBin = index.getLastLocusInBin(bin);
}
}
}

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import java.util.Iterator;
/**
* Batch granular file pointers into potentially larger shards.
*/
public class LocusShardBalancer extends ShardBalancer {
/**
* Convert iterators of file pointers into balanced iterators of shards.
* @return An iterator over balanced shards.
*/
public Iterator<Shard> iterator() {
return new Iterator<Shard>() {
public boolean hasNext() {
return filePointers.hasNext();
}
public Shard next() {
FilePointer current = filePointers.next();
while(filePointers.hasNext() && current.minus(filePointers.peek()) == 0)
current = current.combine(parser,filePointers.next());
return new LocusShard(parser,readsDataSource,current.getLocations(),current.fileSpans);
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
};
}
}

View File

@ -1,178 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMFileSpan;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
/**
* A sharding strategy for loci based on reading of the index.
*/
public class LocusShardStrategy implements ShardStrategy {
/**
* The data source to use when performing this sharding.
*/
private final SAMDataSource reads;
/**
* the parser for creating shards
*/
private GenomeLocParser genomeLocParser;
/**
* An iterator through the available file pointers.
*/
private final Iterator<FilePointer> filePointerIterator;
/**
* Construct the shard strategy from a sequence dictionary, a shard size, and genomeLocs.
* @param reads Data source from which to load index data.
* @param locations List of locations for which to load data.
*/
public LocusShardStrategy(SAMDataSource reads, IndexedFastaSequenceFile reference, GenomeLocParser genomeLocParser, GenomeLocSortedSet locations) {
this.reads = reads;
this.genomeLocParser = genomeLocParser;
if(!reads.isEmpty()) {
GenomeLocSortedSet intervals;
if(locations == null) {
// If no locations were passed in, shard the entire BAM file.
SAMFileHeader header = reads.getHeader();
intervals = new GenomeLocSortedSet(genomeLocParser);
for(SAMSequenceRecord readsSequenceRecord: header.getSequenceDictionary().getSequences()) {
// Check this sequence against the reference sequence dictionary.
// TODO: Do a better job of merging reads + reference.
SAMSequenceRecord refSequenceRecord = reference.getSequenceDictionary().getSequence(readsSequenceRecord.getSequenceName());
if(refSequenceRecord != null) {
final int length = Math.min(readsSequenceRecord.getSequenceLength(),refSequenceRecord.getSequenceLength());
intervals.add(genomeLocParser.createGenomeLoc(readsSequenceRecord.getSequenceName(),1,length));
}
}
}
else
intervals = locations;
if(reads.isLowMemoryShardingEnabled()) {
/*
Iterator<FilePointer> filePointerIterator = new LowMemoryIntervalSharder(this.reads,intervals);
List<FilePointer> filePointers = new ArrayList<FilePointer>();
while(filePointerIterator.hasNext())
filePointers.add(filePointerIterator.next());
this.filePointerIterator = filePointers.iterator();
*/
this.filePointerIterator = new LowMemoryIntervalSharder(this.reads,intervals);
}
else
this.filePointerIterator = IntervalSharder.shardIntervals(this.reads,intervals);
}
else {
final int maxShardSize = 100000;
List<FilePointer> filePointers = new ArrayList<FilePointer>();
if(locations == null) {
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
filePointers.add(new FilePointer(genomeLocParser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)));
}
}
}
else {
for(GenomeLoc interval: locations) {
while(interval.size() > maxShardSize) {
filePointers.add(new FilePointer(locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)));
interval = locations.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
}
filePointers.add(new FilePointer(interval));
}
}
filePointerIterator = filePointers.iterator();
}
}
/**
* returns true if there are additional shards
*
* @return false if we're done processing shards
*/
public boolean hasNext() {
return filePointerIterator.hasNext();
}
public long shardNumber = 0;
/**
* gets the next Shard
*
* @return the next shard
*/
public LocusShard next() {
FilePointer nextFilePointer = filePointerIterator.next();
Map<SAMReaderID,SAMFileSpan> fileSpansBounding = nextFilePointer.fileSpans != null ? nextFilePointer.fileSpans : null;
/*
System.out.printf("Shard %d: interval = {",++shardNumber);
for(GenomeLoc locus: nextFilePointer.locations)
System.out.printf("%s;",locus);
System.out.printf("}; ");
if(fileSpansBounding == null)
System.out.printf("no shard data%n");
else {
SortedMap<SAMReaderID,SAMFileSpan> sortedSpans = new TreeMap<SAMReaderID,SAMFileSpan>(fileSpansBounding);
for(Map.Entry<SAMReaderID,SAMFileSpan> entry: sortedSpans.entrySet()) {
System.out.printf("Shard %d:%s = {%s}%n",shardNumber,entry.getKey().samFile,entry.getValue());
}
}
*/
return new LocusShard(genomeLocParser, reads,nextFilePointer.locations,fileSpansBounding);
}
/** we don't support the remove command */
public void remove() {
throw new UnsupportedOperationException("ShardStrategies don't support remove()");
}
/**
* makes the IntervalShard iterable, i.e. usable in a for loop.
*
* @return this shard strategy, usable directly as an iterator.
*/
public Iterator<Shard> iterator() {
return this;
}
}

View File

@ -1,68 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.Iterator;
/**
* Handles the process of aggregating BAM intervals into individual shards.
*/
public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
/**
* The iterator actually laying out the data for BAM scheduling.
*/
private final PeekableIterator<FilePointer> wrappedIterator;
/**
* The parser, for interval manipulation.
*/
private final GenomeLocParser parser;
public LowMemoryIntervalSharder(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
wrappedIterator = new PeekableIterator<FilePointer>(new BAMScheduler(dataSource,loci));
parser = loci.getGenomeLocParser();
}
public boolean hasNext() {
return wrappedIterator.hasNext();
}
/**
* Accumulate shards where there's no additional cost to processing the next shard in the sequence.
* @return The next file pointer to process.
*/
public FilePointer next() {
FilePointer current = wrappedIterator.next();
while(wrappedIterator.hasNext() && current.isRegionUnmapped == wrappedIterator.peek().isRegionUnmapped && current.minus(wrappedIterator.peek()) == 0)
current = current.combine(parser,wrappedIterator.next());
return current;
}
public void remove() { throw new UnsupportedOperationException("Unable to remove from an interval sharder."); }
}

View File

@ -1,34 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.List;
/**
* A single, monolithic shard bridging all available data.
* @author mhanna
* @version 0.1
*/
public class MonolithicShard extends Shard {
/**
* Creates a new monolithic shard of the given type.
* @param shardType Type of the shard. Must be either read or locus; interval shards are not supported.
* @param locs Intervals that this monolithic shard should process.
*/
public MonolithicShard(GenomeLocParser parser, SAMDataSource readsDataSource, ShardType shardType, List<GenomeLoc> locs) {
super(parser, shardType, locs, readsDataSource, null, false);
if(shardType != ShardType.LOCUS && shardType != ShardType.READ)
throw new ReviewedStingException("Invalid shard type for monolithic shard: " + shardType);
}
/**
* String representation of this shard.
* @return "entire genome".
*/
@Override
public String toString() {
return "entire genome";
}
}

View File

@ -1,77 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
/**
* Create a giant shard representing all the data in the input BAM(s).
*
* @author mhanna
* @version 0.1
*/
public class MonolithicShardStrategy implements ShardStrategy {
/**
* The single shard associated with this sharding strategy.
*/
private MonolithicShard shard;
/**
* Create a new shard strategy for shards of the given type.
* @param shardType The shard type.
*/
public MonolithicShardStrategy(final GenomeLocParser parser, final SAMDataSource readsDataSource, final Shard.ShardType shardType, final List<GenomeLoc> region) {
shard = new MonolithicShard(parser,readsDataSource,shardType,region);
}
/**
* Convenience for using in a foreach loop. Will NOT create a new, reset instance of the iterator;
* will only return another copy of the active iterator.
* @return A copy of this.
*/
public Iterator<Shard> iterator() {
return this;
}
/**
* Returns true if the monolithic shard has not yet been consumed, or false otherwise.
* @return True if the shard has not yet been consumed; false otherwise.
*/
public boolean hasNext() {
return shard != null;
}
/**
* Returns the monolithic shard if it has not already been retrieved.
* @return The monolithic shard.
* @throws NoSuchElementException if no such data exists.
*/
public Shard next() {
if(shard == null)
throw new NoSuchElementException("Monolithic shard has already been retrived.");
Shard working = shard;
shard = null;
return working;
}
/**
* Mandated by the interface, but is unsupported in this context. Will throw an exception always.
*/
public void remove() {
throw new UnsupportedOperationException("Cannot remove from a shard strategy");
}
/**
* Mandated by the interface, but is unsupported in this context. Will throw an exception always.
* @param size adjust the next size to this
*/
public void adjustNextShardSize( long size ) {
throw new UnsupportedOperationException("Cannot adjust the next size of a monolithic shard; there will be no next shard.");
}
}

View File

@ -35,10 +35,15 @@ import java.util.Map;
* @version 0.1
*/
public class ReadShard extends Shard {
/**
* What is the maximum number of reads which should go into a read shard.
*/
public static final int MAX_READS = 10000;
/**
* The reads making up this shard.
*/
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(ReadShardStrategy.MAX_READS);
private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(MAX_READS);
public ReadShard(GenomeLocParser parser, SAMDataSource readsDataSource, Map<SAMReaderID,SAMFileSpan> fileSpans, List<GenomeLoc> loci, boolean isUnmapped) {
super(parser, ShardType.READ, loci, readsDataSource, fileSpans, isUnmapped);
@ -66,7 +71,7 @@ public class ReadShard extends Shard {
* @return True if this shard's buffer is full (and the shard can buffer reads).
*/
public boolean isBufferFull() {
return reads.size() > ReadShardStrategy.MAX_READS;
return reads.size() > ReadShard.MAX_READS;
}
/**

View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.SAMFileSpan;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
/**
* Divide up large file pointers containing reads into more manageable subcomponents.
*/
public class ReadShardBalancer extends ShardBalancer {
/**
* Convert iterators of file pointers into balanced iterators of shards.
* @return An iterator over balanced shards.
*/
public Iterator<Shard> iterator() {
return new Iterator<Shard>() {
/**
* The cached shard to be returned next. Prefetched in the peekable iterator style.
*/
private Shard nextShard = null;
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
/**
* Ending position of the last shard in the file.
*/
private Map<SAMReaderID,GATKBAMFileSpan> position = readsDataSource.getCurrentPosition();
{
if(filePointers.hasNext())
currentFilePointer = filePointers.next();
advance();
}
public boolean hasNext() {
return nextShard != null;
}
public Shard next() {
if(!hasNext())
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
advance();
return currentShard;
}
public void remove() {
throw new UnsupportedOperationException("Unable to remove from shard balancing iterator");
}
private void advance() {
Map<SAMReaderID,SAMFileSpan> shardPosition;
nextShard = null;
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
for(SAMReaderID id: shardPosition.keySet()) {
SAMFileSpan fileSpan = new GATKBAMFileSpan(shardPosition.get(id).removeContentsBefore(position.get(id)));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
if(selectedReaders.size() > 0) {
Shard shard = new ReadShard(parser,readsDataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
readsDataSource.fillShard(shard);
if(!shard.isBufferEmpty()) {
nextShard = shard;
break;
}
}
selectedReaders.clear();
currentFilePointer = filePointers.hasNext() ? filePointers.next() : null;
}
position = readsDataSource.getCurrentPosition();
}
};
}
}
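ReadShardBalancer keeps filling a shard until its buffer reaches capacity (ReadShard.MAX_READS) and then starts a new one. Stripped of the BAM-specific machinery, that is plain fixed-size batching of a stream; a hedged sketch under that simplification (Batcher and batchSize are illustrative, not part of this change):

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

class Batcher {
    // Split an iterator into consecutive batches of at most batchSize elements.
    static <T> List<List<T>> batch(Iterator<T> items, int batchSize) {
        List<List<T>> batches = new ArrayList<List<T>>();
        List<T> current = new ArrayList<T>(batchSize);
        while (items.hasNext()) {
            current.add(items.next());
            if (current.size() == batchSize) {   // shard buffer is "full"
                batches.add(current);
                current = new ArrayList<T>(batchSize);
            }
        }
        if (!current.isEmpty()) batches.add(current);
        return batches;
    }

    public static void main(String[] args) {
        List<Integer> reads = new ArrayList<Integer>();
        for (int i = 0; i < 7; i++) reads.add(i);
        System.out.println(batch(reads.iterator(), 3)); // [[0, 1, 2], [3, 4, 5], [6]]
    }
}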

View File

@ -1,183 +0,0 @@
/*
* Copyright (c) 2010, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.SAMFileSpan;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import java.util.*;
/**
* The sharding strategy for reads using a simple counting mechanism. Each read shard
* has a specific number of reads (defaulting to 10,000).
* @author aaron
* @version 1.0
* @date Apr 14, 2009
*/
public class ReadShardStrategy implements ShardStrategy {
/**
* What is the maximum number of reads which should go into a read shard.
*/
protected static final int MAX_READS = 10000;
/**
* The data source used to shard.
*/
private final SAMDataSource dataSource;
/**
* The intervals to be processed.
*/
private final GenomeLocSortedSet locations;
/**
* The cached shard to be returned next. Prefetched in the peekable iterator style.
*/
private Shard nextShard = null;
/** our storage of the genomic locations they'd like to shard over */
private final List<FilePointer> filePointers = new ArrayList<FilePointer>();
/**
* Iterator over the list of file pointers.
*/
private final Iterator<FilePointer> filePointerIterator;
/**
* The file pointer currently being processed.
*/
private FilePointer currentFilePointer;
/**
* Ending position of the last shard in the file.
*/
private Map<SAMReaderID,SAMFileSpan> position;
/**
* An indicator whether the strategy has sharded into the unmapped region.
*/
private boolean isIntoUnmappedRegion = false;
private final GenomeLocParser parser;
/**
* Create a new read shard strategy, loading read shards from the given BAM file.
* @param dataSource Data source from which to load shards.
* @param locations intervals to use for sharding.
*/
public ReadShardStrategy(GenomeLocParser parser, SAMDataSource dataSource, GenomeLocSortedSet locations) {
this.dataSource = dataSource;
this.parser = parser;
this.position = this.dataSource.getCurrentPosition();
this.locations = locations;
if(locations != null)
filePointerIterator = dataSource.isLowMemoryShardingEnabled() ? new LowMemoryIntervalSharder(this.dataSource,locations) : IntervalSharder.shardIntervals(this.dataSource,locations);
else
filePointerIterator = filePointers.iterator();
if(filePointerIterator.hasNext())
currentFilePointer = filePointerIterator.next();
advance();
}
/**
* do we have another read shard?
* @return True if any more data is available. False otherwise.
*/
public boolean hasNext() {
return nextShard != null;
}
/**
* Retrieves the next shard, if available.
* @return The next shard, if available.
* @throws java.util.NoSuchElementException if no such shard is available.
*/
public Shard next() {
if(!hasNext())
throw new NoSuchElementException("No next read shard available");
Shard currentShard = nextShard;
advance();
return currentShard;
}
public void advance() {
Map<SAMReaderID,SAMFileSpan> shardPosition = new HashMap<SAMReaderID,SAMFileSpan>();
nextShard = null;
if(locations != null) {
Map<SAMReaderID,SAMFileSpan> selectedReaders = new HashMap<SAMReaderID,SAMFileSpan>();
while(selectedReaders.size() == 0 && currentFilePointer != null) {
shardPosition = currentFilePointer.fileSpans;
for(SAMReaderID id: shardPosition.keySet()) {
SAMFileSpan fileSpan = shardPosition.get(id).removeContentsBefore(position.get(id));
if(!fileSpan.isEmpty())
selectedReaders.put(id,fileSpan);
}
if(selectedReaders.size() > 0) {
Shard shard = new ReadShard(parser, dataSource,selectedReaders,currentFilePointer.locations,currentFilePointer.isRegionUnmapped);
dataSource.fillShard(shard);
if(!shard.isBufferEmpty()) {
nextShard = shard;
break;
}
}
selectedReaders.clear();
currentFilePointer = filePointerIterator.hasNext() ? filePointerIterator.next() : null;
}
}
else {
// todo -- this nulling of intervals is a bit annoying since readwalkers without
// todo -- any -L values need to be special cased throughout the code.
Shard shard = new ReadShard(parser,dataSource,position,null,false);
dataSource.fillShard(shard);
nextShard = !shard.isBufferEmpty() ? shard : null;
}
this.position = dataSource.getCurrentPosition();
}
/**
* @throws UnsupportedOperationException always.
*/
public void remove() {
throw new UnsupportedOperationException("Remove not supported");
}
/**
* Convenience method for using ShardStrategy in an foreach loop.
* @return A iterator over shards.
*/
public Iterator<Shard> iterator() {
return this;
}
}

View File

@ -1,33 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin;
import net.sf.samtools.BrowseableBAMIndex;
/**
* A (reader, index, reference sequence, bin) tuple used when merging bins across multiple readers.
*/
class ReaderBin {
public final SAMReaderID id;
public final BrowseableBAMIndex index;
public final int referenceSequence;
public final Bin bin;
public ReaderBin(final SAMReaderID id, final BrowseableBAMIndex index, final int referenceSequence, final Bin bin) {
this.id = id;
this.index = index;
this.referenceSequence = referenceSequence;
this.bin = bin;
}
public int getStart() {
return index.getFirstLocusInBin(bin);
}
public int getStop() {
return index.getLastLocusInBin(bin);
}
}

View File

@ -37,8 +37,10 @@ import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.CountingFilteringIterator;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.*;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.baq.BAQSamIterator;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -71,7 +73,7 @@ public class SAMDataSource {
/**
* Tools for parsing GenomeLocs, for verifying BAM ordering against general ordering.
*/
private final GenomeLocParser genomeLocParser;
protected final GenomeLocParser genomeLocParser;
/**
* Identifiers for the readers driving this data source.
@ -91,13 +93,18 @@ public class SAMDataSource {
/**
* How far along is each reader?
*/
private final Map<SAMReaderID, SAMFileSpan> readerPositions = new HashMap<SAMReaderID,SAMFileSpan>();
private final Map<SAMReaderID,GATKBAMFileSpan> readerPositions = new HashMap<SAMReaderID,GATKBAMFileSpan>();
/**
* The merged header.
*/
private final SAMFileHeader mergedHeader;
/**
* The constituent headers of the unmerged files.
*/
private final Map<SAMReaderID,SAMFileHeader> headers = new HashMap<SAMReaderID,SAMFileHeader>();
/**
* The sort order of the BAM files. Files without a sort order tag are assumed to be
* in coordinate order.
@ -131,17 +138,24 @@ public class SAMDataSource {
private final SAMResourcePool resourcePool;
/**
* Whether to enable the new low-memory sharding mechanism.
* Asynchronously loads BGZF blocks.
*/
private boolean enableLowMemorySharding = false;
private final BGZFBlockLoadingDispatcher dispatcher;
/**
* How are threads allocated.
*/
private final ThreadAllocation threadAllocation;
/**
* Create a new SAM data source given the supplied read metadata.
* @param samFiles list of reads files.
*/
public SAMDataSource(Collection<SAMReaderID> samFiles,GenomeLocParser genomeLocParser) {
public SAMDataSource(Collection<SAMReaderID> samFiles, ThreadAllocation threadAllocation, Integer numFileHandles, GenomeLocParser genomeLocParser) {
this(
samFiles,
threadAllocation,
numFileHandles,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.STRICT,
@ -150,8 +164,7 @@ public class SAMDataSource {
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
false,
false,
true);
false);
}
/**
@ -159,6 +172,8 @@ public class SAMDataSource {
*/
public SAMDataSource(
Collection<SAMReaderID> samFiles,
ThreadAllocation threadAllocation,
Integer numFileHandles,
GenomeLocParser genomeLocParser,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
@ -167,9 +182,10 @@ public class SAMDataSource {
ValidationExclusion exclusionList,
Collection<ReadFilter> supplementalFilters,
boolean includeReadsWithDeletionAtLoci,
boolean generateExtendedEvents,
boolean enableLowMemorySharding) {
boolean generateExtendedEvents) {
this( samFiles,
threadAllocation,
numFileHandles,
genomeLocParser,
useOriginalBaseQualities,
strictness,
@ -182,8 +198,7 @@ public class SAMDataSource {
BAQ.CalculationMode.OFF,
BAQ.QualityMode.DONT_MODIFY,
null, // no BAQ
(byte) -1,
enableLowMemorySharding);
(byte) -1);
}
/**
@ -205,6 +220,8 @@ public class SAMDataSource {
*/
public SAMDataSource(
Collection<SAMReaderID> samFiles,
ThreadAllocation threadAllocation,
Integer numFileHandles,
GenomeLocParser genomeLocParser,
boolean useOriginalBaseQualities,
SAMFileReader.ValidationStringency strictness,
@ -217,13 +234,19 @@ public class SAMDataSource {
BAQ.CalculationMode cmode,
BAQ.QualityMode qmode,
IndexedFastaSequenceFile refReader,
byte defaultBaseQualities,
boolean enableLowMemorySharding) {
this.enableLowMemorySharding(enableLowMemorySharding);
byte defaultBaseQualities) {
this.readMetrics = new ReadMetrics();
this.genomeLocParser = genomeLocParser;
readerIDs = samFiles;
this.threadAllocation = threadAllocation;
// TODO: Consider a borrowed-thread dispatcher implementation.
if(this.threadAllocation.getNumIOThreads() > 0)
dispatcher = new BGZFBlockLoadingDispatcher(this.threadAllocation.getNumIOThreads(), numFileHandles != null ? numFileHandles : 1);
else
dispatcher = null;
validationStringency = strictness;
for (SAMReaderID readerID : samFiles) {
if (!readerID.samFile.canRead())
@ -235,10 +258,13 @@ public class SAMDataSource {
SAMReaders readers = resourcePool.getAvailableReaders();
// Determine the sort order.
for(SAMFileReader reader: readers.values()) {
for(SAMReaderID readerID: readerIDs) {
// Get the sort order, forcing it to coordinate if unsorted.
SAMFileReader reader = readers.getReader(readerID);
SAMFileHeader header = reader.getFileHeader();
headers.put(readerID,header);
if ( header.getReadGroups().isEmpty() ) {
throw new UserException.MalformedBAM(readers.getReaderID(reader).samFile,
"SAM file doesn't have any read groups defined in the header. The GATK no longer supports SAM files without read groups");
@ -275,7 +301,7 @@ public class SAMDataSource {
qmode,
refReader,
defaultBaseQualities);
// cache the read group id (original) -> read group id (merged)
// and read group id (merged) -> read group id (original) mappings.
for(SAMReaderID id: readerIDs) {
@ -296,12 +322,10 @@ public class SAMDataSource {
originalToMergedReadGroupMappings.put(id,mappingToMerged);
}
if(enableLowMemorySharding) {
for(SAMReaderID id: readerIDs) {
File indexFile = findIndexFile(id.samFile);
if(indexFile != null)
bamIndices.put(id,new GATKBAMIndex(indexFile));
}
for(SAMReaderID id: readerIDs) {
File indexFile = findIndexFile(id.samFile);
if(indexFile != null)
bamIndices.put(id,new GATKBAMIndex(indexFile));
}
resourcePool.releaseReaders(readers);
@ -314,22 +338,6 @@ public class SAMDataSource {
*/
public ReadProperties getReadsInfo() { return readProperties; }
/**
* Enable experimental low-memory sharding.
* @param enable True to enable sharding. False otherwise.
*/
public void enableLowMemorySharding(final boolean enable) {
enableLowMemorySharding = enable;
}
/**
* Returns whether low-memory sharding is enabled.
* @return True if enabled, false otherwise.
*/
public boolean isLowMemoryShardingEnabled() {
return enableLowMemorySharding;
}
/**
* Checks to see whether any reads files are supplying data.
* @return True if no reads files are supplying data to the traversal; false otherwise.
@ -368,7 +376,7 @@ public class SAMDataSource {
* Retrieves the current position within the BAM file.
* @return A mapping of reader to current position.
*/
public Map<SAMReaderID,SAMFileSpan> getCurrentPosition() {
public Map<SAMReaderID,GATKBAMFileSpan> getCurrentPosition() {
return readerPositions;
}
@ -381,7 +389,7 @@ public class SAMDataSource {
}
public SAMFileHeader getHeader(SAMReaderID id) {
return resourcePool.getReadersWithoutLocking().getReader(id).getFileHeader();
return headers.get(id);
}
/**
@ -404,45 +412,21 @@ public class SAMDataSource {
return mergedToOriginalReadGroupMappings.get(mergedReadGroupId);
}
/**
* Returns whether read group collisions were detected when the file headers were merged.
* @return True if collisions exist; false otherwise.
*/
public boolean hasReadGroupCollisions() {
return hasReadGroupCollisions;
}
/**
* True if all readers have an index.
* @return True if all readers have an index.
*/
public boolean hasIndex() {
if(enableLowMemorySharding)
return readerIDs.size() == bamIndices.size();
else {
for(SAMFileReader reader: resourcePool.getReadersWithoutLocking()) {
if(!reader.hasIndex())
return false;
}
return true;
}
return readerIDs.size() == bamIndices.size();
}
/**
* Gets the index for a particular reader. Always preloaded.
* TODO: Should return object of type GATKBAMIndex, but cannot because there
* TODO: is no parent class of both BAMIndex and GATKBAMIndex. Change when new
* TODO: sharding system goes live.
* @param id Id of the reader.
* @return The index. Will preload the index if necessary.
*/
public Object getIndex(final SAMReaderID id) {
if(enableLowMemorySharding)
return bamIndices.get(id);
else {
SAMReaders readers = resourcePool.getReadersWithoutLocking();
return readers.getReader(id).getBrowseableIndex();
}
public GATKBAMIndex getIndex(final SAMReaderID id) {
return bamIndices.get(id);
}
/**
@ -454,7 +438,7 @@ public class SAMDataSource {
}
/**
* Gets the cumulative read metrics for shards already processed.
* @return Cumulative read metrics.
*/
public ReadMetrics getCumulativeReadMetrics() {
@ -507,10 +491,6 @@ public class SAMDataSource {
}
public StingSAMIterator seek(Shard shard) {
// todo: refresh monolithic sharding implementation
if(shard instanceof MonolithicShard)
return seekMonolithic(shard);
if(shard.buffersReads()) {
return shard.iterator();
}
@ -540,7 +520,7 @@ public class SAMDataSource {
*/
private void initializeReaderPositions(SAMReaders readers) {
for(SAMReaderID id: getReaderIDs())
readerPositions.put(id,readers.getReader(id).getFilePointerSpanningReads());
readerPositions.put(id,new GATKBAMFileSpan(readers.getReader(id).getFilePointerSpanningReads()));
}
/**
@ -548,7 +528,6 @@ public class SAMDataSource {
* @param readers Readers from which to load data.
* @param shard The shard specifying the data limits.
* @param enableVerification True to verify. For compatibility with old sharding strategy.
* TODO: Collapse this flag when the two sharding systems are merged.
* @return An iterator over the selected data.
*/
private StingSAMIterator getIterator(SAMReaders readers, Shard shard, boolean enableVerification) {
@ -559,14 +538,20 @@ public class SAMDataSource {
for(SAMReaderID id: getReaderIDs()) {
CloseableIterator<SAMRecord> iterator = null;
if(!shard.isUnmapped() && shard.getFileSpans().get(id) == null)
continue;
iterator = shard.getFileSpans().get(id) != null ?
readers.getReader(id).iterator(shard.getFileSpans().get(id)) :
readers.getReader(id).queryUnmapped();
// TODO: null used to be the signal for unmapped, but we've replaced that with a simple index query for the last bin.
// TODO: Kill this check once we've proven that the design elements are gone.
if(shard.getFileSpans().get(id) == null)
throw new ReviewedStingException("SAMDataSource: received null location for reader " + id + ", but null locations are no longer supported.");
if(threadAllocation.getNumIOThreads() > 0) {
BlockInputStream inputStream = readers.getInputStream(id);
inputStream.submitAccessPlan(new SAMReaderPosition(id,inputStream,(GATKBAMFileSpan)shard.getFileSpans().get(id)));
}
iterator = readers.getReader(id).iterator(shard.getFileSpans().get(id));
if(readProperties.getReadBufferSize() != null)
iterator = new BufferingReadIterator(iterator,readProperties.getReadBufferSize());
if(shard.getGenomeLocs() != null)
if(shard.getGenomeLocs().size() > 0)
iterator = new IntervalOverlapFilteringIterator(iterator,shard.getGenomeLocs());
mergingIterator.addIterator(readers.getReader(id),iterator);
}
@ -584,33 +569,6 @@ public class SAMDataSource {
readProperties.defaultBaseQualities());
}
/**
* A stopgap measure to handle monolithic sharding
* @param shard the (monolithic) shard.
* @return An iterator over the monolithic shard.
*/
private StingSAMIterator seekMonolithic(Shard shard) {
SAMReaders readers = resourcePool.getAvailableReaders();
// Set up merging and filtering to dynamically merge together multiple BAMs and filter out records not in the shard set.
SamFileHeaderMerger headerMerger = new SamFileHeaderMerger(SAMFileHeader.SortOrder.coordinate,readers.headers(),true);
MergingSamRecordIterator mergingIterator = new MergingSamRecordIterator(headerMerger,readers.values(),true);
for(SAMReaderID id: getReaderIDs())
mergingIterator.addIterator(readers.getReader(id),readers.getReader(id).iterator());
return applyDecoratingIterators(shard.getReadMetrics(),
shard instanceof ReadShard,
readProperties.useOriginalBaseQualities(),
new ReleasingIterator(readers,StingSAMIteratorAdapter.adapt(mergingIterator)),
readProperties.getDownsamplingMethod().toFraction,
readProperties.getValidationExclusionList().contains(ValidationExclusion.TYPE.NO_READ_ORDER_VERIFICATION),
readProperties.getSupplementalFilters(),
readProperties.getBAQCalculationMode(),
readProperties.getBAQQualityMode(),
readProperties.getRefReader(),
readProperties.defaultBaseQualities());
}
/**
* Adds this read to the given shard.
* @param shard The shard to which to add the read.
@ -618,7 +576,7 @@ public class SAMDataSource {
* @param read The read to add to the shard.
*/
private void addReadToBufferingShard(Shard shard,SAMReaderID id,SAMRecord read) {
SAMFileSpan endChunk = read.getFileSource().getFilePointer().getContentsFollowing();
GATKBAMFileSpan endChunk = new GATKBAMFileSpan(read.getFileSource().getFilePointer().getContentsFollowing());
shard.addRead(read);
readerPositions.put(id,endChunk);
}
@ -689,19 +647,6 @@ public class SAMDataSource {
this.maxEntries = maxEntries;
}
/**
* Dangerous internal method; retrieves any set of readers, whether in iteration or not.
* Used to handle non-exclusive, stateless operations, such as index queries.
* @return Any collection of SAMReaders, whether in iteration or not.
*/
protected SAMReaders getReadersWithoutLocking() {
synchronized(this) {
if(allResources.size() == 0)
createNewResource();
}
return allResources.get(0);
}
/**
* Choose a set of readers from the pool to use for this query. When complete,
* @return
@ -753,6 +698,11 @@ public class SAMDataSource {
*/
private final Map<SAMReaderID,SAMFileReader> readers = new LinkedHashMap<SAMReaderID,SAMFileReader>();
/**
* The input streams backing each reader.
*/
private final Map<SAMReaderID,BlockInputStream> inputStreams = new LinkedHashMap<SAMReaderID,BlockInputStream>();
/**
* Derive a new set of readers from the Reads metadata.
* @param readerIDs reads to load.
@ -760,12 +710,20 @@ public class SAMDataSource {
*/
public SAMReaders(Collection<SAMReaderID> readerIDs, SAMFileReader.ValidationStringency validationStringency) {
for(SAMReaderID readerID: readerIDs) {
SAMFileReader reader = new SAMFileReader(readerID.samFile);
File indexFile = findIndexFile(readerID.samFile);
SAMFileReader reader = null;
if(threadAllocation.getNumIOThreads() > 0) {
BlockInputStream blockInputStream = new BlockInputStream(dispatcher,readerID,false);
reader = new SAMFileReader(blockInputStream,indexFile,false);
inputStreams.put(readerID,blockInputStream);
}
else
reader = new SAMFileReader(readerID.samFile,indexFile,false);
reader.setSAMRecordFactory(factory);
reader.enableFileSource(true);
reader.enableIndexMemoryMapping(false);
if(!enableLowMemorySharding)
reader.enableIndexCaching(true);
reader.setValidationStringency(validationStringency);
final SAMFileHeader header = reader.getFileHeader();
@ -786,6 +744,15 @@ public class SAMDataSource {
return readers.get(id);
}
/**
* Retrieve the input stream backing a reader.
* @param id The ID of the reader to retrieve.
* @return the input stream associated with the given id.
*/
public BlockInputStream getInputStream(final SAMReaderID id) {
return inputStreams.get(id);
}
/**
* Searches for the reader id of this reader.
* @param reader Reader for which to search.
@ -883,7 +850,7 @@ public class SAMDataSource {
* Filters out reads that do not overlap the current GenomeLoc.
* Note the custom implementation: BAM index querying returns all reads that could
* possibly overlap the given region (and quite a few extras). In order not to drag
* down performance, this implementation is highly customized to its task.
*/
private class IntervalOverlapFilteringIterator implements CloseableIterator<SAMRecord> {
/**
@ -903,7 +870,7 @@ public class SAMDataSource {
/**
* Custom representation of interval bounds.
* Makes it simpler to track current position.
*/
private int[] intervalContigIndices;
private int[] intervalStarts;
@ -941,7 +908,7 @@ public class SAMDataSource {
i++;
}
}
advance();
}
@ -1070,6 +1037,40 @@ public class SAMDataSource {
return indexFile;
}
/**
* Creates a BAM schedule over all reads in the BAM file, both mapped and unmapped. The outgoing stream
* will be as granular as possible given our current knowledge of the best ways to split up BAM files.
* @return An iterator that spans all reads in all BAM files.
*/
public Iterable<Shard> createShardIteratorOverAllReads(final ShardBalancer shardBalancer) {
shardBalancer.initialize(this,IntervalSharder.shardOverAllReads(this,genomeLocParser),genomeLocParser);
return shardBalancer;
}
/**
* Creates a BAM schedule over all mapped reads in the BAM file, where a 'mapped' read is defined as any
* read that has been assigned a position on a reference contig.
* @return An iterator that spans all mapped reads in all BAM files.
*/
public Iterable<Shard> createShardIteratorOverMappedReads(final SAMSequenceDictionary sequenceDictionary, final ShardBalancer shardBalancer) {
shardBalancer.initialize(this,IntervalSharder.shardOverMappedReads(this,sequenceDictionary,genomeLocParser),genomeLocParser);
return shardBalancer;
}
/**
* Create a schedule for processing the initialized BAM file using the given interval list.
* The returned schedule should be as granular as possible.
* @param intervals The list of intervals for which to create the schedule.
* @return A granular iterator over file pointers.
*/
public Iterable<Shard> createShardIteratorOverIntervals(final GenomeLocSortedSet intervals,final ShardBalancer shardBalancer) {
if(intervals == null)
throw new ReviewedStingException("Unable to create schedule from intervals; no intervals were provided.");
shardBalancer.initialize(this,IntervalSharder.shardOverIntervals(SAMDataSource.this,intervals),genomeLocParser);
return shardBalancer;
}
}
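For orientation, a minimal sketch of how a caller might consume one of the shard schedules above; the loop body is elided, and the balancer is whatever concrete ShardBalancer subclass the engine selects:

// Hedged usage sketch; the loop body stands in for the real traversal engine.
public static void traverseAllReads(final SAMDataSource dataSource, final ShardBalancer balancer) {
    // The data source wires the balancer to a maximally granular file-pointer stream
    // and hands back an Iterable of traversal-sized shards.
    for (final Shard shard : dataSource.createShardIteratorOverAllReads(balancer)) {
        // process the reads covered by this shard
    }
}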

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKChunk;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.util.List;
/**
* Tracks a BAM reader's progress through a file span, exposing the next block
* address to read as the reader consumes the span chunk by chunk.
* @author mhanna
*/
class SAMReaderPosition {
private final SAMReaderID reader;
private final BlockInputStream inputStream;
private final List<GATKChunk> positions;
private PeekableIterator<GATKChunk> positionIterator;
/**
* Stores the next block address to read, or -1 if no such block is available.
*/
private long nextBlockAddress;
SAMReaderPosition(final SAMReaderID reader, final BlockInputStream inputStream, GATKBAMFileSpan fileSpan) {
this.reader = reader;
this.inputStream = inputStream;
this.positions = fileSpan.getGATKChunks();
initialize();
}
public SAMReaderID getReader() {
return reader;
}
public BlockInputStream getInputStream() {
return inputStream;
}
/**
* Retrieves the next block address to be read.
* @return Next block address to be read.
*/
public long getBlockAddress() {
return nextBlockAddress;
}
public void reset() {
initialize();
}
/**
* Resets the SAM reader position to its original state.
*/
private void initialize() {
this.positionIterator = new PeekableIterator<GATKChunk>(positions.iterator());
if(positionIterator.hasNext())
nextBlockAddress = positionIterator.peek().getBlockStart();
else
nextBlockAddress = -1;
}
/**
* Advances the current position to the next block to read, given the current position in the file.
* @param filePosition The current position within the file.
*/
void advancePosition(final long filePosition) {
nextBlockAddress = filePosition;
// Check the current file position against the iterator; if the iterator is before the current file position,
// draw the iterator forward. Remember when performing the check that coordinates are half-open!
try {
while(positionIterator.hasNext() && isFilePositionPastEndOfChunk(filePosition,positionIterator.peek())) {
positionIterator.next();
// Check to see if the iterator has more data available.
if(positionIterator.hasNext() && filePosition < positionIterator.peek().getBlockStart()) {
nextBlockAddress = positionIterator.peek().getBlockStart();
break;
}
}
}
catch(Exception ex) {
throw new ReviewedStingException("Unable to advance the BAM file position past the current chunk",ex);
}
}
private boolean isFilePositionPastEndOfChunk(final long filePosition, final GATKChunk chunk) {
return (filePosition > chunk.getBlockEnd() || (filePosition == chunk.getBlockEnd() && chunk.getBlockOffsetEnd() == 0));
}
}
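For context on the half-open check above: chunk coordinates here are BGZF virtual file offsets, in which the upper 48 bits address the start of a compressed block and the low 16 bits give an uncompressed offset within that block. A chunk end with block offset 0 therefore contains no bytes of its final block, which is why filePosition == getBlockEnd() with getBlockOffsetEnd() == 0 counts as past the end. A small decomposition sketch, assuming the standard BGZF layout:

// Illustrative helpers assuming the standard BGZF virtual-offset layout.
final class VirtualOffsets {
    // File offset of the compressed block containing the position.
    static long blockAddress(final long virtualOffset) {
        return virtualOffset >>> 16;
    }
    // Offset into the uncompressed contents of that block.
    static int blockOffset(final long virtualOffset) {
        return (int)(virtualOffset & 0xFFFF);
    }
}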

View File

@ -0,0 +1,21 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.util.PeekableIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import java.util.Iterator;
/**
* Balances maximally granular file pointers into shards of reasonable size.
*/
public abstract class ShardBalancer implements Iterable<Shard> {
protected SAMDataSource readsDataSource;
protected PeekableIterator<FilePointer> filePointers;
protected GenomeLocParser parser;
public void initialize(final SAMDataSource readsDataSource, final Iterator<FilePointer> filePointers, final GenomeLocParser parser) {
this.readsDataSource = readsDataSource;
this.filePointers = new PeekableIterator<FilePointer>(filePointers);
this.parser = parser;
}
}
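To illustrate the contract, a hedged sketch of a trivial balancer that would emit one shard per file pointer; the real subclasses aggregate or split pointers to hit a target shard size, and the toShard hook here is a hypothetical stand-in for that construction step:

// Minimal sketch of a concrete balancer: one shard per file pointer.
abstract class OneToOneShardBalancer extends ShardBalancer {
    public Iterator<Shard> iterator() {
        return new Iterator<Shard>() {
            public boolean hasNext() { return filePointers.hasNext(); }
            public Shard next() { return toShard(filePointers.next()); }
            public void remove() { throw new UnsupportedOperationException(); }
        };
    }
    // Hypothetical hook converting one file pointer into one shard.
    protected abstract Shard toShard(FilePointer pointer);
}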

View File

@ -1,31 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import java.util.Iterator;
/**
*
* User: aaron
* Date: Apr 10, 2009
* Time: 4:55:37 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 10, 2009
* <p/>
* Interface ShardStrategy
* <p/>
* The base interface for the sharding strategy; before we had a base abstract
* class, but now this will be an interface to accommodate read-based sharding
*/
public interface ShardStrategy extends Iterator<Shard>, Iterable<Shard> {
}

View File

@ -1,117 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
/**
*
* User: aaron
* Date: Apr 6, 2009
* Time: 7:09:22 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 6, 2009
* <p/>
* Class ShardStrategyFactory
* <p/>
* The Shard Strategy Factory, use this class to create and transfer shard strategies
* between different approaches.
*/
public class ShardStrategyFactory {
public enum SHATTER_STRATEGY {
MONOLITHIC, // Put all of the available data into one shard.
LOCUS_EXPERIMENTAL,
READS_EXPERIMENTAL
}
/**
* get a new shatter strategy
*
* @param readsDataSource File pointer to BAM.
* @param referenceDataSource File pointer to reference.
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, -1L);
}
/**
* get a new shatter strategy
*
* @param readsDataSource File pointer to BAM.
* @param referenceDataSource File pointer to reference.
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, long limitByCount) {
switch (strat) {
case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,null);
case READS_EXPERIMENTAL:
return new ReadShardStrategy(genomeLocParser,readsDataSource,null);
default:
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
}
}
/**
* get a new shatter strategy
*
* @param readsDataSource File pointer to BAM.
* @param referenceDataSource File pointer to reference.
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return a shard strategy capable of dividing input data into shards.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst) {
return ShardStrategyFactory.shatter(readsDataSource, referenceDataSource, strat, dic, startingSize, genomeLocParser, lst, -1l);
}
/**
* get a new shatter strategy
*
* @param readsDataSource The reads used to shatter this file.
* @param referenceDataSource The reference used to shatter this file.
* @param strat what's our strategy - SHATTER_STRATEGY type
* @param dic the seq dictionary
* @param startingSize the starting size
* @return A strategy for shattering this data.
*/
static public ShardStrategy shatter(SAMDataSource readsDataSource, IndexedFastaSequenceFile referenceDataSource, SHATTER_STRATEGY strat, SAMSequenceDictionary dic, long startingSize, GenomeLocParser genomeLocParser, GenomeLocSortedSet lst, long limitDataCount) {
switch (strat) {
case LOCUS_EXPERIMENTAL:
return new LocusShardStrategy(readsDataSource,referenceDataSource,genomeLocParser,lst);
case READS_EXPERIMENTAL:
return new ReadShardStrategy(genomeLocParser, readsDataSource,lst);
default:
throw new ReviewedStingException("Strategy: " + strat + " isn't implemented");
}
}
}

View File

@ -30,10 +30,12 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.CommandLineProgram;
import org.broadinstitute.sting.commandline.Input;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.datasources.reads.BAMScheduler;
import org.broadinstitute.sting.gatk.datasources.reads.FilePointer;
import org.broadinstitute.sting.gatk.datasources.reads.LowMemoryIntervalSharder;
import org.broadinstitute.sting.gatk.datasources.reads.IntervalSharder;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@ -92,7 +94,7 @@ public class FindLargeShards extends CommandLineProgram {
// initialize reads
List<SAMReaderID> bamReaders = ListFileUtils.unpackBAMFileList(samFiles,parser);
SAMDataSource dataSource = new SAMDataSource(bamReaders,genomeLocParser);
SAMDataSource dataSource = new SAMDataSource(bamReaders,new ThreadAllocation(),null,genomeLocParser);
// intervals
GenomeLocSortedSet intervalSortedSet = null;
@ -106,7 +108,7 @@ public class FindLargeShards extends CommandLineProgram {
logger.info(String.format("PROGRESS: Calculating mean and variance: Contig\tRegion.Start\tRegion.Stop\tSize"));
LowMemoryIntervalSharder sharder = new LowMemoryIntervalSharder(dataSource,intervalSortedSet);
IntervalSharder sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet);
while(sharder.hasNext()) {
FilePointer filePointer = sharder.next();
@ -135,7 +137,7 @@ public class FindLargeShards extends CommandLineProgram {
logger.warn(String.format("PROGRESS: Searching for large shards: Contig\tRegion.Start\tRegion.Stop\tSize"));
out.printf("Contig\tRegion.Start\tRegion.Stop\tSize%n");
sharder = new LowMemoryIntervalSharder(dataSource,intervalSortedSet);
sharder = IntervalSharder.shardOverIntervals(dataSource,intervalSortedSet);
while(sharder.hasNext()) {
FilePointer filePointer = sharder.next();

View File

@ -29,6 +29,14 @@ import net.sf.picard.reference.FastaSequenceIndex;
import net.sf.picard.reference.FastaSequenceIndexBuilder;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.picard.sam.CreateSequenceDictionary;
import net.sf.samtools.SAMSequenceRecord;
import org.broadinstitute.sting.gatk.datasources.reads.FilePointer;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
@ -36,13 +44,17 @@ import org.broadinstitute.sting.utils.file.FSLockWithShared;
import org.broadinstitute.sting.utils.file.FileSystemInabilityToLockException;
import java.io.File;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
/**
* Loads reference data from fasta file
* Looks for fai and dict files, and tries to create them if they don't exist
*/
public class ReferenceDataSource {
private IndexedFastaSequenceFile index;
private IndexedFastaSequenceFile reference;
/** our log, which we want to capture anything from this class */
protected static org.apache.log4j.Logger logger = org.apache.log4j.Logger.getLogger(ReferenceDataSource.class);
@ -173,7 +185,7 @@ public class ReferenceDataSource {
logger.info("Treating existing index file as complete.");
}
index = new CachingIndexedFastaSequenceFile(fastaFile);
reference = new CachingIndexedFastaSequenceFile(fastaFile);
} catch (IllegalArgumentException e) {
throw new UserException.CouldNotReadInputFile(fastaFile, "Could not read reference sequence. The FASTA must have either a .fasta or .fa extension", e);
@ -192,6 +204,52 @@ public class ReferenceDataSource {
* @return IndexedFastaSequenceFile that was created from file
*/
public IndexedFastaSequenceFile getReference() {
return this.index;
return this.reference;
}
/**
* Creates an iterator for processing the entire reference.
* @param readsDataSource the reads datasource to embed in the locus shard.
* @param parser used to generate/regenerate intervals. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
* @param maxShardSize The maximum shard size which can be used to create this list.
* @return A schedule for performing a traversal over the entire reference.
*/
public Iterable<Shard> createShardsOverEntireReference(final SAMDataSource readsDataSource, final GenomeLocParser parser, final int maxShardSize) {
List<Shard> shards = new ArrayList<Shard>();
for(SAMSequenceRecord refSequenceRecord: reference.getSequenceDictionary().getSequences()) {
for(int shardStart = 1; shardStart <= refSequenceRecord.getSequenceLength(); shardStart += maxShardSize) {
final int shardStop = Math.min(shardStart+maxShardSize-1, refSequenceRecord.getSequenceLength());
shards.add(new LocusShard(parser,
readsDataSource,
Collections.singletonList(parser.createGenomeLoc(refSequenceRecord.getSequenceName(),shardStart,shardStop)),
null));
}
}
return shards;
}
/**
* Creates an iterator for processing the reference restricted to the given intervals.
* @param readsDataSource the reads datasource to embed in the locus shard. TODO: decouple the creation of the shards themselves from the creation of the driving iterator so that datasources need not be passed to datasources.
* @param intervals the list of intervals to use when processing the reference.
* @param maxShardSize The maximum shard size which can be used to create this list.
* @return A schedule for performing a traversal over the given intervals.
*/
public Iterable<Shard> createShardsOverIntervals(final SAMDataSource readsDataSource, final GenomeLocSortedSet intervals, final int maxShardSize) {
List<Shard> shards = new ArrayList<Shard>();
for(GenomeLoc interval: intervals) {
while(interval.size() > maxShardSize) {
shards.add(new LocusShard(intervals.getGenomeLocParser(),
readsDataSource,
Collections.singletonList(intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart(),interval.getStart()+maxShardSize-1)),
null));
interval = intervals.getGenomeLocParser().createGenomeLoc(interval.getContig(),interval.getStart()+maxShardSize,interval.getStop());
}
shards.add(new LocusShard(intervals.getGenomeLocParser(),
readsDataSource,
Collections.singletonList(interval),
null));
}
return shards;
}
}
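The splitting loop above carves each interval into consecutive pieces of at most maxShardSize bases, so an interval of n bases yields ceil(n / maxShardSize) shards; a 250-base interval with maxShardSize 100, for example, produces pieces of 100, 100, and 50 bases. A standalone mirror of that arithmetic:

// Standalone mirror of the interval-splitting arithmetic used above.
static int expectedShardCount(final int intervalSize, final int maxShardSize) {
    return (intervalSize + maxShardSize - 1) / maxShardSize; // ceiling division
}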

View File

@ -5,7 +5,6 @@ import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.io.ThreadLocalOutputTracker;
@ -88,7 +87,7 @@ public class HierarchicalMicroScheduler extends MicroScheduler implements Hierar
this.threadPool = Executors.newFixedThreadPool(nThreadsToUse);
}
public Object execute( Walker walker, ShardStrategy shardStrategy ) {
public Object execute( Walker walker, Iterable<Shard> shardStrategy ) {
// Fast fail for walkers not supporting TreeReducible interface.
if (!( walker instanceof TreeReducible ))
throw new IllegalArgumentException("The GATK can currently run in parallel only with TreeReducible walkers");

View File

@ -7,7 +7,6 @@ import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
import org.broadinstitute.sting.gatk.io.OutputTracker;
@ -44,7 +43,7 @@ public class LinearMicroScheduler extends MicroScheduler {
* @param walker Computation to perform over dataset.
* @param shardStrategy A strategy for sharding the data.
*/
public Object execute(Walker walker, ShardStrategy shardStrategy) {
public Object execute(Walker walker, Iterable<Shard> shardStrategy) {
walker.initialize();
Accumulator accumulator = Accumulator.create(engine,walker);

View File

@ -30,11 +30,11 @@ import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.rmd.ReferenceOrderedDataSource;
import org.broadinstitute.sting.gatk.io.OutputTracker;
import org.broadinstitute.sting.gatk.iterators.NullSAMIterator;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.traversals.*;
import org.broadinstitute.sting.gatk.walkers.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -87,20 +87,20 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
* @param reads the informations associated with the reads
* @param reference the reference file
* @param rods the rods to include in the traversal
* @param nThreadsToUse Number of threads to utilize.
* @param threadAllocation Number of threads to utilize.
*
* @return The best-fit microscheduler.
*/
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, int nThreadsToUse) {
if (walker instanceof TreeReducible && nThreadsToUse > 1) {
public static MicroScheduler create(GenomeAnalysisEngine engine, Walker walker, SAMDataSource reads, IndexedFastaSequenceFile reference, Collection<ReferenceOrderedDataSource> rods, ThreadAllocation threadAllocation) {
if (walker instanceof TreeReducible && threadAllocation.getNumCPUThreads() > 1) {
if(walker.isReduceByInterval())
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s aggregates results by interval. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
if(walker instanceof ReadWalker)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s is a read walker. Due to a current limitation of the GATK, analyses of this type do not currently support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",nThreadsToUse));
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, nThreadsToUse);
logger.info(String.format("Running the GATK in parallel mode with %d concurrent threads",threadAllocation.getNumCPUThreads()));
return new HierarchicalMicroScheduler(engine, walker, reads, reference, rods, threadAllocation.getNumCPUThreads());
} else {
if(nThreadsToUse > 1)
if(threadAllocation.getNumCPUThreads() > 1)
throw new UserException.BadArgumentValue("nt", String.format("The analysis %s currently does not support parallel execution. Please run your analysis without the -nt option.", engine.getWalkerName(walker.getClass())));
return new LinearMicroScheduler(engine, walker, reads, reference, rods);
}
@ -156,7 +156,7 @@ public abstract class MicroScheduler implements MicroSchedulerMBean {
*
* @return the return type of the walker
*/
public abstract Object execute(Walker walker, ShardStrategy shardStrategy);
public abstract Object execute(Walker walker, Iterable<Shard> shardStrategy);
/**
* Retrieves the object responsible for tracking and managing output.

View File

@ -0,0 +1,93 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.resourcemanagement;
import org.broadinstitute.sting.utils.exceptions.UserException;
/**
* Models how threads are distributed between various components of the GATK.
*/
public class ThreadAllocation {
/**
* The number of CPU threads to be used by the GATK.
*/
private final int numCPUThreads;
/**
* Number of threads to devote exclusively to IO. Default is 0.
*/
private final int numIOThreads;
public int getNumCPUThreads() {
return numCPUThreads;
}
public int getNumIOThreads() {
return numIOThreads;
}
/**
* Construct the default thread allocation.
*/
public ThreadAllocation() {
this(1,null,null);
}
/**
* Set up the thread allocation. Default allocation is 1 CPU thread, 0 IO threads.
* (0 IO threads means that no threads are devoted exclusively to IO; IO work happens inline on the CPU threads).
* @param totalThreads Complete number of threads to allocate.
* @param numCPUThreads Total number of threads allocated to the traversal.
* @param numIOThreads Total number of threads allocated exclusively to IO.
*/
public ThreadAllocation(final int totalThreads, final Integer numCPUThreads, final Integer numIOThreads) {
// If no allocation information is present, allocate all threads to CPU
if(numCPUThreads == null && numIOThreads == null) {
this.numCPUThreads = totalThreads;
this.numIOThreads = 0;
}
// If only CPU threads are specified, allocate remainder to IO (minimum 0 dedicated IO threads).
else if(numIOThreads == null) {
if(numCPUThreads > totalThreads)
throw new UserException(String.format("Invalid thread allocation. User requested %d threads in total, but the count of cpu threads (%d) is higher than the total threads",totalThreads,numCPUThreads));
this.numCPUThreads = numCPUThreads;
this.numIOThreads = totalThreads - numCPUThreads;
}
// If only IO threads are specified, allocate remainder to CPU (minimum 1 dedicated CPU thread).
else if(numCPUThreads == null) {
if(numIOThreads > totalThreads)
throw new UserException(String.format("Invalid thread allocation. User requested %d threads in total, but the count of io threads (%d) is higher than the total threads",totalThreads,numIOThreads));
this.numCPUThreads = Math.max(1,totalThreads-numIOThreads);
this.numIOThreads = numIOThreads;
}
else {
if(numCPUThreads + numIOThreads != totalThreads)
throw new UserException(String.format("Invalid thread allocation. User requested %d threads in total, but the count of cpu threads (%d) + the count of io threads (%d) does not match",totalThreads,numCPUThreads,numIOThreads));
this.numCPUThreads = numCPUThreads;
this.numIOThreads = numIOThreads;
}
}
}
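To make the branches above concrete, a few examples of the resulting splits; each comment follows directly from the constructor logic:

// Hedged examples of the allocation rules (illustration only).
public class ThreadAllocationExamples {
    public static void main(final String[] args) {
        new ThreadAllocation();              // default: 1 CPU thread, 0 IO threads
        new ThreadAllocation(4, null, null); // all 4 threads go to CPU
        new ThreadAllocation(4, 3, null);    // 3 CPU threads, the remaining 1 to IO
        new ThreadAllocation(4, null, 3);    // 3 IO threads, the remaining 1 to CPU
        new ThreadAllocation(4, 2, 2);       // explicit split; the counts must sum to the total
    }
}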

View File

@ -3,6 +3,7 @@ package org.broadinstitute.sting.gatk.samples;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
@ -110,6 +111,17 @@ public class Sample implements Comparable<Sample> { // implements java.io.Serial
return infoDB.getSample(paternalID);
}
public ArrayList<Sample> getParents(){
ArrayList<Sample> parents = new ArrayList<Sample>(2);
Sample parent = getMother();
if(parent != null)
parents.add(parent);
parent = getFather();
if(parent != null)
parents.add(parent);
return parents;
}
/**
* Get gender of the sample
* @return property of key "gender" - must be of type Gender

View File

@ -49,5 +49,5 @@ public class DepthOfCoverage extends InfoFieldAnnotation implements StandardAnno
public List<String> getKeyNames() { return Arrays.asList(VCFConstants.DEPTH_KEY); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Filtered Depth")); }
public List<VCFInfoHeaderLine> getDescriptions() { return Arrays.asList(new VCFInfoHeaderLine(getKeyNames().get(0), 1, VCFHeaderLineType.Integer, "Approximate read depth; some reads may have been filtered")); }
}

View File

@ -56,7 +56,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.4" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
@ -77,13 +77,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public enum InfoFieldKey {
EFFECT_KEY ("SNPEFF_EFFECT", -1),
IMPACT_KEY ("SNPEFF_IMPACT", 0),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 1),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 2),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6),
EXON_ID_KEY ("SNPEFF_EXON_ID", 7),
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1);
FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", 1),
CODON_CHANGE_KEY ("SNPEFF_CODON_CHANGE", 2),
AMINO_ACID_CHANGE_KEY ("SNPEFF_AMINO_ACID_CHANGE", 3),
GENE_NAME_KEY ("SNPEFF_GENE_NAME", 4),
GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 5),
TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 7),
EXON_ID_KEY ("SNPEFF_EXON_ID", 8);
// Actual text of the key
private final String keyName;
@ -110,70 +110,53 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
// are validated against this list.
public enum EffectType {
// High-impact effects:
FRAME_SHIFT (EffectFunctionalClass.NONE, false),
STOP_GAINED (EffectFunctionalClass.NONSENSE, false),
START_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false),
SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false),
EXON_DELETED (EffectFunctionalClass.NONE, false),
STOP_LOST (EffectFunctionalClass.NONE, false),
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
EXON_DELETED,
FRAME_SHIFT,
STOP_GAINED,
STOP_LOST,
// Moderate-impact effects:
NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false),
CODON_CHANGE (EffectFunctionalClass.NONE, false),
CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false),
CODON_DELETION (EffectFunctionalClass.NONE, false),
CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false),
UTR_5_DELETED (EffectFunctionalClass.NONE, false),
UTR_3_DELETED (EffectFunctionalClass.NONE, false),
NON_SYNONYMOUS_CODING,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
UTR_5_DELETED,
UTR_3_DELETED,
// Low-impact effects:
SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false),
SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false),
START_GAINED (EffectFunctionalClass.NONE, false),
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
START_GAINED,
SYNONYMOUS_CODING,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
// Modifiers:
NONE (EffectFunctionalClass.NONE, true),
CHROMOSOME (EffectFunctionalClass.NONE, true),
INTERGENIC (EffectFunctionalClass.NONE, true),
UPSTREAM (EffectFunctionalClass.NONE, true),
UTR_5_PRIME (EffectFunctionalClass.NONE, true),
CDS (EffectFunctionalClass.NONE, true),
GENE (EffectFunctionalClass.NONE, true),
TRANSCRIPT (EffectFunctionalClass.NONE, true),
EXON (EffectFunctionalClass.NONE, true),
INTRON (EffectFunctionalClass.NONE, true),
UTR_3_PRIME (EffectFunctionalClass.NONE, true),
DOWNSTREAM (EffectFunctionalClass.NONE, true),
INTRON_CONSERVED (EffectFunctionalClass.NONE, true),
INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true),
REGULATION (EffectFunctionalClass.NONE, true),
CUSTOM (EffectFunctionalClass.NONE, true),
WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true);
private final EffectFunctionalClass functionalClass;
private final boolean isModifier;
EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) {
this.functionalClass = functionalClass;
this.isModifier = isModifier;
}
public EffectFunctionalClass getFunctionalClass() {
return functionalClass;
}
public boolean isModifier() {
return isModifier;
}
NONE,
CHROMOSOME,
CUSTOM,
CDS,
GENE,
TRANSCRIPT,
EXON,
INTRON_CONSERVED,
UTR_5_PRIME,
UTR_3_PRIME,
DOWNSTREAM,
INTRAGENIC,
INTERGENIC,
INTERGENIC_CONSERVED,
UPSTREAM,
REGULATION,
INTRON
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of
// classifying some of the LOW impact effects as MODIFIERs.
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact, or as a MODIFIER.
public enum EffectImpact {
MODIFIER (0),
LOW (1),
@ -202,7 +185,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
UNKNOWN
}
// We assign a functional class to each SnpEff effect.
// SnpEff assigns a functional class to each effect.
public enum EffectFunctionalClass {
NONE (0),
SILENT (1),
@ -379,13 +362,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
public List<String> getKeyNames() {
return Arrays.asList( InfoFieldKey.EFFECT_KEY.getKeyName(),
InfoFieldKey.IMPACT_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(),
InfoFieldKey.CODON_CHANGE_KEY.getKeyName(),
InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(),
InfoFieldKey.GENE_NAME_KEY.getKeyName(),
InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(),
InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(),
InfoFieldKey.EXON_ID_KEY.getKeyName(),
InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()
InfoFieldKey.EXON_ID_KEY.getKeyName()
);
}
@ -393,13 +376,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return Arrays.asList(
new VCFInfoHeaderLine(InfoFieldKey.EFFECT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.IMPACT_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())),
new VCFInfoHeaderLine(InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant (in HGVS style)"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values()))
new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
@ -409,6 +392,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
private EffectFunctionalClass functionalClass;
private String codonChange;
private String aminoAcidChange;
private String geneName;
@ -420,16 +404,21 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private String parseError = null;
private boolean isWellFormed = true;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR = 10;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR = 11;
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
// errors come after warnings, not vice versa:
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
// If there is either a warning OR an error, it will be in the last field. If there is both
// a warning AND an error, the warning will be in the second-to-last field, and the error will
// be in the last field.
private static final int SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR - 1;
private static final int SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 2;
private static final int SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR = NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR - 1;
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
// Position of the field indicating whether the effect is coding or non-coding. This field is used
// in selecting the most significant effect, but is not included in the annotations we return
// since it can be deduced from the SNPEFF_GENE_BIOTYPE field.
private static final int SNPEFF_CODING_FIELD_INDEX = 6;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
@ -447,11 +436,14 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_EITHER_WARNING_OR_ERROR ) {
parseError(String.format("SnpEff issued the following warning or error: \"%s\"",
effectMetadata[SNPEFF_WARNING_OR_ERROR_FIELD_UPON_SINGLE_ERROR]));
}
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_BOTH_WARNING_AND_ERROR ) {
parseError(String.format("SnpEff issued the following warning: \"%s\", and the following error: \"%s\"",
effectMetadata[SNPEFF_WARNING_FIELD_UPON_BOTH_WARNING_AND_ERROR],
effectMetadata[SNPEFF_ERROR_FIELD_UPON_BOTH_WARNING_AND_ERROR]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
@ -461,23 +453,33 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return;
}
if ( effect != null && effect.isModifier() ) {
impact = EffectImpact.MODIFIER;
// The impact field will never be empty, and should always contain one of the enumerated values:
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
}
else {
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
}
// The functional class field will be empty when the effect has no functional class associated with it:
if ( effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()].trim().length() > 0 ) {
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]);
functionalClass = EffectFunctionalClass.valueOf(effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]));
parseError(String.format("Unrecognized value for effect functional class: %s", effectMetadata[InfoFieldKey.FUNCTIONAL_CLASS_KEY.getFieldIndex()]));
}
}
else {
functionalClass = EffectFunctionalClass.NONE;
}
codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.AMINO_ACID_CHANGE_KEY.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.GENE_NAME_KEY.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.GENE_BIOTYPE_KEY.getFieldIndex()];
// The coding field will be empty when SnpEff has no coding info for the effect:
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
@ -534,7 +536,7 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
return true;
}
else if ( impact.isSameImpactAs(other.impact) ) {
return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass());
return functionalClass.isHigherPriorityThan(other.functionalClass);
}
return false;
@ -545,13 +547,13 @@ public class SnpEff extends InfoFieldAnnotation implements RodRequiringAnnotatio
addAnnotation(annotations, InfoFieldKey.EFFECT_KEY.getKeyName(), effect.toString());
addAnnotation(annotations, InfoFieldKey.IMPACT_KEY.getKeyName(), impact.toString());
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), functionalClass.toString());
addAnnotation(annotations, InfoFieldKey.CODON_CHANGE_KEY.getKeyName(), codonChange);
addAnnotation(annotations, InfoFieldKey.AMINO_ACID_CHANGE_KEY.getKeyName(), aminoAcidChange);
addAnnotation(annotations, InfoFieldKey.GENE_NAME_KEY.getKeyName(), geneName);
addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID);
addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString());
return annotations;
}
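For reference, a hedged sketch of splitting one SnpEff 2.0.4-style EFF entry of the form EFFECT_NAME(field1|...|field9) into the pieces consumed above; the sample value is illustrative, not taken from real SnpEff output:

// Illustrative split of one EFF entry into an effect name plus nine metadata fields.
static void parseEffExample() {
    final String eff = "NON_SYNONYMOUS_CODING(MODERATE|MISSENSE|aCg/aTg|T123M|GENE1|protein_coding|CODING|TRANSCRIPT1|4)";
    final String effectName = eff.substring(0, eff.indexOf('('));
    final String[] metadata = eff.substring(eff.indexOf('(') + 1, eff.lastIndexOf(')')).split("\\|", -1);
    // metadata.length == 9 here, matching EXPECTED_NUMBER_OF_METADATA_FIELDS;
    // a 10th field would carry a single warning or error, an 11th would carry both.
}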

View File

@ -34,7 +34,7 @@ import org.broadinstitute.sting.utils.BaseUtils;
* Time: 6:46:09 PM
* To change this template use File | Settings | File Templates.
*/
enum DiploidGenotype {
public enum DiploidGenotype {
AA ('A', 'A'),
AC ('A', 'C'),
AG ('A', 'G'),

View File

@ -28,7 +28,6 @@ package org.broadinstitute.sting.gatk.walkers.genotyper;
import net.sf.samtools.SAMUtils;
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.fragments.FragmentCollection;
import org.broadinstitute.sting.utils.fragments.FragmentUtils;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.QualityUtils;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -275,19 +274,20 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
public int add(PileupElement elt, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
byte obsBase = elt.getBase();
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
if ( elt.isReducedRead() ) {
// reduced read representation
byte qual = elt.getQual();
if ( BaseUtils.isRegularBase( elt.getBase() )) {
if ( BaseUtils.isRegularBase( obsBase )) {
add(obsBase, qual, (byte)0, (byte)0, elt.getRepresentativeCount()); // fast calculation of n identical likelihoods
return elt.getRepresentativeCount(); // we added nObs bases here
} else // odd bases or deletions => don't use them
return 0;
} else {
byte qual = qualToUse(elt, ignoreBadBases, capBaseQualsAtMappingQual, minBaseQual);
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
}
// odd bases or deletions => don't use them
return 0;
}
return qual > 0 ? add(obsBase, qual, (byte)0, (byte)0, 1) : 0;
}
public int add(List<PileupElement> overlappingPair, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
@ -511,20 +511,19 @@ public class DiploidSNPGenotypeLikelihoods implements Cloneable {
* @return the quality score to use for this pileup element, or 0 if the base should be skipped.
*/
private static byte qualToUse(PileupElement p, boolean ignoreBadBases, boolean capBaseQualsAtMappingQual, int minBaseQual) {
if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) ) {
if ( ignoreBadBases && !BaseUtils.isRegularBase( p.getBase() ) )
return 0;
} else {
byte qual = p.getQual();
if ( qual > SAMUtils.MAX_PHRED_SCORE )
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
if ( capBaseQualsAtMappingQual )
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
if ( (int)qual < minBaseQual )
qual = (byte)0;
byte qual = p.getQual();
return qual;
}
if ( qual > SAMUtils.MAX_PHRED_SCORE )
throw new UserException.MalformedBAM(p.getRead(), String.format("the maximum allowed quality score is %d, but a quality of %d was observed in read %s. Perhaps your BAM incorrectly encodes the quality scores in Sanger format; see http://en.wikipedia.org/wiki/FASTQ_format for more details", SAMUtils.MAX_PHRED_SCORE, qual, p.getRead().getReadName()));
if ( capBaseQualsAtMappingQual )
qual = (byte)Math.min((int)p.getQual(), p.getMappingQual());
if ( (int)qual < minBaseQual )
qual = (byte)0;
return qual;
}
// -----------------------------------------------------------------------------------------------------------------
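Concretely, under the rules above a base of quality 30 on a read with mapping quality 20 is used at quality 20 when capping is enabled, and any capped value below minBaseQual is zeroed so the caller skips the base. A standalone mirror of that arithmetic:

// Standalone mirror of the capping rules in qualToUse (illustration only).
static byte cappedQual(final byte baseQual, final int mappingQual,
                       final boolean capAtMappingQual, final int minBaseQual) {
    byte qual = baseQual;
    if (capAtMappingQual)
        qual = (byte)Math.min((int)baseQual, mappingQual);
    return ((int)qual < minBaseQual) ? (byte)0 : qual;
}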

View File

@ -26,7 +26,6 @@
package org.broadinstitute.sting.gatk.walkers.genotyper;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.AlignmentContextUtils;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
@ -36,7 +35,6 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.pileup.PileupElement;
import org.broadinstitute.sting.utils.pileup.ReadBackedPileup;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.Map;
@ -83,8 +81,7 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
* @param priors priors to use for GLs
* @param GLs hash of sample->GL to fill in
* @param alternateAlleleToUse the alternate allele to use, null if not set
*
* @param useBAQedPileup
* @param useBAQedPileup should we use the BAQed pileup or the raw one?
* @return genotype likelihoods per sample for AA, AB, BB
*/
public abstract Allele getLikelihoods(RefMetaDataTracker tracker,
@ -93,13 +90,14 @@ public abstract class GenotypeLikelihoodsCalculationModel implements Cloneable {
AlignmentContextUtils.ReadOrientation contextType,
GenotypePriors priors,
Map<String, MultiallelicGenotypeLikelihoods> GLs,
Allele alternateAlleleToUse, boolean useBAQedPileup);
Allele alternateAlleleToUse,
boolean useBAQedPileup);
protected int getFilteredDepth(ReadBackedPileup pileup) {
int count = 0;
for ( PileupElement p : pileup ) {
if ( BaseUtils.isRegularBase( p.getBase() ) )
count++;
count += p.getRepresentativeCount();
}
return count;

View File

@ -258,7 +258,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
Set<VCFFormatHeaderLine> result = new HashSet<VCFFormatHeaderLine>();
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_KEY, 1, VCFHeaderLineType.String, "Genotype"));
result.add(new VCFFormatHeaderLine(VCFConstants.GENOTYPE_QUALITY_KEY, 1, VCFHeaderLineType.Float, "Genotype Quality"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Read Depth (only filtered reads used for calling)"));
result.add(new VCFFormatHeaderLine(VCFConstants.DEPTH_KEY, 1, VCFHeaderLineType.Integer, "Approximate read depth (reads with MQ=255 or with bad mates are filtered)"));
result.add(new VCFFormatHeaderLine(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY, VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
return result;

View File

@ -1,140 +0,0 @@
/*
* Copyright (c) 2010.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.qc;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.commandline.Output;
import org.broadinstitute.sting.gatk.walkers.ReadPairWalker;
import org.broadinstitute.sting.utils.collections.ExpandingArrayList;
import java.io.PrintStream;
import java.util.Collection;
import java.util.List;
/**
* Counts the number of read pairs encountered in a file sorted in
* query name order. Breaks counts down by total pairs and number
* of paired reads.
*
*
* <h2>Input</h2>
* <p>
* One or more bam files.
* </p>
*
* <h2>Output</h2>
* <p>
* Number of pairs seen.
* </p>
*
* <h2>Examples</h2>
* <pre>
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T CountPairs \
* -o output.txt \
* -I input.bam
* </pre>
*
* @author mhanna
*/
public class CountPairsWalker extends ReadPairWalker<Integer,Long> {
@Output
private PrintStream out;
/**
* How many reads are the first in a pair, based on flag 0x0040 from the SAM spec.
*/
private long firstOfPair = 0;
/**
* How many reads are the second in a pair, based on flag 0x0080 from the SAM spec.
*/
private long secondOfPair = 0;
/**
* A breakdown of the total number of reads seen with exactly the same read name.
*/
private List<Long> pairCountsByType = new ExpandingArrayList<Long>();
/**
* Maps a read pair to a single map-phase value; semantics are determined by the implementer.
* @param reads Collection of reads having the same name.
* @return Semantics defined by implementer.
*/
@Override
public Integer map(Collection<SAMRecord> reads) {
if(pairCountsByType.get(reads.size()) != null)
pairCountsByType.set(reads.size(),pairCountsByType.get(reads.size())+1);
else
pairCountsByType.set(reads.size(),1L);
for(SAMRecord read: reads) {
if(read.getFirstOfPairFlag()) firstOfPair++;
if(read.getSecondOfPairFlag()) secondOfPair++;
}
return 1;
}
/**
* No pairs at the beginning of a traversal.
* @return 0 always.
*/
@Override
public Long reduceInit() {
return 0L;
}
/**
* Combine number of pairs seen in this iteration (always 1) with total number of pairs
* seen in previous iterations.
* @param value Pairs in this iteration (1), from the map function.
* @param sum Count of all pairs in prior iterations.
* @return All pairs encountered in previous iterations + all pairs encountered in this iteration (sum + 1).
*/
@Override
public Long reduce(Integer value, Long sum) {
return value + sum;
}
/**
* Print summary statistics over the entire traversal.
* @param sum A count of all read pairs viewed.
*/
@Override
public void onTraversalDone(Long sum) {
out.printf("Total number of pairs : %d%n",sum);
out.printf("Total number of first reads in pair : %d%n",firstOfPair);
out.printf("Total number of second reads in pair: %d%n",secondOfPair);
for(int i = 1; i < pairCountsByType.size(); i++) {
if(pairCountsByType.get(i) == null)
continue;
out.printf("Pairs of size %d: %d%n",i,pairCountsByType.get(i));
}
}
}

View File

@ -270,8 +270,8 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
private double MENDELIAN_VIOLATION_QUAL_THRESHOLD = 0;
/**
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so use it only for a reasonable
* number of variants. Use --select_random_fraction for larger numbers of variants.
* Variants are kept in memory to guarantee that exactly n variants will be chosen randomly, so make sure you supply the program with enough memory
* given your input set. This option will NOT work well for large callsets; use --select_random_fraction for sets with a large number of variants.
*/
@Argument(fullName="select_random_number", shortName="number", doc="Selects a number of variants at random from the variant track", required=false)
private int numRandom = 0;
@ -527,7 +527,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
}
}
if (SELECT_RANDOM_NUMBER) {
randomlyAddVariant(++variantNumber, sub, ref.getBase());
randomlyAddVariant(++variantNumber, sub);
}
else if (!SELECT_RANDOM_FRACTION || ( GenomeAnalysisEngine.getRandomGenerator().nextDouble() < fractionRandom)) {
vcfWriter.add(sub);
@ -691,7 +691,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> {
return new VariantContextBuilder(builder.make()).attributes(attributes).make();
}
private void randomlyAddVariant(int rank, VariantContext vc, byte refBase) {
private void randomlyAddVariant(int rank, VariantContext vc) {
if (nVariantsAdded < numRandom)
variantArray[nVariantsAdded++] = new RandomVariantStructure(vc);

View File

@ -554,4 +554,54 @@ public class GenomeLocParser {
return createGenomeLoc(contigName,contig.getSequenceIndex(),1,contig.getSequenceLength(), true);
}
/**
 * Creates a loc of up to maxBasePairs in length immediately to the left of loc (ending at the loc start - 1).
 * @param loc The original loc
 * @param maxBasePairs The maximum number of base pairs
 * @return The contiguous loc of up to maxBasePairs length, or null if the loc is already at the start of the contig.
 */
@Requires({"loc != null", "maxBasePairs > 0"})
public GenomeLoc createGenomeLocAtStart(GenomeLoc loc, int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int start = loc.getStart() - maxBasePairs;
int stop = loc.getStart() - 1;
if (start < 1)
start = 1;
if (stop < 1)
return null;
return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
/**
 * Creates a loc of up to maxBasePairs in length immediately to the right of loc (starting at the loc stop + 1).
 * @param loc The original loc
 * @param maxBasePairs The maximum number of base pairs
 * @return The contiguous loc of up to maxBasePairs length, or null if the loc is already at the end of the contig.
 */
@Requires({"loc != null", "maxBasePairs > 0"})
public GenomeLoc createGenomeLocAtStop(GenomeLoc loc, int maxBasePairs) {
if (GenomeLoc.isUnmapped(loc))
return null;
String contigName = loc.getContig();
SAMSequenceRecord contig = contigInfo.getSequence(contigName);
int contigIndex = contig.getSequenceIndex();
int contigLength = contig.getSequenceLength();
int start = loc.getStop() + 1;
int stop = loc.getStop() + maxBasePairs;
if (start > contigLength)
return null;
if (stop > contigLength)
stop = contigLength;
return createGenomeLoc(contigName, contigIndex, start, stop, true);
}
}
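A quick usage sketch for the two flanking helpers above (coordinates hypothetical,
parser construction as used elsewhere in this codebase):

    GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
    GenomeLoc loc = parser.parseGenomeLoc("chr1:100-200");
    GenomeLoc left  = parser.createGenomeLocAtStart(loc, 50);  // chr1:50-99
    GenomeLoc right = parser.createGenomeLocAtStop(loc, 50);   // chr1:201-250
    // Both return null when no flank exists, e.g. at a contig edge or for unmapped locs.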

View File

@ -171,6 +171,9 @@ public class ReadClipper {
clippedRead = op.apply(algorithm, clippedRead);
}
wasClipped = true;
ops.clear();
if ( clippedRead.isEmpty() )
return new GATKSAMRecord( clippedRead.getHeader() );
return clippedRead;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e); // this should never happen

View File

@ -353,7 +353,7 @@ public class StandardVCFWriter extends IndexingVCFWriter {
// some exceptions
if ( key.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
if ( Math.abs(g.getLog10PError() + Genotype.NO_LOG10_PERROR) < 1e-6)
if ( ! g.hasLog10PError() )
val = VCFConstants.MISSING_VALUE_v4;
else {
val = getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
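The replaced epsilon test was a proxy for "no quality set"; hasLog10PError() states
the intent directly and cannot misclassify a real quality that happens to fall near
NO_LOG10_PERROR. The new branch, condensed (same names as in the hunk above):

    val = g.hasLog10PError()
            ? getQualValue(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL))
            : VCFConstants.MISSING_VALUE_v4;   // "."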

View File

@ -233,8 +233,12 @@ public class IntervalUtils {
*
 * Returns null if there are no differences, otherwise returns a string describing the difference
 * (useful for unit tests). Assumes both lists are sorted.
 *
 * @param masterArg sorted master genome locs
 * @param testArg sorted test genome locs
 * @return null if there are no differences, otherwise a string describing the difference
*/
public static final String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
public static String equateIntervals(List<GenomeLoc> masterArg, List<GenomeLoc> testArg) {
LinkedList<GenomeLoc> master = new LinkedList<GenomeLoc>(masterArg);
LinkedList<GenomeLoc> test = new LinkedList<GenomeLoc>(testArg);
@ -317,23 +321,6 @@ public class IntervalUtils {
return lengths;
}
/**
* Counts the number of interval files an interval list can be split into using scatterIntervalArguments.
* @param locs The genome locs.
* @return The maximum number of parts the intervals can be split into.
*/
public static int countContigIntervals(List<GenomeLoc> locs) {
int maxFiles = 0;
String contig = null;
for (GenomeLoc loc: locs) {
if (contig == null || !contig.equals(loc.getContig())) {
maxFiles++;
contig = loc.getContig();
}
}
return maxFiles;
}
/**
* Splits an interval list into multiple files.
* @param fileHeader The sam file header.
@ -373,7 +360,6 @@ public class IntervalUtils {
* @return A list of lists of genome locs, split according to splits
*/
public static List<List<GenomeLoc>> splitIntervalsToSubLists(List<GenomeLoc> locs, List<Integer> splits) {
int locIndex = 1;
int start = 0;
List<List<GenomeLoc>> sublists = new ArrayList<List<GenomeLoc>>(splits.size());
for (Integer stop: splits) {
@ -465,7 +451,7 @@ public class IntervalUtils {
@Requires({"remaining != null", "!remaining.isEmpty()", "idealSplitSize > 0"})
@Ensures({"result != null"})
final static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
static SplitLocusRecursive splitLocusIntervals1(LinkedList<GenomeLoc> remaining, long idealSplitSize) {
final List<GenomeLoc> split = new ArrayList<GenomeLoc>();
long size = 0;
@ -579,10 +565,101 @@ public class IntervalUtils {
}
}
public static final long intervalSize(final List<GenomeLoc> locs) {
public static long intervalSize(final List<GenomeLoc> locs) {
long size = 0;
for ( final GenomeLoc loc : locs )
size += loc.size();
return size;
}
public static void writeFlankingIntervals(File reference, File inputIntervals, File flankingIntervals, int basePairs) {
ReferenceDataSource referenceDataSource = new ReferenceDataSource(reference);
GenomeLocParser parser = new GenomeLocParser(referenceDataSource.getReference());
List<GenomeLoc> originalList = intervalFileToList(parser, inputIntervals.getAbsolutePath());
if (originalList.isEmpty())
throw new UserException.MalformedFile(inputIntervals, "File contains no intervals");
List<GenomeLoc> flankingList = getFlankingIntervals(parser, originalList, basePairs);
if (flankingList.isEmpty())
throw new UserException.MalformedFile(inputIntervals, "Unable to produce any flanks for the intervals");
SAMFileHeader samFileHeader = new SAMFileHeader();
samFileHeader.setSequenceDictionary(referenceDataSource.getReference().getSequenceDictionary());
IntervalList intervalList = new IntervalList(samFileHeader);
int i = 0;
for (GenomeLoc loc: flankingList)
intervalList.add(toInterval(loc, ++i));
intervalList.write(flankingIntervals);
}
/**
* Returns a list of intervals between the passed-in locs. Does not extend UNMAPPED locs.
* @param parser A genome loc parser for creating the new intervals
* @param locs Original genome locs
* @param basePairs Number of base pairs on each side of loc
* @return The list of intervals between the locs
*/
public static List<GenomeLoc> getFlankingIntervals(final GenomeLocParser parser, final List<GenomeLoc> locs, final int basePairs) {
List<GenomeLoc> sorted = sortAndMergeIntervals(parser, locs, IntervalMergingRule.ALL).toList();
if (sorted.size() == 0)
return Collections.emptyList();
LinkedHashMap<String, List<GenomeLoc>> locsByContig = splitByContig(sorted);
List<GenomeLoc> expanded = new ArrayList<GenomeLoc>();
for (String contig: locsByContig.keySet()) {
List<GenomeLoc> contigLocs = locsByContig.get(contig);
int contigLocsSize = contigLocs.size();
GenomeLoc startLoc, stopLoc;
// Create loc at start of the list
startLoc = parser.createGenomeLocAtStart(contigLocs.get(0), basePairs);
if (startLoc != null)
expanded.add(startLoc);
// Create locs between each loc[i] and loc[i+1]
for (int i = 0; i < contigLocsSize - 1; i++) {
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(i), basePairs);
startLoc = parser.createGenomeLocAtStart(contigLocs.get(i + 1), basePairs);
if (stopLoc.getStop() + 1 >= startLoc.getStart()) {
// NOTE: This is different from GenomeLoc.merge():
// merge() returns a loc covering the entire range of stop and start,
// possibly including positions inside loc(i) or loc(i+1).
// Here the flank must run only from the start of stopLoc to the stop of startLoc.
GenomeLoc merged = parser.createGenomeLoc(
stopLoc.getContig(), stopLoc.getStart(), startLoc.getStop());
expanded.add(merged);
} else {
expanded.add(stopLoc);
expanded.add(startLoc);
}
}
// Create loc at the end of the list
stopLoc = parser.createGenomeLocAtStop(contigLocs.get(contigLocsSize - 1), basePairs);
if (stopLoc != null)
expanded.add(stopLoc);
}
return expanded;
}
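// Worked example of the merge rule above (hypothetical intervals, 50 bp flanks):
// for locs chr1:100-200 and chr1:260-300, the right flank of the first is
// chr1:201-250 and the left flank of the second is chr1:210-259; since
// 250 + 1 >= 210 the two are joined from the start of the stop-flank to the
// stop of the start-flank, i.e. createGenomeLoc("chr1", 201, 259).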
private static LinkedHashMap<String, List<GenomeLoc>> splitByContig(List<GenomeLoc> sorted) {
LinkedHashMap<String, List<GenomeLoc>> splits = new LinkedHashMap<String, List<GenomeLoc>>();
GenomeLoc last = null;
List<GenomeLoc> contigLocs = null;
for (GenomeLoc loc: sorted) {
if (GenomeLoc.isUnmapped(loc))
continue;
if (last == null || !last.onSameContig(loc)) {
contigLocs = new ArrayList<GenomeLoc>();
splits.put(loc.getContig(), contigLocs);
}
contigLocs.add(loc);
last = loc;
}
return splits;
}
}

View File

@ -261,7 +261,7 @@ public class GATKSAMRecord extends BAMRecord {
* @return true if the read has no bases
*/
public boolean isEmpty() {
return this.getReadLength() == 0;
return super.getReadBases() == null || super.getReadLength() == 0;
}
/**

View File

@ -25,8 +25,7 @@ final class CommonInfo {
public CommonInfo(String name, double log10PError, Set<String> filters, Map<String, Object> attributes) {
this.name = name;
setLog10PError(log10PError);
if ( filters != null && ! filters.isEmpty() )
this.filters = filters;
this.filters = filters;
if ( attributes != null && ! attributes.isEmpty() ) {
this.attributes = attributes;
}
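The unconditional filter assignment lets an empty set survive construction, which
is what distinguishes "filters were applied and all passed" from "no filtering was
attempted" (null). A hedged sketch of the distinction this preserves (the VCF
renderings shown are the conventional mapping, not verified against this writer):

    Set<String> none = null;                              // -> FILTER "."  (unfiltered)
    Set<String> pass = Collections.<String>emptySet();    // -> FILTER "PASS"
    Set<String> fail = Collections.singleton("LowQual");  // -> FILTER "LowQual"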

View File

@ -25,7 +25,13 @@
package org.broadinstitute.sting.utils.variantcontext;
import org.broad.tribble.TribbleException;
import org.broadinstitute.sting.utils.MathUtils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.EnumMap;
import java.util.Map;
public class GenotypeLikelihoods {
public static final boolean CAP_PLS = false;
@ -94,6 +100,47 @@ public class GenotypeLikelihoods {
return likelihoodsAsString_PLs;
}
/**
 * Returns the genotype likelihoods as an EnumMap with Genotype.Type as keys and likelihoods as values.
 * Returns null in case of missing likelihoods.
 */
public EnumMap<Genotype.Type,Double> getAsMap(boolean normalizeFromLog10){
//Make sure that the log10likelihoods are set
double[] likelihoods = normalizeFromLog10 ? MathUtils.normalizeFromLog10(getAsVector()) : getAsVector();
if(likelihoods == null)
return null;
EnumMap<Genotype.Type,Double> likelihoodsMap = new EnumMap<Genotype.Type, Double>(Genotype.Type.class);
likelihoodsMap.put(Genotype.Type.HOM_REF,likelihoods[Genotype.Type.HOM_REF.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HET,likelihoods[Genotype.Type.HET.ordinal()-1]);
likelihoodsMap.put(Genotype.Type.HOM_VAR, likelihoods[Genotype.Type.HOM_VAR.ordinal() - 1]);
return likelihoodsMap;
}
/**
 * Returns the negative log10 Genotype Quality (GQ) for the given genotype.
 * Returns Double.NEGATIVE_INFINITY in case of a missing genotype.
 */
public double getLog10GQ(Genotype.Type genotype){
EnumMap<Genotype.Type,Double> likelihoods = getAsMap(false);
if(likelihoods == null)
return Double.NEGATIVE_INFINITY;
double qual = Double.NEGATIVE_INFINITY;
for(Map.Entry<Genotype.Type,Double> likelihood : likelihoods.entrySet()){
if(likelihood.getKey() == genotype)
continue;
if(likelihood.getValue() > qual)
qual = likelihood.getValue();
}
//Quality of the most likely genotype = likelihood(most likely) - likelihood(2nd best)
qual = likelihoods.get(genotype) - qual;
//If the requested genotype is not the most likely one, report log10(1 - P(G)) instead
if (qual < 0) {
double[] normalized = MathUtils.normalizeFromLog10(getAsVector());
double chosenGenotype = normalized[genotype.ordinal()-1];
qual = Math.log10(1.0 - chosenGenotype);
}
return -1 * qual;
}
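// Worked trace of getLog10GQ (hypothetical log10 likelihoods
// HOM_REF = -0.2, HET = -1.5, HOM_VAR = -3.0):
//   getLog10GQ(HOM_REF): best other is HET, qual = -0.2 - (-1.5) = 1.3, returns -1.3
//   getLog10GQ(HET):     qual = -1.5 - (-0.2) = -1.3 < 0, so
//     P(HET) = 10^-1.5 / (10^-0.2 + 10^-1.5 + 10^-3.0) ~= 0.048
//     qual = log10(1 - 0.048) ~= -0.021, returns ~0.021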
private final static double[] parsePLsIntoLikelihoods(String likelihoodsAsString_PLs) {
if ( !likelihoodsAsString_PLs.equals(VCFConstants.MISSING_VALUE_v4) ) {
String[] strings = likelihoodsAsString_PLs.split(",");

View File

@ -318,7 +318,7 @@ public class VariantContext implements Feature { // to enable tribble intergrati
public VariantContext subContextFromSamples(Set<String> sampleNames, Collection<Allele> alleles) {
loadGenotypes();
VariantContextBuilder builder = new VariantContextBuilder(this);
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).make();
return builder.genotypes(genotypes.subsetToSamples(sampleNames)).alleles(alleles).make();
}
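// The alleles(...) call above is the fix: the allele-restricted overload now
// honors its second argument. Hedged usage sketch (sample set and alleles hypothetical):
//   Set<String> samples = Collections.singleton("NA12878");
//   Collection<Allele> alleles = Arrays.asList(Allele.create("A", true), Allele.create("C"));
//   VariantContext sub = vc.subContextFromSamples(samples, alleles);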
public VariantContext subContextFromSamples(Set<String> sampleNames) {

View File

@ -11,6 +11,7 @@ import org.broadinstitute.sting.gatk.executive.WindowMaker;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
@ -49,7 +50,7 @@ public abstract class LocusViewTemplate extends BaseTest {
SAMRecordIterator iterator = new SAMRecordIterator();
GenomeLoc shardBounds = genomeLocParser.createGenomeLoc("chr1", 1, 5);
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
Shard shard = new LocusShard(genomeLocParser, new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),Collections.singletonList(shardBounds),Collections.<SAMReaderID,SAMFileSpan>emptyMap());
WindowMaker windowMaker = new WindowMaker(shard,genomeLocParser,iterator,shard.getGenomeLocs());
WindowMaker.WindowMakerIterator window = windowMaker.next();
LocusShardDataProvider dataProvider = new LocusShardDataProvider(shard, null, genomeLocParser, window.getLocus(), window, null, null);

View File

@ -26,6 +26,7 @@ package org.broadinstitute.sting.gatk.datasources.reads;
import org.broadinstitute.sting.gatk.datasources.reads.LocusShard;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -42,7 +43,7 @@ import java.util.Collections;
public class MockLocusShard extends LocusShard {
public MockLocusShard(final GenomeLocParser genomeLocParser,final List<GenomeLoc> intervals) {
super( genomeLocParser,
new SAMDataSource(Collections.<SAMReaderID>emptyList(),genomeLocParser),
new SAMDataSource(Collections.<SAMReaderID>emptyList(),new ThreadAllocation(),null,genomeLocParser),
intervals,
null);
}

View File

@ -1,223 +0,0 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.List;
/**
*
* User: aaron
* Date: Apr 8, 2009
* Time: 8:14:23 PM
*
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
*
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*
*/
/**
* @author aaron
* @version 1.0
* @date Apr 8, 2009
* <p/>
* Class SAMBAMDataSourceUnitTest
* <p/>
* The test of the SAMBAM simple data source.
*/
public class SAMBAMDataSourceUnitTest extends BaseTest {
private List<SAMReaderID> readers;
private IndexedFastaSequenceFile seq;
private GenomeLocParser genomeLocParser;
/**
* This function does the setup of our parser, before each method call.
* <p/>
* Called before every test case method.
*/
@BeforeMethod
public void doForEachTest() throws FileNotFoundException {
readers = new ArrayList<SAMReaderID>();
// sequence
seq = new CachingIndexedFastaSequenceFile(new File(hg18Reference));
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
}
/**
* Tears down the test fixture after each call.
* <p/>
* Called after every test case method.
*/
@AfterMethod
public void undoForEachTest() {
seq = null;
readers.clear();
}
/** Test out that we can shard the file and iterate over every read */
@Test
public void testLinearBreakIterateAll() {
logger.warn("Executing testLinearBreakIterateAll");
// setup the data
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
// the sharding strat.
SAMDataSource data = new SAMDataSource(readers,genomeLocParser);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser);
int count = 0;
try {
for (Shard sh : strat) {
int readCount = 0;
count++;
GenomeLoc firstLocus = sh.getGenomeLocs().get(0), lastLocus = sh.getGenomeLocs().get(sh.getGenomeLocs().size()-1);
logger.debug("Start : " + firstLocus.getStart() + " stop : " + lastLocus.getStop() + " contig " + firstLocus.getContig());
logger.debug("count = " + count);
StingSAMIterator datum = data.seek(sh);
// for the first couple of shards make sure we can see the reads
if (count < 5) {
for (SAMRecord r : datum) {
}
readCount++;
}
datum.close();
// if we're over 100 shards, break out
if (count > 100) {
break;
}
}
}
catch (UserException.CouldNotReadInputFile e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
}
}
/** Test out that we can shard the file and iterate over every read */
@Test
public void testMergingTwoBAMFiles() {
logger.warn("Executing testMergingTwoBAMFiles");
// setup the test files
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
// the sharding strat.
SAMDataSource data = new SAMDataSource(readers,genomeLocParser);
ShardStrategy strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000,genomeLocParser);
ArrayList<Integer> readcountPerShard = new ArrayList<Integer>();
ArrayList<Integer> readcountPerShard2 = new ArrayList<Integer>();
// count up the first hundred shards
int shardsToCount = 100;
int count = 0;
try {
for (Shard sh : strat) {
int readCount = 0;
count++;
if (count > shardsToCount) {
break;
}
StingSAMIterator datum = data.seek(sh);
for (SAMRecord r : datum) {
readCount++;
}
readcountPerShard.add(readCount);
logger.debug("read count = " + readCount);
datum.close();
}
}
catch (UserException.CouldNotReadInputFile e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
}
// setup the data and the counter before our second run
readers.clear();
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
readers.add(new SAMReaderID(new File(validationDataLocation + "/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
count = 0;
// the sharding strat.
data = new SAMDataSource(readers,genomeLocParser);
strat = ShardStrategyFactory.shatter(data,seq,ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL, seq.getSequenceDictionary(), 100000, genomeLocParser);
logger.debug("Pile two:");
try {
for (Shard sh : strat) {
int readCount = 0;
count++;
// can we leave?
if (count > shardsToCount) {
break;
}
StingSAMIterator datum = data.seek(sh);
for (SAMRecord r : datum) {
readCount++;
}
readcountPerShard2.add(readCount);
logger.debug("read count = " + readCount);
datum.close();
}
}
catch (UserException.CouldNotReadInputFile e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
fail("testLinearBreakIterateAll: We Should get a UserException.CouldNotReadInputFile exception");
}
/*int pos = 0;
for (; pos < 100; pos++) {
if (!readcountPerShard.get(pos).equals(readcountPerShard2.get(pos))) {
fail("Shard number " + pos + " in the two approaches had different read counts, " + readcountPerShard.get(pos) + " and " + readcountPerShard2.get(pos));
}
} */
}
}

View File

@ -0,0 +1,147 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.datasources.reads;
import static org.testng.Assert.fail;
import net.sf.picard.reference.IndexedFastaSequenceFile;
import net.sf.samtools.SAMFileReader;
import net.sf.samtools.SAMRecord;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.arguments.ValidationExclusion;
import org.broadinstitute.sting.gatk.filters.ReadFilter;
import org.broadinstitute.sting.gatk.iterators.StingSAMIterator;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
* @author aaron
* @version 1.0
* @date Apr 8, 2009
* <p/>
* Class SAMDataSourceUnitTest
* <p/>
 * Tests of the SAM/BAM data source.
*/
public class SAMDataSourceUnitTest extends BaseTest {
private List<SAMReaderID> readers;
private IndexedFastaSequenceFile seq;
private GenomeLocParser genomeLocParser;
/**
* This function does the setup of our parser, before each method call.
* <p/>
* Called before every test case method.
*/
@BeforeMethod
public void doForEachTest() throws FileNotFoundException {
readers = new ArrayList<SAMReaderID>();
// sequence
seq = new CachingIndexedFastaSequenceFile(new File(b36KGReference));
genomeLocParser = new GenomeLocParser(seq.getSequenceDictionary());
}
/**
* Tears down the test fixture after each call.
* <p/>
* Called after every test case method.
*/
@AfterMethod
public void undoForEachTest() {
seq = null;
readers.clear();
}
/** Test out that we can shard the file and iterate over every read */
@Test
public void testLinearBreakIterateAll() {
logger.warn("Executing testLinearBreakIterateAll");
// setup the data
readers.add(new SAMReaderID(new File(validationDataLocation+"/NA12878.chrom6.SLX.SRP000032.2009_06.selected.bam"),new Tags()));
// the sharding strat.
SAMDataSource data = new SAMDataSource(readers,
new ThreadAllocation(),
null,
genomeLocParser,
false,
SAMFileReader.ValidationStringency.SILENT,
null,
null,
new ValidationExclusion(),
new ArrayList<ReadFilter>(),
false,
false);
Iterable<Shard> strat = data.createShardIteratorOverMappedReads(seq.getSequenceDictionary(),new LocusShardBalancer());
int count = 0;
try {
for (Shard sh : strat) {
int readCount = 0;
count++;
GenomeLoc firstLocus = sh.getGenomeLocs().get(0), lastLocus = sh.getGenomeLocs().get(sh.getGenomeLocs().size()-1);
logger.debug("Start : " + firstLocus.getStart() + " stop : " + lastLocus.getStop() + " contig " + firstLocus.getContig());
logger.debug("count = " + count);
StingSAMIterator datum = data.seek(sh);
// for the first couple of shards make sure we can see the reads
if (count < 5) {
    for (SAMRecord r : datum) {
        readCount++;
    }
}
datum.close();
// if we're over 100 shards, break out
if (count > 100) {
break;
}
}
}
catch (UserException.CouldNotReadInputFile e) {
e.printStackTrace();
fail("testLinearBreakIterateAll: unexpected UserException.CouldNotReadInputFile");
}
}
}
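The migration visible in this test: the ShardStrategyFactory.shatter(...) entry
point gives way to shard iterators produced by the data source itself, paired
with a balancer. Condensed before/after (arguments as used in these tests):

    // before
    ShardStrategy strat = ShardStrategyFactory.shatter(data, seq,
            ShardStrategyFactory.SHATTER_STRATEGY.LOCUS_EXPERIMENTAL,
            seq.getSequenceDictionary(), 100000, genomeLocParser);
    // after
    Iterable<Shard> strat = data.createShardIteratorOverMappedReads(
            seq.getSequenceDictionary(), new LocusShardBalancer());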

View File

@ -40,6 +40,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
import org.testng.Assert;
import org.testng.annotations.*;
import java.util.*;
@ -66,9 +67,9 @@ public class RefMetaDataTrackerUnitTest {
C = Allele.create("C");
G = Allele.create("G");
T = Allele.create("T");
AC_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, C).make());
AG_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, G).make());
AT_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, T).make());
AC_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, C)).make();
AG_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, G)).make();
AT_SNP = new VariantContextBuilder("x", "chr1", START_POS, START_POS, Arrays.asList(A, T)).make();
span10_10 = makeSpan(10, 10);
span1_20 = makeSpan(1, 20);
span10_20 = makeSpan(10, 20);

View File

@ -5,14 +5,13 @@ import net.sf.picard.reference.IndexedFastaSequenceFile;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.commandline.Tags;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.ReadMetrics;
import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.providers.ReadShardDataProvider;
import org.broadinstitute.sting.gatk.datasources.reads.ReadShardBalancer;
import org.broadinstitute.sting.gatk.datasources.reads.SAMDataSource;
import org.broadinstitute.sting.gatk.datasources.reads.Shard;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategy;
import org.broadinstitute.sting.gatk.datasources.reads.ShardStrategyFactory;
import org.broadinstitute.sting.gatk.datasources.reads.SAMReaderID;
import org.broadinstitute.sting.gatk.resourcemanagement.ThreadAllocation;
import org.broadinstitute.sting.gatk.walkers.qc.CountReadsWalker;
import org.broadinstitute.sting.gatk.walkers.Walker;
import org.broadinstitute.sting.utils.GenomeLocParser;
@ -66,7 +65,6 @@ public class TraverseReadsUnitTest extends BaseTest {
private List<SAMReaderID> bamList;
private Walker countReadWalker;
private File output;
private long readSize = 100000;
private TraverseReads traversalEngine = null;
private IndexedFastaSequenceFile ref = null;
@ -117,18 +115,14 @@ public class TraverseReadsUnitTest extends BaseTest {
/** Test out that we can shard the file and iterate over every read */
@Test
public void testUnmappedReadCount() {
SAMDataSource dataSource = new SAMDataSource(bamList,genomeLocParser);
ShardStrategy shardStrategy = ShardStrategyFactory.shatter(dataSource,ref, ShardStrategyFactory.SHATTER_STRATEGY.READS_EXPERIMENTAL,
ref.getSequenceDictionary(),
readSize,
genomeLocParser);
SAMDataSource dataSource = new SAMDataSource(bamList,new ThreadAllocation(),null,genomeLocParser);
Iterable<Shard> shardStrategy = dataSource.createShardIteratorOverAllReads(new ReadShardBalancer());
countReadWalker.initialize();
Object accumulator = countReadWalker.reduceInit();
while (shardStrategy.hasNext()) {
for(Shard shard: shardStrategy) {
traversalEngine.startTimersIfNecessary();
Shard shard = shardStrategy.next();
if (shard == null) {
fail("Shard == null");

View File

@ -33,7 +33,7 @@ public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
@ -42,7 +42,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -51,7 +51,7 @@ public class SnpEffUnitTest {
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
String[] effectMetadata = { "MEDIUM", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
@ -60,27 +60,27 @@ public class SnpEffUnitTest {
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseSnpEffWarningEffect() {
public void testParseSnpEffOneWarningOrErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_OR_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning or error: \"SNPEFF_WARNING_OR_ERROR_TEXT\"") );
}
@Test
public void testParseSnpEffErrorEffect() {
public void testParseSnpEffBothWarningAndErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING_TEXT", "SNPEFF_ERROR_TEXT" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: \"SNPEFF_WARNING_TEXT\", and the following error: \"SNPEFF_ERROR_TEXT\"") );
}
}
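The updated fixtures track snpEff 2.0.4's extra functional-class column
("MISSENSE") between the impact and the codon change, plus the trailing
warning/error fields. A hedged sketch of the layout these tests assume (field
names inferred from the arrays above, not taken from snpEff documentation):

    // { impact, functionalClass, codonChange, aminoAcidChange, gene, biotype,
    //   coding, transcript, exon[, warning[, error]] }
    String[] effectMetadata = { "MODERATE", "MISSENSE", "Aca/Gca", "T/A", "OR4F5",
            "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };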

View File

@ -32,7 +32,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("a6687f0d3830fa6e518b7874857f6f70"));
Arrays.asList("9beb795536e95954f810835c6058f2ad"));
executeTest("test file has annotations, asking for annotations, #1", spec);
}
@ -40,7 +40,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testHasAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("64b6804cb1e27826e3a47089349be581"));
Arrays.asList("2977bb30c8b84a5f4094fe6090658561"));
executeTest("test file has annotations, asking for annotations, #2", spec);
}
@ -64,7 +64,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking1() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b59508cf66da6b2de280a79b3b7d85b1"));
Arrays.asList("49d989f467b8d6d8f98f7c1b67cd4a05"));
executeTest("test file doesn't have annotations, asking for annotations, #1", spec);
}
@ -72,7 +72,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoAnnotsAsking2() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,000,000-10,050,000", 1,
Arrays.asList("09f8e840770a9411ff77508e0ed0837f"));
Arrays.asList("0948cd1dba7d61f283cc4cf2a7757d92"));
executeTest("test file doesn't have annotations, asking for annotations, #2", spec);
}
@ -80,7 +80,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testExcludeAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard -XA FisherStrand -XA ReadPosRankSumTest --variant:VCF3 " + validationDataLocation + "vcfexample2empty.vcf -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -L 1:10,020,000-10,021,000", 1,
Arrays.asList("b8e18b23568e4d2381f51d4430213040"));
Arrays.asList("33062eccd6eb73bc49440365430454c4"));
executeTest("test exclude annotations", spec);
}
@ -88,7 +88,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testOverwritingHeader() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample4.vcf -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -L 1:10,001,292", 1,
Arrays.asList("78d2c19f8107d865970dbaf3e12edd92"));
Arrays.asList("062155edec46a8c52243475fbf3a2943"));
executeTest("test overwriting header", spec);
}
@ -96,7 +96,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testNoReads() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("16e3a1403fc376320d7c69492cad9345"));
Arrays.asList("06635f2dd91b539bfbce9bf7914d8e43"));
executeTest("not passing it any reads", spec);
}
@ -104,7 +104,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithDbsnp() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --dbsnp " + b36dbSNP129 + " -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("3da8ca2b6bdaf6e92d94a8c77a71313d"));
Arrays.asList("820eeba1f6e3a0758a69d937c524a38e"));
executeTest("getting DB tag with dbSNP", spec);
}
@ -112,7 +112,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testDBTagWithHapMap() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --comp:H3 " + validationDataLocation + "fakeHM3.vcf -G Standard --variant " + validationDataLocation + "vcfexample3empty.vcf -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("1bc01c5b3bd0b7aef75230310c3ce688"));
Arrays.asList("31cc2ce157dd20771418c08d6b3be1fa"));
executeTest("getting DB tag with HM3", spec);
}
@ -120,7 +120,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpression() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.AF -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("ae30a1ac7bfbc3d22a327f8b689cad31"));
Arrays.asList("074865f8f8c0ca7bfd58681f396c49e9"));
executeTest("using expression", spec);
}
@ -128,7 +128,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
public void testUsingExpressionWithID() {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString() + " --resource:foo " + validationDataLocation + "targetAnnotations.vcf -G Standard --variant:VCF3 " + validationDataLocation + "vcfexample3empty.vcf -E foo.ID -L " + validationDataLocation + "vcfexample3empty.vcf", 1,
Arrays.asList("1b4921085b26cbfe07d53b7c947de1e5"));
Arrays.asList("97b26db8135d083566fb585a677fbe8a"));
executeTest("using expression with ID", spec);
}
@ -148,9 +148,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
"snpEff2.0.4.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429",
1,
Arrays.asList("122321a85e448f21679f6ca15c5e22ad")
Arrays.asList("51258f5c880bd1ca3eb45a1711335c66")
);
executeTest("Testing SnpEff annotations", spec);
}

View File

@ -5,7 +5,6 @@ import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.io.File;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;

View File

@ -6,23 +6,131 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class PhaseByTransmissionIntegrationTest extends WalkerTest {
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "/PhaseByTransmission";
private static String fundamentalTestVCF = phaseByTransmissionTestDataRoot + "/" + "FundamentalsTest.unfiltered.vcf";
private static String phaseByTransmissionTestDataRoot = validationDataLocation + "PhaseByTransmission/";
private static String goodFamilyFile = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.goodFamilies.ped";
private static String TNTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TN.vcf";
private static String TPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.TP.vcf";
private static String FPTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.FP.vcf";
private static String SpecialTest = phaseByTransmissionTestDataRoot + "PhaseByTransmission.IntegrationTest.Special.vcf";
//Tests using PbT on all genotypes with default parameters
//And all reporting options
@Test
public void testBasicFunctionality() {
public void testTrueNegativeMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + fundamentalTestVCF,
"-f NA12892+NA12891=NA12878",
"--variant " + TNTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("16fefda693156eadf1481fd9de23facb","9418a7a6405b78179ca13a67b8bfcc14")
);
executeTest("testTrueNegativeMV", spec);
}
@Test
public void testTruePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + TPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("14cf1d21a54d8b9fb506df178b634c56","efc66ae3d036715b721f9bd35b65d556")
);
executeTest("testTruePositiveMV", spec);
}
@Test
public void testFalsePositiveMV() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("f9b0fae9fe1e0f09b883a292b0e70a12","398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testFalsePositiveMV", spec);
}
@Test
public void testSpecialCases() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + SpecialTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("b8d1aa3789ce77b45430c62d13ee3006","a1a333e08fafb288cda0e7711909e1c3")
);
executeTest("testSpecialCases", spec);
}
//Test using a different prior.
//The FP file is reused; with the lowered prior, 3 of the false-positive sites turn into true positives.
@Test
public void testPriorOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-prior 1e-4",
"-mvf %s",
"-o %s"
),
2,
Arrays.asList("7201ce7cc47db5840ac6b647709f7c33","c11b5e7cd7459d90d0160f917eff3b1e")
);
executeTest("testPriorOption", spec);
}
//Test when running without MV reporting option
//This is the exact same test file as FP but should not generate a .mvf file
@Test
public void testMVFileOption() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T PhaseByTransmission",
"-NO_HEADER",
"-R " + b37KGReference,
"--variant " + FPTest,
"-ped "+ goodFamilyFile,
"-L 1:10109-10315",
"-o %s"
),
1,
Arrays.asList("")
Arrays.asList("398724bc1e65314cc5ee92706e05a3ee")
);
executeTest("testBasicFunctionality", spec);
executeTest("testMVFileOption", spec);
}
}

View File

@ -21,16 +21,16 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"--eval " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
"-L " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"-L " + validationDataLocation + "snpEff2.0.4.AFR.unfiltered.VariantAnnotator.output.vcf",
"-o %s"
),
1,
Arrays.asList("d9dcb352c53106f54fcc981f15d35a90")
Arrays.asList("a36414421621b377d6146d58d2fcecd0")
);
executeTest("testFunctionClassWithSnpeff", spec);
}

View File

@ -2,7 +2,6 @@ package org.broadinstitute.sting.utils;
import net.sf.samtools.SAMFileHeader;
import net.sf.samtools.SAMSequenceDictionary;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
@ -11,6 +10,7 @@ import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
/**
@ -36,7 +36,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigIndexValid() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(genomeLocParser.getContigIndex("chr1"), 0); // should be in the reference
}
@ -67,7 +66,6 @@ public class GenomeLocParserUnitTest extends BaseTest {
@Test
public void testGetContigInfoKnownContig() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 10);
assertEquals(0, "chr1".compareTo(genomeLocParser.getContigInfo("chr1").getSequenceName())); // should be in the reference
}
@ -191,4 +189,104 @@ public class GenomeLocParserUnitTest extends BaseTest {
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",1,-2)); // bad stop
assertTrue(!genomeLocParser.isValidGenomeLoc("chr1",10,11)); // bad start, past end
}
private static class FlankingGenomeLocTestData extends TestDataProvider {
final GenomeLocParser parser;
final int basePairs;
final GenomeLoc original, flankStart, flankStop;
private FlankingGenomeLocTestData(String name, GenomeLocParser parser, int basePairs, String original, String flankStart, String flankStop) {
super(FlankingGenomeLocTestData.class, name);
this.parser = parser;
this.basePairs = basePairs;
this.original = parse(parser, original);
this.flankStart = flankStart == null ? null : parse(parser, flankStart);
this.flankStop = flankStop == null ? null : parse(parser, flankStop);
}
private static GenomeLoc parse(GenomeLocParser parser, String str) {
return "unmapped".equals(str) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(str);
}
}
@DataProvider(name = "flankingGenomeLocs")
public Object[][] getFlankingGenomeLocs() {
int contigLength = 10000;
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, contigLength);
GenomeLocParser parser = new GenomeLocParser(header.getSequenceDictionary());
new FlankingGenomeLocTestData("atStartBase1", parser, 1,
"chr1:1", null, "chr1:2");
new FlankingGenomeLocTestData("atStartBase50", parser, 50,
"chr1:1", null, "chr1:2-51");
new FlankingGenomeLocTestData("atStartRange50", parser, 50,
"chr1:1-10", null, "chr1:11-60");
new FlankingGenomeLocTestData("atEndBase1", parser, 1,
"chr1:" + contigLength, "chr1:" + (contigLength - 1), null);
new FlankingGenomeLocTestData("atEndBase50", parser, 50,
"chr1:" + contigLength, String.format("chr1:%d-%d", contigLength - 50, contigLength - 1), null);
new FlankingGenomeLocTestData("atEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 10, contigLength),
String.format("chr1:%d-%d", contigLength - 60, contigLength - 11),
null);
new FlankingGenomeLocTestData("nearStartBase1", parser, 1,
"chr1:2", "chr1:1", "chr1:3");
new FlankingGenomeLocTestData("nearStartRange50", parser, 50,
"chr1:21-30", "chr1:1-20", "chr1:31-80");
new FlankingGenomeLocTestData("nearEndBase1", parser, 1,
"chr1:" + (contigLength - 1), "chr1:" + (contigLength - 2), "chr1:" + contigLength);
new FlankingGenomeLocTestData("nearEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 30, contigLength - 21),
String.format("chr1:%d-%d", contigLength - 80, contigLength - 31),
String.format("chr1:%d-%d", contigLength - 20, contigLength));
new FlankingGenomeLocTestData("beyondStartBase1", parser, 1,
"chr1:3", "chr1:2", "chr1:4");
new FlankingGenomeLocTestData("beyondStartRange50", parser, 50,
"chr1:101-200", "chr1:51-100", "chr1:201-250");
new FlankingGenomeLocTestData("beyondEndBase1", parser, 1,
"chr1:" + (contigLength - 3),
"chr1:" + (contigLength - 4),
"chr1:" + (contigLength - 2));
new FlankingGenomeLocTestData("beyondEndRange50", parser, 50,
String.format("chr1:%d-%d", contigLength - 200, contigLength - 101),
String.format("chr1:%d-%d", contigLength - 250, contigLength - 201),
String.format("chr1:%d-%d", contigLength - 100, contigLength - 51));
new FlankingGenomeLocTestData("unmapped", parser, 50,
"unmapped", null, null);
new FlankingGenomeLocTestData("fullContig", parser, 50,
"chr1", null, null);
return FlankingGenomeLocTestData.getTests(FlankingGenomeLocTestData.class);
}
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStart(FlankingGenomeLocTestData data) {
GenomeLoc actual = data.parser.createGenomeLocAtStart(data.original, data.basePairs);
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.flankStart);
assertEquals(actual, data.flankStart, description);
}
@Test(dataProvider = "flankingGenomeLocs")
public void testCreateGenomeLocAtStop(FlankingGenomeLocTestData data) {
GenomeLoc actual = data.parser.createGenomeLocAtStop(data.original, data.basePairs);
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.flankStop);
assertEquals(actual, data.flankStop, description);
}
}

View File

@ -41,11 +41,6 @@ public class SimpleTimerUnitTest extends BaseTest {
double t6 = t.getElapsedTime();
Assert.assertTrue(t5 >= t4, "Restarted timer elapsed time should be after elapsed time preceding the restart");
Assert.assertTrue(t6 >= t5, "Second elapsed time not after the first in restarted timer");
t.stop().start();
Assert.assertTrue(t.isRunning(), "second started timer isn't running");
Assert.assertTrue(t.getElapsedTime() >= 0.0, "elapsed time should have been reset");
Assert.assertTrue(t.getElapsedTime() < t6, "elapsed time isn't less than time before start call"); // we should have effective no elapsed time
}
private final static void idleLoop() {

View File

@ -30,8 +30,10 @@ import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.sam.ArtificialSAMUtils;
import org.broadinstitute.sting.utils.sam.GATKSAMRecord;
import org.testng.Assert;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import org.testng.annotations.*;
import java.util.LinkedList;
import java.util.List;
/**
* Created by IntelliJ IDEA.
@ -44,180 +46,214 @@ public class ReadClipperUnitTest extends BaseTest {
// TODO: Add error messages on failed tests
//int debug = 0;
GATKSAMRecord read, expected;
ReadClipper readClipper;
final static String BASES = "ACTG";
final static String QUALS = "!+5?"; //ASCII values = 33,43,53,63
@BeforeClass
public void testIfEqual( GATKSAMRecord read, byte[] readBases, String baseQuals, String cigar) {
Assert.assertEquals(read.getReadBases(), readBases);
Assert.assertEquals(read.getBaseQualityString(), baseQuals);
Assert.assertEquals(read.getCigarString(), cigar);
}
public class testParameter {
int inputStart;
int inputStop;
int substringStart;
int substringStop;
String cigar;
public testParameter(int InputStart, int InputStop, int SubstringStart, int SubstringStop, String Cigar) {
inputStart = InputStart;
inputStop = InputStop;
substringStart = SubstringStart;
substringStop = SubstringStop;
cigar = Cigar;
}
}
// What the test read looks like
// Ref: 1 2 3 4 5 6 7 8
// Read: 0 1 2 3 - - - -
// -----------------------------
// Bases: A C T G - - - -
// Quals: ! + 5 ? - - - -
@BeforeMethod
public void init() {
SAMFileHeader header = ArtificialSAMUtils.createArtificialSamHeader(1, 1, 1000);
read = ArtificialSAMUtils.createArtificialRead(header, "read1", 0, 1, BASES.length());
read.setReadUnmappedFlag(true);
read.setReadBases(new String(BASES).getBytes());
read.setBaseQualityString(new String(QUALS));
readClipper = new ReadClipper(read);
//logger.warn(read.getCigarString());
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipBothEndsByReferenceCoordinates() {
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
logger.warn("Executing testHardClipBothEndsByReferenceCoordinates");
//int debug = 1;
//Clip whole read
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(0,0), new GATKSAMRecord(read.getHeader()));
Assert.assertEquals(readClipper.hardClipBothEndsByReferenceCoordinates(1,1), new GATKSAMRecord(read.getHeader()));
//clip 1 base
expected = readClipper.hardClipBothEndsByReferenceCoordinates(0,3);
expected = readClipper.hardClipBothEndsByReferenceCoordinates(1,4);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,3));
Assert.assertEquals(expected.getCigarString(), "1H2M1H");
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReadCoordinates() {
logger.warn("Executing testHardClipByReadCoordinates");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReadCoordinates(0,3), new GATKSAMRecord(read.getHeader()));
//clip 1 base at start
expected = readClipper.hardClipByReadCoordinates(0,0);
Assert.assertEquals(expected.getReadBases(), BASES.substring(1,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(1,4));
Assert.assertEquals(expected.getCigarString(), "1H3M");
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(0,0,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(3,3,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(0,1,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(2,3,0,2,"2M2H"));//clip 2 bases at end
//clip 1 base at end
expected = readClipper.hardClipByReadCoordinates(3,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
//clip 2 bases at start
expected = readClipper.hardClipByReadCoordinates(0,1);
Assert.assertEquals(expected.getReadBases(), BASES.substring(2,4).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(2,4));
Assert.assertEquals(expected.getCigarString(), "2H2M");
//clip 2 bases at end
expected = readClipper.hardClipByReadCoordinates(2,3);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReadCoordinates(p.inputStart, p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = false )
@Test ( enabled = true )
public void testHardClipByReferenceCoordinates() {
logger.warn("Executing testHardClipByReferenceCoordinates");
//logger.warn(debug);
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinates(1,4), new GATKSAMRecord(read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
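// By convention in these tests, -1 marks the unbounded side: (-1, x) clips the left tail up to reference coordinate x, (y, -1) clips the right tail from y onward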
testList.add(new testParameter(-1,1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(4,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(-1,2,2,4,"2H2M"));//clip 2 bases at start
testList.add(new testParameter(3,-1,0,2,"2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinates(p.inputStart,p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = true )
public void testHardClipByReferenceCoordinatesLeftTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesLeftTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesLeftTail(4), new GATKSAMRecord(read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1, -1, 1, 4, "1H3M"));//clip 1 base at start
testList.add(new testParameter(2, -1, 2, 4, "2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesLeftTail(p.inputStart),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = true )
public void testHardClipByReferenceCoordinatesRightTail() {
init();
logger.warn("Executing testHardClipByReferenceCoordinatesRightTail");
//Clip whole read
Assert.assertEquals(readClipper.hardClipByReferenceCoordinatesRightTail(1), new GATKSAMRecord(read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(-1, 4, 0, 3, "3M1H"));//clip 1 base at end
testList.add(new testParameter(-1, 3, 0, 2, "2M2H"));//clip 2 bases at end
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStop+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipByReferenceCoordinatesRightTail(p.inputStop),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
}
@Test ( enabled = true ) // TODO This function is returning null reads
public void testHardClipLowQualEnds() {
logger.warn("Executing testHardClipByReferenceCoordinates");
logger.warn("Executing testHardClipByReferenceCoordinates");
//Clip whole read
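// QUALS "!+5?" encodes Phred qualities 0, 10, 20, 30 (ASCII - 33); a threshold of 64 exceeds them all, so the whole read is clipped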
Assert.assertEquals(readClipper.hardClipLowQualEnds((byte)64), new GATKSAMRecord(read.getHeader()));
List<testParameter> testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,1,4,"1H3M"));//clip 1 base at start
testList.add(new testParameter(11,-1,2,4,"2H2M"));//clip 2 bases at start
for ( testParameter p : testList ) {
init();
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
/* todo find a better way to test lowqual tail clipping on both sides
// Reverse Quals sequence
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//clip 1 base at end
expected = readClipper.hardClipLowQualEnds((byte)34);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,3).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,3));
Assert.assertEquals(expected.getCigarString(), "3M1H");
testList = new LinkedList<testParameter>();
testList.add(new testParameter(1,-1,0,3,"3M1H"));//clip 1 base at end
testList.add(new testParameter(11,-1,0,2,"2M2H"));//clip 2 bases at end
//clip 2 bases at end
expected = readClipper.hardClipLowQualEnds((byte)44);
Assert.assertEquals(expected.getReadBases(), BASES.substring(0,2).getBytes());
Assert.assertEquals(expected.getBaseQualityString(), QUALS.substring(0,2));
Assert.assertEquals(expected.getCigarString(), "2M2H");
// revert Qual sequence
readClipper.getRead().setBaseQualityString(QUALS);
for ( testParameter p : testList ) {
init();
readClipper.getRead().setBaseQualityString("?5+!"); // 63,53,43,33
//logger.warn("Testing Parameters: " + p.inputStart+","+p.substringStart+","+p.substringStop+","+p.cigar);
testIfEqual( readClipper.hardClipLowQualEnds( (byte)p.inputStart ),
BASES.substring(p.substringStart,p.substringStop).getBytes(),
QUALS.substring(p.substringStart,p.substringStop),
p.cigar );
}
*/
}
public class CigarReadMaker {
}
@Test ( enabled = false )
public void testHardClipSoftClippedBases() {
// Generate a list of cigars to test
// We will use testParameter in the following way
// Right tail, left tail,
}
}

View File

@@ -1,8 +1,8 @@
package org.broadinstitute.sting.utils.interval;
import net.sf.picard.reference.ReferenceSequenceFile;
import net.sf.picard.util.IntervalUtil;
import net.sf.samtools.SAMFileHeader;
import org.apache.commons.io.FileUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.gatk.datasources.reference.ReferenceDataSource;
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
@@ -762,4 +762,225 @@ public class IntervalUtilsUnitTest extends BaseTest {
List<GenomeLoc> merged = IntervalUtils.mergeIntervalLocations(locs, IntervalMergingRule.ALL);
Assert.assertEquals(merged.size(), 1);
}
/*
Split into tests that can be written to files and tested by writeFlankingIntervals,
and lists that cannot but are still handled by getFlankingIntervals.
*/
private static abstract class FlankingIntervalsTestData extends TestDataProvider {
final public File referenceFile;
final public GenomeLocParser parser;
final int basePairs;
final List<GenomeLoc> original;
final List<GenomeLoc> expected;
protected FlankingIntervalsTestData(Class<?> clazz, String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(clazz, name);
this.referenceFile = referenceFile;
this.parser = parser;
this.basePairs = basePairs;
this.original = parse(parser, original);
this.expected = parse(parser, expected);
}
private static List<GenomeLoc> parse(GenomeLocParser parser, List<String> locs) {
List<GenomeLoc> parsed = new ArrayList<GenomeLoc>();
for (String loc: locs)
parsed.add("unmapped".equals(loc) ? GenomeLoc.UNMAPPED : parser.parseGenomeLoc(loc));
return parsed;
}
}
private static class FlankingIntervalsFile extends FlankingIntervalsTestData {
public FlankingIntervalsFile(String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(FlankingIntervalsFile.class, name, referenceFile, parser, basePairs, original, expected);
}
}
private static class FlankingIntervalsList extends FlankingIntervalsTestData {
public FlankingIntervalsList(String name, File referenceFile, GenomeLocParser parser,
int basePairs, List<String> original, List<String> expected) {
super(FlankingIntervalsList.class, name, referenceFile, parser, basePairs, original, expected);
}
}
/* Intervals where the original and the flanks can be written to files. */
@DataProvider(name = "flankingIntervalsFiles")
public Object[][] getFlankingIntervalsFiles() {
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
int hg19Length1 = hg19GenomeLocParser.getContigInfo("1").getSequenceLength();
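// Each FlankingIntervalsFile below self-registers via the TestDataProvider constructor; getTests() collects them.
// Flanks are truncated at contig boundaries and dropped entirely when they would fall wholly off the contig (e.g. the left flank of 1:1).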
new FlankingIntervalsFile("atStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:1"),
Arrays.asList("1:2"));
new FlankingIntervalsFile("atStartBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:1"),
Arrays.asList("1:2-51"));
new FlankingIntervalsFile("atStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:1-10"),
Arrays.asList("1:11-60"));
new FlankingIntervalsFile("atEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + hg19Length1),
Arrays.asList("1:" + (hg19Length1 - 1)));
new FlankingIntervalsFile("atEndBase50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:" + hg19Length1),
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 50, hg19Length1 - 1)));
new FlankingIntervalsFile("atEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 10, hg19Length1)),
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 60, hg19Length1 - 11)));
new FlankingIntervalsFile("nearStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:2"),
Arrays.asList("1:1", "1:3"));
new FlankingIntervalsFile("nearStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:21-30"),
Arrays.asList("1:1-20", "1:31-80"));
new FlankingIntervalsFile("nearEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + (hg19Length1 - 1)),
Arrays.asList("1:" + (hg19Length1 - 2), "1:" + hg19Length1));
new FlankingIntervalsFile("nearEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 30, hg19Length1 - 21)),
Arrays.asList(
String.format("1:%d-%d", hg19Length1 - 80, hg19Length1 - 31),
String.format("1:%d-%d", hg19Length1 - 20, hg19Length1)));
new FlankingIntervalsFile("beyondStartBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:3"),
Arrays.asList("1:2", "1:4"));
new FlankingIntervalsFile("beyondStartRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200"),
Arrays.asList("1:51-100", "1:201-250"));
new FlankingIntervalsFile("beyondEndBase1", hg19ReferenceFile, hg19GenomeLocParser, 1,
Arrays.asList("1:" + (hg19Length1 - 3)),
Arrays.asList("1:" + (hg19Length1 - 4), "1:" + (hg19Length1 - 2)));
new FlankingIntervalsFile("beyondEndRange50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList(String.format("1:%d-%d", hg19Length1 - 200, hg19Length1 - 101)),
Arrays.asList(
String.format("1:%d-%d", hg19Length1 - 250, hg19Length1 - 201),
String.format("1:%d-%d", hg19Length1 - 100, hg19Length1 - 51)));
new FlankingIntervalsFile("betweenFar50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:401-500"),
Arrays.asList("1:51-100", "1:201-250", "1:351-400", "1:501-550"));
new FlankingIntervalsFile("betweenSpan50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:301-400"),
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
new FlankingIntervalsFile("betweenOverlap50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:271-400"),
Arrays.asList("1:51-100", "1:201-270", "1:401-450"));
new FlankingIntervalsFile("betweenShort50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:221-400"),
Arrays.asList("1:51-100", "1:201-220", "1:401-450"));
new FlankingIntervalsFile("betweenNone50", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:121-400"),
Arrays.asList("1:51-100", "1:401-450"));
new FlankingIntervalsFile("twoContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "2:301-400"),
Arrays.asList("1:51-100", "1:201-250", "2:251-300", "2:401-450"));
// Explicitly test a problematic Agilent target pair
new FlankingIntervalsFile("badAgilent", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("2:74756257-74756411", "2:74756487-74756628"),
// wrong! ("2:74756206-74756256", "2:74756412-74756462", "2:74756436-74756486", "2:74756629-74756679")
Arrays.asList("2:74756207-74756256", "2:74756412-74756486", "2:74756629-74756678"));
return TestDataProvider.getTests(FlankingIntervalsFile.class);
}
/* Intervals where either the original and/or the flanks cannot be written to a file. */
@DataProvider(name = "flankingIntervalsLists")
public Object[][] getFlankingIntervalsLists() {
File hg19ReferenceFile = new File(BaseTest.hg19Reference);
List<String> empty = Collections.emptyList();
new FlankingIntervalsList("empty", hg19ReferenceFile, hg19GenomeLocParser, 50,
empty,
empty);
new FlankingIntervalsList("unmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("unmapped"),
empty);
new FlankingIntervalsList("fullContig", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1"),
empty);
new FlankingIntervalsList("fullContigs", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1", "2", "3"),
empty);
new FlankingIntervalsList("betweenWithUnmapped", hg19ReferenceFile, hg19GenomeLocParser, 50,
Arrays.asList("1:101-200", "1:301-400", "unmapped"),
Arrays.asList("1:51-100", "1:201-300", "1:401-450"));
return TestDataProvider.getTests(FlankingIntervalsList.class);
}
@Test(dataProvider = "flankingIntervalsFiles")
public void testWriteFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
File originalFile = createTempFile("original.", ".intervals");
File flankingFile = createTempFile("flanking.", ".intervals");
try {
List<String> lines = new ArrayList<String>();
for (GenomeLoc loc: data.original)
lines.add(loc.toString());
FileUtils.writeLines(originalFile, lines);
IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
List<GenomeLoc> actual = IntervalUtils.intervalFileToList(data.parser, flankingFile.getAbsolutePath());
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.expected);
Assert.assertEquals(actual, data.expected, description);
} finally {
FileUtils.deleteQuietly(originalFile);
FileUtils.deleteQuietly(flankingFile);
}
}
@Test(dataProvider = "flankingIntervalsLists", expectedExceptions = UserException.class)
public void testWritingBadFlankingIntervals(FlankingIntervalsTestData data) throws Exception {
File originalFile = createTempFile("original.", ".intervals");
File flankingFile = createTempFile("flanking.", ".intervals");
try {
List<String> lines = new ArrayList<String>();
for (GenomeLoc loc: data.original)
lines.add(loc.toString());
FileUtils.writeLines(originalFile, lines);
// Should throw a user exception on bad input if either the original
// intervals are empty or if the flanking intervals are empty
IntervalUtils.writeFlankingIntervals(data.referenceFile, originalFile, flankingFile, data.basePairs);
} finally {
FileUtils.deleteQuietly(originalFile);
FileUtils.deleteQuietly(flankingFile);
}
}
@Test(dataProvider = "flankingIntervalsLists")
public void testGetFlankingIntervals(FlankingIntervalsTestData data) {
List<GenomeLoc> actual = IntervalUtils.getFlankingIntervals(data.parser, data.original, data.basePairs);
String description = String.format("%n name: %s%n original: %s%n actual: %s%n expected: %s%n",
data.toString(), data.original, actual, data.expected);
Assert.assertEquals(actual, data.expected, description);
}
}

View File

@@ -29,10 +29,13 @@ package org.broadinstitute.sting.utils.variantcontext;
// the imports for unit testing.
import org.broadinstitute.sting.utils.MathUtils;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import java.util.EnumMap;
/**
* Basic unit test for Genotype likelihoods objects
@@ -69,6 +72,50 @@ public class GenotypeLikelihoodsUnitTest {
gl.getAsVector();
}
@Test
public void testGetAsMap(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(v);
//Log scale
EnumMap<Genotype.Type,Double> glMap = gl.getAsMap(false);
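// PL index = Genotype.Type.ordinal() - 1: the enum's first constant (NO_CALL) carries no likelihood, so HOM_REF/HET/HOM_VAR map to indices 0/1/2 (assuming that enum ordering)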
Assert.assertEquals(v[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(v[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(v[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Linear scale
glMap = gl.getAsMap(true);
double [] vl = MathUtils.normalizeFromLog10(v);
Assert.assertEquals(vl[Genotype.Type.HOM_REF.ordinal()-1],glMap.get(Genotype.Type.HOM_REF));
Assert.assertEquals(vl[Genotype.Type.HET.ordinal()-1],glMap.get(Genotype.Type.HET));
Assert.assertEquals(vl[Genotype.Type.HOM_VAR.ordinal()-1],glMap.get(Genotype.Type.HOM_VAR));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
glMap = gl.getAsMap(false);
Assert.assertNull(glMap);
}
@Test
public void testGetLog10GQ(){
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
//GQ for the best guess genotype
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),-3.9);
double[] test = MathUtils.normalizeFromLog10(gl.getAsVector());
//GQ for the other genotypes
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF), -1 * Math.log10(1.0 - test[Genotype.Type.HOM_REF.ordinal()-1]));
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR), -1 * Math.log10(1.0 - test[Genotype.Type.HOM_VAR.ordinal()-1]));
//Test missing likelihoods
gl = new GenotypeLikelihoods(".");
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_REF),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HET),Double.NEGATIVE_INFINITY);
Assert.assertEquals(gl.getLog10GQ(Genotype.Type.HOM_VAR),Double.NEGATIVE_INFINITY);
}
private void assertDoubleArraysAreEqual(double[] v1, double[] v2) {
Assert.assertEquals(v1.length, v2.length);
for ( int i = 0; i < v1.length; i++ ) {

View File

@@ -8,6 +8,7 @@ package org.broadinstitute.sting.utils.variantcontext;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.BeforeTest;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.testng.Assert;
@@ -55,7 +56,10 @@ public class VariantContextUnitTest extends BaseTest {
ATC = Allele.create("ATC");
ATCref = Allele.create("ATC", true);
}
@BeforeTest
public void beforeTest() {
basicBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
snpBuilder = new VariantContextBuilder("test", snpLoc,snpLocStart, snpLocStop, Arrays.asList(Aref, T)).referenceBaseForIndel((byte)'A');
insBuilder = new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATC)).referenceBaseForIndel((byte)'A');
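// These builders are shared across tests, so the tests below re-set stop() explicitly before calling make()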
@@ -75,16 +79,16 @@
// test REF
List<Allele> alleles = Arrays.asList(Tref);
VariantContext vc = snpBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.NO_VARIATION);
// test SNPs
alleles = Arrays.asList(Tref, A);
vc = snpBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
alleles = Arrays.asList(Tref, A, C);
vc = snpBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.SNP);
// test MNPs
@@ -98,7 +102,7 @@
// test INDELs
alleles = Arrays.asList(Aref, ATC);
vc = basicBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(ATCref, A);
@@ -106,7 +110,7 @@
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(Tref, TA, TC);
vc = basicBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.INDEL);
alleles = Arrays.asList(ATCref, A, AC);
@@ -131,12 +135,12 @@
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
alleles = Arrays.asList(Aref, T, symbolic);
vc = basicBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.MIXED);
// test SYMBOLIC
alleles = Arrays.asList(Tref, symbolic);
vc = basicBuilder.alleles(alleles).stop(snpLocStop).make();
Assert.assertEquals(vc.getType(), VariantContext.Type.SYMBOLIC);
}
@@ -280,50 +284,50 @@
Assert.assertEquals(vc.getGenotype("foo").getType(), Genotype.Type.MIXED);
}
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs1() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, ATCref)).make();
}
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs2() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(delRef, del)).make();
}
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgs3() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(del)).make();
}
@Test (expectedExceptions = Throwable.class)
public void testBadConstructorArgs4() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Collections.<Allele>emptyList()).make();
}
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgsDuplicateAlleles1() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(Aref, T, T)).make();
}
@Test (expectedExceptions = Exception.class)
public void testBadConstructorArgsDuplicateAlleles2() {
new VariantContextBuilder("test", insLoc, insLocStart, insLocStop, Arrays.asList(Aref, A)).make();
}
@Test (expectedExceptions = Throwable.class)
public void testBadLoc1() {
List<Allele> alleles = Arrays.asList(Aref, T, del);
new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, alleles).make();
}
@Test (expectedExceptions = Throwable.class)
public void testBadID1() {
new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, Arrays.asList(Aref, T)).id(null).make();
}
@Test (expectedExceptions = Exception.class)
public void testBadID2() {
new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, Arrays.asList(Aref, T)).id("");
new VariantContextBuilder("test", delLoc, delLocStart, delLocStop, Arrays.asList(Aref, T)).id("").make();
}
@Test
@@ -557,7 +561,7 @@
@Test(dataProvider = "getAlleles")
public void testMergeAlleles(GetAllelesTest cfg) {
final List<Allele> altAlleles = cfg.alleles.subList(1, cfg.alleles.size());
final VariantContext vc = new VariantContextBuilder("test", snpLoc, snpLocStart, snpLocStop, cfg.alleles).referenceBaseForIndel((byte)'A').make();
Assert.assertEquals(vc.getAlleles(), cfg.alleles, "VC alleles not the same as input alleles");
Assert.assertEquals(vc.getNAlleles(), cfg.alleles.size(), "VC getNAlleles not the same as input alleles size");

View File

@@ -22,30 +22,27 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.queue.extensions.gatk

import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline.{Output, Argument, Input}
import java.io.File
import org.broadinstitute.sting.utils.interval.IntervalUtils

class WriteFlankingIntervalsFunction extends InProcessFunction {
@Input(doc="The reference sequence")
var reference : File = _
@Input(doc="The interval list to flank")
var inputIntervals : File = _
@Output(doc="The output intervals file to write to")
var outputIntervals: File = _
@Argument(doc="Number of base pairs to flank the input intervals")
var flankSize : Int = _
def run() {
IntervalUtils.writeFlankingIntervals(reference, inputIntervals, outputIntervals, flankSize)
}
}

View File

@@ -1,135 +0,0 @@
package org.broadinstitute.sting.queue.library.ipf.intervals
import org.broadinstitute.sting.queue.function.InProcessFunction
import org.broadinstitute.sting.commandline._
import java.io.{PrintStream, File}
import collection.JavaConversions._
import org.broadinstitute.sting.utils.text.XReadLines
import net.sf.picard.reference.FastaSequenceFile
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
import collection.immutable.TreeSet
// todo -- this is unsafe. Need to use a reference dictionary to ensure no off-contig targets are created
class ExpandIntervals(in : File, start: Int, size: Int, out: File, ref: File, ipType: String, opType: String) extends InProcessFunction {
@Input(doc="The interval list to expand") val inList : File = in
@Input(doc="The reference sequence") val refDict : File = ref
@Argument(doc="Number of basepair to start the expanded interval") val startInt : Int = start
@Argument(doc="Number of baispair to stop the expanded interval") val sizeInt : Int = size
@Output(doc="The output intervals file to write to") val outList : File = out
@Argument(doc="The output format for the intervals") val outTypeStr = opType
@Argument(doc="The input format for the intervals") val inTypeStr = ipType
var output : PrintStream = _
var parser : GenomeLocParser = _
var xrl : XReadLines = _
val outType = IntervalFormatType.convert(outTypeStr)
val inType = IntervalFormatType.convert(inTypeStr)
var offsetIn : Int = 0
var offsetOut : Int = 0
var first : Boolean = true
var lastTwo : (GenomeLoc,GenomeLoc) = _
var intervalCache : TreeSet[GenomeLoc] = _
val LINES_TO_CACHE : Int = 1000
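// Every LINES_TO_CACHE inputs, cached intervals ending more than startInt+sizeInt base pairs before the current position are flushed, since no later interval can still refine them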
def run = {
output = new PrintStream(outList)
intervalCache = new TreeSet[GenomeLoc]()(new Ordering[GenomeLoc]{
def compare(o1: GenomeLoc, o2: GenomeLoc) : Int = { o1.compareTo(o2) }
})
parser = new GenomeLocParser(new FastaSequenceFile(ref,true))
xrl = new XReadLines(inList)
offsetIn = if (isBed(inType)) 1 else 0
offsetOut = if( isBed(outType)) 1 else 0
var line : String = xrl.next
while ( line.startsWith("@") ) {
line = xrl.next
}
var prevLoc: GenomeLoc = null
var curLoc: GenomeLoc = null
var nextLoc : GenomeLoc = parseGenomeInterval(line)
var linesProcessed : Int = 1
while ( prevLoc != null || curLoc != null || nextLoc != null ) {
prevLoc = curLoc
curLoc = nextLoc
nextLoc = if ( xrl.hasNext ) parseGenomeInterval(xrl.next) else null
if ( curLoc != null ) {
val left: GenomeLoc = refine(expandLeft(curLoc),prevLoc)
val right: GenomeLoc = refine(expandRight(curLoc),nextLoc)
if ( left != null ) {
intervalCache += left
}
if ( right != null ) {
intervalCache += right
}
}
linesProcessed += 1
if ( linesProcessed % LINES_TO_CACHE == 0 ) {
val toPrint = intervalCache.filter( u => (u.isBefore(prevLoc) && u.distance(prevLoc) > startInt+sizeInt))
intervalCache = intervalCache -- toPrint
toPrint.foreach(u => output.print("%s%n".format(repr(u))))
}
//System.out.printf("%s".format(if ( curLoc == null ) "null" else repr(curLoc)))
}
intervalCache.foreach(u => output.print("%s%n".format(repr(u))))
output.close()
}
def expandLeft(g: GenomeLoc) : GenomeLoc = {
parser.createGenomeLoc(g.getContig,g.getStart-startInt-sizeInt,g.getStart-startInt)
}
def expandRight(g: GenomeLoc) : GenomeLoc = {
parser.createGenomeLoc(g.getContig,g.getStop+startInt,g.getStop+startInt+sizeInt)
}
def refine(newG: GenomeLoc, borderG: GenomeLoc) : GenomeLoc = {
if ( borderG == null || ! newG.overlapsP(borderG) ) {
return newG
} else {
if ( newG.getStart < borderG.getStart ) {
if ( borderG.getStart - startInt > newG.getStart ) {
return parser.createGenomeLoc(newG.getContig,newG.getStart,borderG.getStart-startInt)
}
} else {
if ( borderG.getStop + startInt < newG.getStop ){
return parser.createGenomeLoc(newG.getContig,borderG.getStop+startInt,newG.getStop)
}
}
}
null
}
def repr(loc : GenomeLoc) : String = {
if ( loc == null ) return "null"
if ( outType == IntervalFormatType.INTERVALS ) {
return "%s:%d-%d".format(loc.getContig,loc.getStart,loc.getStop)
} else {
return "%s\t%d\t%d".format(loc.getContig,loc.getStart-offsetOut,loc.getStop+offsetOut)
}
}
def isBed(t: IntervalFormatType.IntervalFormatType) : Boolean = {
t == IntervalFormatType.BED
}
def parseGenomeInterval( s : String ) : GenomeLoc = {
val sp = s.split("\\s+")
// todo -- maybe specify whether the bed format [0,6) --> (1,2,3,4,5) is what's wanted
if ( s.contains(":") ) parser.parseGenomeLoc(s) else parser.createGenomeLoc(sp(0),sp(1).toInt+offsetIn,sp(2).toInt-offsetIn)
}
object IntervalFormatType extends Enumeration("INTERVALS","BED","TDF") {
type IntervalFormatType = Value
val INTERVALS,BED,TDF = Value
def convert(s : String) : IntervalFormatType = {
if ( s.equals("INTERVALS") ) INTERVALS else { if (s.equals("BED") ) BED else TDF}
}
}
}

View File

@@ -1,70 +0,0 @@
package org.broadinstitute.sting.queue.library.ipf.intervals
import org.broadinstitute.sting.queue.function.InProcessFunction
import collection.JavaConversions._
import org.broadinstitute.sting.commandline._
import java.io.{PrintStream, File}
import net.sf.samtools.{SAMSequenceRecord, SAMFileHeader, SAMSequenceDictionary}
import org.broadinstitute.sting.utils.text.XReadLines
import org.broadinstitute.sting.utils.{GenomeLoc, GenomeLocParser}
class IntersectIntervals(iVals: List[File], outFile: File, bed: Boolean) extends InProcessFunction {
@Input(doc="List of interval files to find the intersection of") val intervals : List[File] = iVals
@Output(doc="Output interval file to which to write") val output : File = outFile
@Argument(doc="Assume the input interval lists are sorted in the proper order") var assumeSorted = false
@Argument(doc="Is the tdf in bed file (0-based clopen: 0 5 for {1,2,3,4}?") var isBed = bed
var outStream : PrintStream = _
var contigs : List[String] = Nil
var dict : SAMSequenceDictionary = _
var parser : GenomeLocParser = _
def run = {
outStream = new PrintStream(output)
dict = new SAMSequenceDictionary
// note: memory hog
val sources : List[(List[(String,Int,Int)],Int)] = intervals.map(g => asScalaIterator(new XReadLines(g)).map(u => parse(u)).toList).zipWithIndex
sources.map(u => u._1).flatten.map(u => u._1).distinct.foreach(u => dict.addSequence(new SAMSequenceRecord(u,Integer.MAX_VALUE)))
parser = new GenomeLocParser(dict)
sources.map( (u: (List[(String,Int,Int)],Int)) => u._1.map(g => (newGenomeLoc(g),u._2))).flatten.sortWith( (a,b) => (a._1 compareTo b._1) < 0 ).foldLeft[List[List[(GenomeLoc,Int)]]](Nil)( (a,b) => overlapFold(a,b)).map(u => mapIntersect(u)).filter(h => h != null && h.size > 0).foreach(h => writeOut(h))
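// The chain above: tag each interval with its source index, sort everything, group runs of mutually overlapping intervals (overlapFold), intersect each group that draws from every source (mapIntersect), and write the results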
outStream.close()
}
def writeOut(g : GenomeLoc) : Unit = {
outStream.print("%s%n".format(g.toString))
}
def parse(s : String) : (String,Int,Int) = {
if ( s.contains(":") ) {
val split1 = s.split(":")
val split2 = split1(1).split("-")
return (split1(0),split2(0).toInt,split2(1).toInt)
} else {
val split = s.split("\\s+")
return (split(0),split(1).toInt + (if(isBed) 1 else 0) ,split(2).toInt - (if(isBed) 1 else 0) )
}
}
def newGenomeLoc(coords : (String,Int,Int) ) : GenomeLoc = {
parser.createGenomeLoc(coords._1,coords._2,coords._3)
}
def overlapFold( a: List[List[(GenomeLoc,Int)]], b: (GenomeLoc,Int) ) : List[List[(GenomeLoc,Int)]] = {
if ( a.last.forall(u => u._1.overlapsP(b._1)) ) {
a.init :+ (a.last :+ b)
} else {
a :+ ( a.last.dropWhile(u => ! u._1.overlapsP(b._1)) :+ b)
}
}
def mapIntersect( u: List[(GenomeLoc,Int)]) : GenomeLoc = {
if ( u.map(h => h._2).distinct.sum != range(1,intervals.size).sum ) { // if all sources not accounted for
null
}
u.map(h => h._1).reduceLeft[GenomeLoc]( (a,b) => a.intersect(b) )
}
def range(a: Int, b: Int) : Range = new Range(a,b+1,1)
}

View File

@@ -1,3 +1,3 @@
<ivy-module version="1.0">
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.2" status="release" />
<info organisation="net.sf.snpeff" module="snpeff" revision="2.0.4rc3" status="release" />
</ivy-module>