Prototype implementation of protoshard merging when protoshard n and protoshard

n+1 completely overlap. Gives a small but consistent performance increase in non-intervaled whole exome traversals (2.79min original, 2.69min revised). Needs a more in depth analysis of optimal shard sizing to determine a true optimum. Also renamed a variable because Khalid disapproved of my naming choices. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5595 348d0f76-0448-11de-a6fe-93d51630548a
2011-04-08 02:09:14 +00:00 · 2011-04-08 02:09:14 +00:00 · fece2167b3
parent 32d502c122
commit fece2167b3
6 changed files with 349 additions and 193 deletions
--- a/java/src/net/sf/samtools/GATKBAMFileSpan.java
+++ b/java/src/net/sf/samtools/GATKBAMFileSpan.java
@ -190,7 +190,7 @@ public class GATKBAMFileSpan extends BAMFileSpan {
     * @return This file span minuse the other file span.
     */

-    public GATKBAMFileSpan subtract(final GATKBAMFileSpan other) {
+    public GATKBAMFileSpan minus(final GATKBAMFileSpan other) {
        Iterator<GATKChunk> thisIterator = getGATKChunks().iterator();
        Iterator<GATKChunk> otherIterator = other.getGATKChunks().iterator();

@ -204,7 +204,7 @@ public class GATKBAMFileSpan extends BAMFileSpan {

        while(thisChunk != null && otherChunk != null) {
            // If this iterator is before the other, add this to the subtracted list and forge ahead.
-            if(thisChunk.getChunkEnd() < otherChunk.getChunkStart()) {
+            if(thisChunk.getChunkEnd() <= otherChunk.getChunkStart()) {
                subtracted.add(thisChunk);
                thisChunk = thisIterator.hasNext() ? thisIterator.next() : null;
                continue;
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMScheduler.java
@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.datasources.reads;
+
+import net.sf.picard.util.PeekableIterator;
+import net.sf.samtools.GATKBAMFileSpan;
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.NoSuchElementException;
+
+/**
+ * Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
+ */
+public class BAMScheduler implements Iterator<FilePointer> {
+    private final SAMDataSource dataSource;
+
+    private final Map<SAMReaderID,GATKBAMIndex> indices = new HashMap<SAMReaderID,GATKBAMIndex>();
+
+    private FilePointer nextFilePointer = null;
+
+    private final GenomeLocSortedSet loci;
+
+    private final PeekableIterator<GenomeLoc> locusIterator;
+
+    private GenomeLoc currentLocus;    
+
+    public BAMScheduler(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
+        this.dataSource = dataSource;
+        for(SAMReaderID reader: dataSource.getReaderIDs())
+            indices.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
+        this.loci = loci;
+        locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
+        if(locusIterator.hasNext())
+            currentLocus = locusIterator.next();
+        advance();
+    }
+
+    public boolean hasNext() {
+        return nextFilePointer != null;
+    }
+
+    public FilePointer next() {
+        if(!hasNext())
+            throw new NoSuchElementException("No next element available in interval sharder");
+        FilePointer currentFilePointer = nextFilePointer;
+        advance();
+        return currentFilePointer;
+    }
+
+    public void remove() {
+        throw new UnsupportedOperationException("Unable to remove FilePointers from an IntervalSharder");
+    }
+
+    private void advance() {
+        if(loci.isEmpty())
+            return;
+
+        nextFilePointer = null;
+        while(nextFilePointer == null && currentLocus != null) {
+            // special case handling of the unmapped shard.
+            if(currentLocus == GenomeLoc.UNMAPPED) {
+                nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
+                for(SAMReaderID id: dataSource.getReaderIDs())
+                    nextFilePointer.addFileSpans(id,null);
+                currentLocus = null;
+                continue;
+            }
+
+            nextFilePointer = new FilePointer();
+
+            int coveredRegionStart = 1;
+            int coveredRegionStop = Integer.MAX_VALUE;
+            GenomeLoc coveredRegion = null;
+
+            BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indices,currentLocus);
+
+            // No overlapping data at all.
+            if(scheduleEntry != null) {
+                coveredRegionStart = Math.max(coveredRegionStart,scheduleEntry.start);
+                coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop);
+                coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
+
+                nextFilePointer.addFileSpans(scheduleEntry.fileSpans);
+            }
+            else {
+                // Always create a file span, whether there was covered data or not.  If there was no covered data, then the binTree is empty.
+                //System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
+                for(SAMReaderID reader: indices.keySet())
+                    nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
+            }
+
+            // Early exit if no bins were found.
+            if(coveredRegion == null) {
+                // for debugging only: maximum split is 16384.                
+                if(currentLocus.size() > 16384) {
+                    GenomeLoc[] splitContigs = currentLocus.split(currentLocus.getStart()+16384);
+                    nextFilePointer.addLocation(splitContigs[0]);
+                    currentLocus = splitContigs[1];
+                }
+                else {
+                    nextFilePointer.addLocation(currentLocus);
+                    currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
+                }
+                continue;
+            }
+
+            // Early exit if only part of the first interval was found.
+            if(currentLocus.startsBefore(coveredRegion)) {
+                // for debugging only: maximum split is 16384.
+                int splitPoint = Math.min(coveredRegion.getStart()-currentLocus.getStart(),16384)+currentLocus.getStart();
+                GenomeLoc[] splitContigs = currentLocus.split(splitPoint);
+                nextFilePointer.addLocation(splitContigs[0]);
+                currentLocus = splitContigs[1];
+                continue;
+            }
+
+            // Define the initial range of the file pointer, aka the region where the locus currently being processed intersects the BAM list.
+            GenomeLoc initialLocation = currentLocus.intersect(coveredRegion);
+            nextFilePointer.addLocation(initialLocation);
+
+            // See whether the BAM regions discovered overlap the next set of intervals in the interval list.  If so, include every overlapping interval.
+            if(!nextFilePointer.locations.isEmpty()) {
+                while(locusIterator.hasNext() && locusIterator.peek().overlapsP(coveredRegion)) {
+                    currentLocus = locusIterator.next();
+                    nextFilePointer.addLocation(currentLocus.intersect(coveredRegion));
+                }
+
+                // Chop off the uncovered portion of the locus.  Since we know that the covered region overlaps the current locus,
+                  // we can simplify the interval creation process to the end of the covered region to the stop of the given interval.
+                if(coveredRegionStop < currentLocus.getStop())
+                    currentLocus = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStop+1,currentLocus.getStop());
+                else if(locusIterator.hasNext())
+                    currentLocus = locusIterator.next();
+                else
+                    currentLocus = null;
+            }
+
+        }
+    }
+
+    
+    /**
+     * The last reference sequence processed by this iterator.
+     */
+    private Integer lastReferenceSequenceLoaded = null;
+
+    /**
+     * The stateful iterator used to progress through the genoem.
+     */
+    private PeekableIterator<BAMScheduleEntry> bamScheduleIterator = null;
+
+    /**
+     * Get the next overlapping tree of bins associated with the given BAM file.
+     * @param indices BAM index representation.
+     * @param currentLocus The actual locus for which to check overlap.
+     * @return The next schedule entry overlapping with the given list of loci.
+     */
+    private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
+        // Stale reference sequence or first invocation.  (Re)create the binTreeIterator.
+        if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
+            if(bamScheduleIterator != null)
+                bamScheduleIterator.close();
+            lastReferenceSequenceLoaded = currentLocus.getContigIndex();
+
+            // Naive algorithm: find all elements in current contig for proper schedule creation.
+            List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
+            for(GenomeLoc locus: loci) {
+                if(locus.getContigIndex() == lastReferenceSequenceLoaded)
+                    lociInContig.add(locus);
+            }
+
+            bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
+        }
+
+        if(!bamScheduleIterator.hasNext())
+            return null;
+
+        // Peek the iterator along until finding the first binTree at or following the current locus.
+        BAMScheduleEntry bamScheduleEntry = bamScheduleIterator.peek();
+        while(bamScheduleEntry != null && bamScheduleEntry.isBefore(currentLocus)) {
+            bamScheduleIterator.next();
+            bamScheduleEntry = bamScheduleIterator.hasNext() ? bamScheduleIterator.peek() : null;
+        }                                   
+
+        return (bamScheduleEntry != null && bamScheduleEntry.overlaps(currentLocus)) ? bamScheduleEntry : null;
+    }
+
+}
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/FilePointer.java
@ -24,22 +24,29 @@

 package org.broadinstitute.sting.gatk.datasources.reads;

+import net.sf.picard.util.PeekableIterator;
 import net.sf.samtools.GATKBAMFileSpan;
 import net.sf.samtools.SAMFileSpan;
 import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
+import org.broadinstitute.sting.utils.interval.IntervalMergingRule;
+import org.broadinstitute.sting.utils.interval.IntervalUtils;

 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.SortedMap;
+import java.util.TreeMap;

 /**
 * Represents a small section of a BAM file, and every associated interval.
 */
 class FilePointer {
-    protected final Map<SAMReaderID,SAMFileSpan> fileSpans = new HashMap<SAMReaderID,SAMFileSpan>();
-    protected final String referenceSequence;
+    protected final SortedMap<SAMReaderID,SAMFileSpan> fileSpans = new TreeMap<SAMReaderID,SAMFileSpan>();
    protected final BAMOverlap overlap;
    protected final List<GenomeLoc> locations;

@ -48,24 +55,22 @@ class FilePointer {
     */
    protected final boolean isRegionUnmapped;

+    public FilePointer() {
+        this((BAMOverlap)null);
+    }
+
    public FilePointer(final GenomeLoc location) {
-        this.referenceSequence = location.getContig();
        this.overlap = null;
        this.locations = Collections.singletonList(location);
        this.isRegionUnmapped = GenomeLoc.isUnmapped(location);
    }

-    public FilePointer(final String referenceSequence,final BAMOverlap overlap) {
-        this.referenceSequence = referenceSequence;
+    public FilePointer(final BAMOverlap overlap) {
        this.overlap = overlap;
        this.locations = new ArrayList<GenomeLoc>();
        this.isRegionUnmapped = false;
    }

-    public FilePointer(final String referenceSequence) {
-        this(referenceSequence,null);
-    }
-
    public void addLocation(final GenomeLoc location) {
        locations.add(location);
    }
@ -77,4 +82,89 @@ class FilePointer {
    public void addFileSpans(final Map<SAMReaderID, GATKBAMFileSpan> fileSpans) {
        this.fileSpans.putAll(fileSpans);
    }
+
+
+    /**
+     * Computes the size of this file span, in uncompressed bytes.
+     * @return Size of the file span.
+     */
+    public long size() {
+        long size = 0L;
+        for(SAMFileSpan fileSpan: fileSpans.values())
+            size += ((GATKBAMFileSpan)fileSpan).size();
+        return size;
+    }
+
+    /**
+     * Returns the difference in size between two filespans.
+     * @param other Other filespan against which to measure.
+     * @return The difference in size between the two file pointers.
+     */
+    public long minus(final FilePointer other) {
+        long difference = 0;
+        PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(this.fileSpans.entrySet().iterator());
+        PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(other.fileSpans.entrySet().iterator());
+
+        while(thisIterator.hasNext() || otherIterator.hasNext()) {
+            int compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
+
+            if(compareValue < 0) {
+                // This before other.
+                difference += ((GATKBAMFileSpan)thisIterator.next().getValue()).size();
+            }
+            else if(compareValue > 0) {
+                // Other before this.
+                difference += ((GATKBAMFileSpan)otherIterator.next().getValue()).size();
+            }
+            else {
+                // equality; difference the values.
+                GATKBAMFileSpan thisRegion = (GATKBAMFileSpan)thisIterator.next().getValue();
+                GATKBAMFileSpan otherRegion = (GATKBAMFileSpan)otherIterator.next().getValue();
+                difference += Math.abs(thisRegion.minus(otherRegion).size());
+            }
+        }
+        return difference;
+    }
+
+    /**
+     * Combines two file pointers into one.
+     * @param parser The genomelocparser to use when manipulating intervals.
+     * @param other File pointer to combine into this one.
+     * @return A completely new file pointer that is the combination of the two.
+     */
+    public FilePointer combine(final GenomeLocParser parser, final FilePointer other) {
+        FilePointer combined = new FilePointer();
+
+        List<GenomeLoc> intervals = new ArrayList<GenomeLoc>();
+        intervals.addAll(locations);
+        intervals.addAll(other.locations);
+        for(GenomeLoc interval: IntervalUtils.sortAndMergeIntervals(parser,intervals,IntervalMergingRule.ALL))
+            combined.addLocation(interval);
+
+        PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> thisIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(this.fileSpans.entrySet().iterator());
+        PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>> otherIterator = new PeekableIterator<Map.Entry<SAMReaderID,SAMFileSpan>>(other.fileSpans.entrySet().iterator());
+
+        while(thisIterator.hasNext() || otherIterator.hasNext()) {
+            int compareValue = thisIterator.peek().getKey().compareTo(otherIterator.peek().getKey());
+
+            if(compareValue < 0) {
+                // This before other.
+                Map.Entry<SAMReaderID,SAMFileSpan> entry = thisIterator.next();
+                combined.addFileSpans(entry.getKey(),entry.getValue());
+            }
+            else if(compareValue > 0) {
+                // Other before this.
+                Map.Entry<SAMReaderID,SAMFileSpan> entry = otherIterator.next();
+                combined.addFileSpans(entry.getKey(),entry.getValue());
+            }
+            else {
+                // equality; union the values.
+                SAMReaderID reader = thisIterator.peek().getKey();
+                GATKBAMFileSpan thisRegion = (GATKBAMFileSpan)thisIterator.next().getValue();
+                GATKBAMFileSpan otherRegion = (GATKBAMFileSpan)otherIterator.next().getValue();
+                combined.addFileSpans(reader,thisRegion.union(otherRegion));
+            }
+        }
+        return combined;
+    }
 }
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/IntervalSharder.java
@ -185,7 +185,7 @@ public class IntervalSharder {
                        filePointers.add(lastFilePointer);

                    lastBAMOverlap = bamOverlapIterator.next();
-                    lastFilePointer = new FilePointer(contig,lastBAMOverlap);
+                    lastFilePointer = new FilePointer(lastBAMOverlap);
                    binStart = lastFilePointer.overlap.start;
                    binStop = lastFilePointer.overlap.stop;
                }
--- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java
@ -25,198 +25,44 @@
 package org.broadinstitute.sting.gatk.datasources.reads;

 import net.sf.picard.util.PeekableIterator;
-import net.sf.samtools.GATKBAMFileSpan;
-import org.apache.log4j.Logger;
-import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;

-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.HashMap;
 import java.util.Iterator;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Map;
-import java.util.NoSuchElementException;

 /**
- * Assign intervals to the most appropriate blocks, keeping as little as possible in memory at once.
+ * Handles the process of aggregating BAM intervals into individual shards.
 */
 public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
-    private static Logger logger = Logger.getLogger(IntervalSharder.class);
+    /**
+     * The iterator actually laying out the data for BAM scheduling.
+     */
+    private final PeekableIterator<FilePointer> wrappedIterator;

-    private final SAMDataSource dataSource;
-
-    private final Map<SAMReaderID,GATKBAMIndex> indices = new HashMap<SAMReaderID,GATKBAMIndex>();
-
-    private FilePointer nextFilePointer = null;
-
-    private final GenomeLocSortedSet loci;
-
-    private final PeekableIterator<GenomeLoc> locusIterator;
-
-    private GenomeLoc currentLocus;    
+    /**
+     * The parser, for interval manipulation.
+     */
+    private final GenomeLocParser parser;

    public LowMemoryIntervalSharder(final SAMDataSource dataSource, final GenomeLocSortedSet loci) {
-        this.dataSource = dataSource;
-        for(SAMReaderID reader: dataSource.getReaderIDs())
-            indices.put(reader,(GATKBAMIndex)dataSource.getIndex(reader));
-        this.loci = loci;
-        locusIterator = new PeekableIterator<GenomeLoc>(loci.iterator());
-        if(locusIterator.hasNext())
-            currentLocus = locusIterator.next();
-        advance();
+        wrappedIterator = new PeekableIterator<FilePointer>(new BAMScheduler(dataSource,loci));
+        parser = loci.getGenomeLocParser();
    }

    public boolean hasNext() {
-        return nextFilePointer != null;
+        return wrappedIterator.hasNext();
    }

+    /**
+     * Accumulate shards where there's no additional cost to processing the next shard in the sequence.
+     * @return The next file pointer to process.
+     */
    public FilePointer next() {
-        if(!hasNext())
-            throw new NoSuchElementException("No next element available in interval sharder");
-        FilePointer currentFilePointer = nextFilePointer;
-        advance();
-        return currentFilePointer;
-    }
-
-    public void remove() {
-        throw new UnsupportedOperationException("Unable to remove FilePointers from an IntervalSharder");
-    }
-
-    private void advance() {
-        if(loci.isEmpty())
-            return;
-
-        nextFilePointer = null;
-        while(nextFilePointer == null && currentLocus != null) {
-            // special case handling of the unmapped shard.
-            if(currentLocus == GenomeLoc.UNMAPPED) {
-                nextFilePointer = new FilePointer(GenomeLoc.UNMAPPED);
-                for(SAMReaderID id: dataSource.getReaderIDs())
-                    nextFilePointer.addFileSpans(id,null);
-                currentLocus = null;
-                continue;
-            }
-
-            nextFilePointer = new FilePointer(currentLocus.getContig());
-
-            int coveredRegionStart = 1;
-            int coveredRegionStop = Integer.MAX_VALUE;
-            GenomeLoc coveredRegion = null;
-
-            BAMScheduleEntry scheduleEntry = getNextOverlappingBAMScheduleEntry(indices,currentLocus);
-
-            // No overlapping data at all.
-            if(scheduleEntry != null) {
-                coveredRegionStart = Math.max(coveredRegionStart,scheduleEntry.start);
-                coveredRegionStop = Math.min(coveredRegionStop,scheduleEntry.stop);
-                coveredRegion = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStart,coveredRegionStop);
-
-                nextFilePointer.addFileSpans(scheduleEntry.fileSpans);
-            }
-            else {
-                // Always create a file span, whether there was covered data or not.  If there was no covered data, then the binTree is empty.
-                //System.out.printf("Shard: index file = %s; reference sequence = %d; ",index.getIndexFile(),currentLocus.getContigIndex());
-                for(SAMReaderID reader: indices.keySet())
-                    nextFilePointer.addFileSpans(reader,new GATKBAMFileSpan());
-            }
-
-            // Early exit if no bins were found.
-            if(coveredRegion == null) {
-                // for debugging only: maximum split is 16384.                
-                if(currentLocus.size() > 16384) {
-                    GenomeLoc[] splitContigs = currentLocus.split(currentLocus.getStart()+16384);
-                    nextFilePointer.addLocation(splitContigs[0]);
-                    currentLocus = splitContigs[1];
-                }
-                else {
-                    nextFilePointer.addLocation(currentLocus);
-                    currentLocus = locusIterator.hasNext() ? locusIterator.next() : null;
-                }
-                continue;
-            }
-
-            // Early exit if only part of the first interval was found.
-            if(currentLocus.startsBefore(coveredRegion)) {
-                // for debugging only: maximum split is 16384.
-                int splitPoint = Math.min(coveredRegion.getStart()-currentLocus.getStart(),16384)+currentLocus.getStart();
-                GenomeLoc[] splitContigs = currentLocus.split(splitPoint);
-                nextFilePointer.addLocation(splitContigs[0]);
-                currentLocus = splitContigs[1];
-                continue;
-            }
-
-            // Define the initial range of the file pointer, aka the region where the locus currently being processed intersects the BAM list.
-            GenomeLoc initialLocation = currentLocus.intersect(coveredRegion);
-            nextFilePointer.addLocation(initialLocation);
-
-            // See whether the BAM regions discovered overlap the next set of intervals in the interval list.  If so, include every overlapping interval.
-            if(!nextFilePointer.locations.isEmpty()) {
-                while(locusIterator.hasNext() && locusIterator.peek().overlapsP(coveredRegion)) {
-                    currentLocus = locusIterator.next();
-                    nextFilePointer.addLocation(currentLocus.intersect(coveredRegion));
-                }
-
-                // Chop off the uncovered portion of the locus.  Since we know that the covered region overlaps the current locus,
-                  // we can simplify the interval creation process to the end of the covered region to the stop of the given interval.
-                if(coveredRegionStop < currentLocus.getStop())
-                    currentLocus = loci.getGenomeLocParser().createGenomeLoc(currentLocus.getContig(),coveredRegionStop+1,currentLocus.getStop());
-                else if(locusIterator.hasNext())
-                    currentLocus = locusIterator.next();
-                else
-                    currentLocus = null;
-            }
-
-        }
-    }
-
-    
-    /**
-     * The last reference sequence processed by this iterator.
-     */
-    private Integer lastReferenceSequenceLoaded = null;
-
-    /**
-     * The stateful iterator used to progress through the genoem.
-     */
-    private PeekableIterator<BAMScheduleEntry> bamScheduleIterator = null;
-
-    /**
-     * Get the next overlapping tree of bins associated with the given BAM file.
-     * @param indices BAM index representation.
-     * @param currentLocus The actual locus for which to check overlap.
-     * @return The next schedule entry overlapping with the given list of loci.
-     */
-    private BAMScheduleEntry getNextOverlappingBAMScheduleEntry(final Map<SAMReaderID,GATKBAMIndex> indices, final GenomeLoc currentLocus) {
-        // Stale reference sequence or first invocation.  (Re)create the binTreeIterator.
-        if(lastReferenceSequenceLoaded == null || lastReferenceSequenceLoaded != currentLocus.getContigIndex()) {
-            if(bamScheduleIterator != null)
-                bamScheduleIterator.close();
-            lastReferenceSequenceLoaded = currentLocus.getContigIndex();
-
-            // Naive algorithm: find all elements in current contig for proper schedule creation.
-            List<GenomeLoc> lociInContig = new LinkedList<GenomeLoc>();
-            for(GenomeLoc locus: loci) {
-                if(locus.getContigIndex() == lastReferenceSequenceLoaded)
-                    lociInContig.add(locus);
-            }
-
-            bamScheduleIterator = new PeekableIterator<BAMScheduleEntry>(new BAMSchedule(indices,lociInContig));
-        }
-
-        if(!bamScheduleIterator.hasNext())
-            return null;
-
-        // Peek the iterator along until finding the first binTree at or following the current locus.
-        BAMScheduleEntry bamScheduleEntry = bamScheduleIterator.peek();
-        while(bamScheduleEntry != null && bamScheduleEntry.isBefore(currentLocus)) {
-            bamScheduleIterator.next();
-            bamScheduleEntry = bamScheduleIterator.hasNext() ? bamScheduleIterator.peek() : null;
-        }                                   
-
-        return (bamScheduleEntry != null && bamScheduleEntry.overlaps(currentLocus)) ? bamScheduleEntry : null;
+        FilePointer current = wrappedIterator.next();
+        while(wrappedIterator.hasNext() && current.minus(wrappedIterator.peek()) == 0)
+            current = current.combine(parser,wrappedIterator.next());
+        return current;
    }

+    public void remove() { throw new UnsupportedOperationException("Unable to remove from an interval sharder."); }
 }
--- a/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
+++ b/java/src/org/broadinstitute/sting/gatk/io/stubs/SAMFileWriterArgumentTypeDescriptor.java
@ -51,7 +51,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
    public static final String SIMPLIFY_BAM_FULLNAME = "simplifyBAM";
    public static final String SIMPLIFY_BAM_SHORTNAME = SIMPLIFY_BAM_FULLNAME;

-    public static final String CREATE_INDEX_FULLNAME = "disable_bam_indexing";
+    public static final String DISABLE_INDEXING_FULLNAME = "disable_bam_indexing";

    /**
     * The engine into which output stubs should be fed.
@ -82,7 +82,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
    public List<ArgumentDefinition> createArgumentDefinitions( ArgumentSource source ) {
        return Arrays.asList( createBAMArgumentDefinition(source),
                              createBAMCompressionArgumentDefinition(source),
-                              createWriteIndexArgumentDefinition(source),
+                              disableWriteIndexArgumentDefinition(source),
                              createSimplifyBAMArgumentDefinition(source));
    }

@ -117,7 +117,7 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
        if( compressionLevel != null )
            stub.setCompressionLevel(compressionLevel);

-        stub.setIndexOnTheFly(!argumentIsPresent(createWriteIndexArgumentDefinition(source),matches));
+        stub.setIndexOnTheFly(!argumentIsPresent(disableWriteIndexArgumentDefinition(source),matches));
        stub.setSimplifyBAM(argumentIsPresent(createSimplifyBAMArgumentDefinition(source),matches));

        // WARNING: Side effects required by engine!
@ -171,10 +171,10 @@ public class SAMFileWriterArgumentTypeDescriptor extends ArgumentTypeDescriptor
                                       null );
    }

-    private ArgumentDefinition createWriteIndexArgumentDefinition(ArgumentSource source) {
+    private ArgumentDefinition disableWriteIndexArgumentDefinition(ArgumentSource source) {
        return new ArgumentDefinition( ArgumentIOType.ARGUMENT,
                                       boolean.class,
-                                       CREATE_INDEX_FULLNAME,
+                                       DISABLE_INDEXING_FULLNAME,
                                       null,
                                       "Turn off on-the-fly creation of indices for output BAM files.",
                                       false,