Basic functionality for intervaled reads in new sharding system. Not currently filtering out cruft, so

the mode of operation is currently queryOverlapping rather than queryContained. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2899 348d0f76-0448-11de-a6fe-93d51630548a
2010-02-26 21:41:55 +00:00 · 2010-02-26 21:41:55 +00:00 · 30eb28886b
parent cfff486338
commit 30eb28886b
12 changed files with 197 additions and 531 deletions
--- a/java/src/net/sf/samtools/BAMBlockIterator.java
+++ b/java/src/net/sf/samtools/BAMBlockIterator.java
@ -1,95 +0,0 @@
-package net.sf.samtools;
-
-import net.sf.samtools.util.CloseableIterator;
-import net.sf.picard.PicardException;
-
-import java.util.NoSuchElementException;
-import java.io.IOException;
-import java.io.FileInputStream;
-import java.io.File;
-
-/**
- * Walks over a BAM file, discovering and returning the starting location of each block
- * in chunk format.
- *
- * @author mhanna
- * @version 0.1
- */
-public class BAMBlockIterator implements CloseableIterator<Block> {
-    /**
-     * The input stream for back files.
-     */
-    private FileInputStream inputStream;
-
-    /**
-     * File channel from which to read chunks.  
-     */
-    private BlockReader blockReader;
-
-    /**
-     * What is the current position of this block within the BAM file?
-     */
-    private long position = 0;
-
-    /**
-     * Iterate through the BAM chunks in a file.
-     * @param file stream File to use when accessing the BAM.
-     * @throws IOException in case of problems reading from the file.
-     */
-    public BAMBlockIterator(File file) throws IOException {
-        inputStream = new FileInputStream(file);
-        blockReader = new BlockReader(inputStream);
-    }
-
-    /**
-     * Close the existing file.
-     */
-    public void close() {
-        try {
-            inputStream.close();
-        }
-        catch(IOException ex) {
-            throw new PicardException("Unable to close file", ex);
-        }
-    }
-
-    /**
-     * Are there other chunks to retrieve in this file?
-     * @return True if other chunks are available, false otherwise.
-     */
-    public boolean hasNext() {
-        try {
-            return !blockReader.eof(position);
-        } catch(IOException ex) {
-            throw new SAMException("Unable to check file for a next BAM record", ex);
-        }
-    }
-
-    /**
-     * Get the next chunk from the iterator.
-     * @return The next chunk.
-     * @throws NoSuchElementException if no next chunk is available.
-     */
-    public Block next() {
-        if(!hasNext())
-            throw new NoSuchElementException("No next chunk is available.");
-
-        Block block = null;
-        try {
-            block = blockReader.getBlockAt(position);
-            position = block.position + block.compressedBlockSize;
-        }
-        catch(IOException ex) {
-            throw new SAMException("Unable to completely read chunk at end of file.", ex);            
-        }
-        return block;
-    }
-
-    /**
-     * Remove a chunk from the iterator.
-     * @throws UnsupportedOperationException always.
-     */
-    public void remove() {
-        throw new UnsupportedOperationException("BAMChunkIterator: Cannot remove a BAM chunk.");
-    }
-}
--- a/java/src/net/sf/samtools/BAMChunkIterator.java
+++ b/java/src/net/sf/samtools/BAMChunkIterator.java
@ -1,88 +0,0 @@
-package net.sf.samtools;
-
-import java.util.*;
-
-/**
- * Adapts a list of BAM blocks to BAM chunks.
- *
- * @author mhanna
- * @version 0.1
- */
-public class BAMChunkIterator implements Iterator<Chunk> {
-    /**
-     * The wrapped chunk iterator.
-     */
-    private final BAMBlockIterator blockIterator;
-
-    /**
-     * List of prefetched segments.  If prefetchedSegments is
-     * empty, this should mean that there's nothing left in the queue.
-     */
-    private Queue<BlockSegment> prefetchedSegments;
-
-    /**
-     * List of chunks to filter out of the final chunk list.
-     */
-    private final Queue<Chunk> filters;
-
-    /**
-     * Creates a new chunk iterator wrapping the given block, optionally filtering
-     * out a list of chunks from those blocks.
-     * TODO: This class is too smart.  Wrap filtering functionality in another class.
-     * @param blockIterator block iterator to wrap.
-     * @param filters List of chunks to filter out of the given input stream.
-     */
-    public BAMChunkIterator(BAMBlockIterator blockIterator, List<Chunk> filters) {
-        this.blockIterator = blockIterator;
-        this.prefetchedSegments = new LinkedList<BlockSegment>();
-        this.filters = new PriorityQueue<Chunk>(filters);
-        seedNextSegments();
-    }
-
-    /**
-     * Is a next chunk available.
-     * @return true if a next chunk is available; false otherwise.
-     */
-    public boolean hasNext() {
-        return !prefetchedSegments.isEmpty();    
-    }
-
-    /**
-     * Returns the next chunk in the list.
-     * @return Next block, adapted to a chunk.
-     */
-    public Chunk next() {
-        if(prefetchedSegments.isEmpty())
-            throw new NoSuchElementException("BAMChunkIterator is exhausted.");
-        // TODO: Maybe tie together adjacent chunks together?
-        Chunk nextChunk = prefetchedSegments.remove().toChunk();
-        seedNextSegments();
-        return nextChunk;
-    }
-
-    /**
-     * @throws UnsupportedOperationException always.
-     */
-    public void remove() {
-        throw new UnsupportedOperationException("Cannot remove from a BAM chunk iterator");
-    }
-
-    /**
-     * Seed the next value or set of values for the iterator, if necessary.
-     */
-    private void seedNextSegments() {
-        while(prefetchedSegments.isEmpty() && blockIterator.hasNext()) {
-            // Add the new segments to the prefetch...
-            prefetchedSegments.add(new BlockSegment(blockIterator.next()));
-
-            // And iteratively subtract the filtering chunks.
-            for(Chunk filter: filters) {
-                Queue<BlockSegment> filteredBlockSegments = new LinkedList<BlockSegment>();
-                for(BlockSegment blockSegment: prefetchedSegments)
-                    filteredBlockSegments.addAll(blockSegment.minus(filter));
-                prefetchedSegments = filteredBlockSegments;
-            }
-        }
-    }
-
-}
--- a/java/src/net/sf/samtools/BAMFileHeaderLoader.java
+++ b/java/src/net/sf/samtools/BAMFileHeaderLoader.java
@ -1,127 +0,0 @@
-package net.sf.samtools;
-
-import net.sf.samtools.util.BlockCompressedInputStream;
-import net.sf.samtools.util.BinaryCodec;
-import net.sf.samtools.util.StringLineReader;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.DataInputStream;
-import java.util.Arrays;
-import java.util.List;
-import java.util.ArrayList;
-
-/**
- * Loads a BAM file header from an file, optionally providing its position
- * within the file.
- *
- * @author mhanna
- * @version 0.1
- */
-public class BAMFileHeaderLoader {
-    /**
-     * The contents of the BAM file header.
-     */
-    private final SAMFileHeader header;
-
-    /**
-     * Location of the header within the BAM file.
-     */
-    private final Chunk location;
-
-    public static final Chunk preambleLocation = new Chunk(0<<16 | 0, 0<<16 | 3);
-
-    /**
-     * Load the header from the given file.
-     * @param header the parsed haeder for the BAM file.
-     * @param location the location of the header (start and stop) within the BAM.
-     */
-    private BAMFileHeaderLoader(SAMFileHeader header, Chunk location) {
-        this.header = header;
-        this.location = location;
-    }
-
-    /**
-     * Gets the header for the given BAM file.
-     * @return The header for this BAM file.
-     */
-    public SAMFileHeader getHeader() {
-        return header;
-    }
-
-    /**
-     * Gets the location of the header within the given BAM file, in chunk format.
-     * @return the location of the header, in chunk coordinates.
-     */
-    public Chunk getLocation() {
-        return location;
-    }
-
-    public static BAMFileHeaderLoader load(File file) throws IOException {
-        BlockCompressedInputStream inputStream = new BlockCompressedInputStream(file);
-        BinaryCodec binaryCodec = new BinaryCodec(new DataInputStream(inputStream));
-
-        final byte[] buffer = new byte[4];
-        binaryCodec.readBytes(buffer);
-        if (!Arrays.equals(buffer, BAMFileConstants.BAM_MAGIC)) {
-            throw new IOException("Invalid BAM file header");
-        }
-
-        final int headerTextLength = binaryCodec.readInt();
-        final String textHeader = binaryCodec.readString(headerTextLength);
-        final SAMTextHeaderCodec headerCodec = new SAMTextHeaderCodec();
-        headerCodec.setValidationStringency(SAMFileReader.ValidationStringency.SILENT);
-        SAMFileHeader header = headerCodec.decode(new StringLineReader(textHeader),file.getAbsolutePath());
-
-        // directly copied from BAMFileReader...
-        final int sequenceCount = binaryCodec.readInt();
-        if (header.getSequenceDictionary().size() > 0) {
-            // It is allowed to have binary sequences but no text sequences, so only validate if both are present
-            if (sequenceCount != header.getSequenceDictionary().size()) {
-                throw new SAMFormatException("Number of sequences in text header (" +
-                        header.getSequenceDictionary().size() +
-                        ") != number of sequences in binary header (" + sequenceCount + ") for file " + file);
-            }
-            for (int i = 0; i < sequenceCount; i++) {
-                final SAMSequenceRecord binarySequenceRecord = readSequenceRecord(binaryCodec,file);
-                final SAMSequenceRecord sequenceRecord = header.getSequence(i);
-                if (!sequenceRecord.getSequenceName().equals(binarySequenceRecord.getSequenceName())) {
-                    throw new SAMFormatException("For sequence " + i + ", text and binary have different names in file " +
-                            binaryCodec);
-                }
-                if (sequenceRecord.getSequenceLength() != binarySequenceRecord.getSequenceLength()) {
-                    throw new SAMFormatException("For sequence " + i + ", text and binary have different lengths in file " +
-                            binaryCodec);
-                }
-            }
-        } else {
-            // If only binary sequences are present, copy them into mFileHeader
-            final List<SAMSequenceRecord> sequences = new ArrayList<SAMSequenceRecord>(sequenceCount);
-            for (int i = 0; i < sequenceCount; i++) {
-                sequences.add(readSequenceRecord(binaryCodec,file));
-            }
-            header.setSequenceDictionary(new SAMSequenceDictionary(sequences));
-        }
-        inputStream.close();
-
-        return new BAMFileHeaderLoader(header,new Chunk(buffer.length,inputStream.getFilePointer()-1));
-    }
-
-    /**
-     * Reads a single binary sequence record from the file or stream
-     * @param binaryCodec stream to read from.
-     * @param file  Note that this is used only for reporting errors.
-     * @return an individual sequence record.
-     */
-    private static SAMSequenceRecord readSequenceRecord(final BinaryCodec binaryCodec, final File file) {
-        final int nameLength = binaryCodec.readInt();
-        if (nameLength <= 1) {
-            throw new SAMFormatException("Invalid BAM file header: missing sequence name in file " + file.getAbsolutePath());
-        }
-        final String sequenceName = binaryCodec.readString(nameLength - 1);
-        // Skip the null terminator
-        binaryCodec.readByte();
-        final int sequenceLength = binaryCodec.readInt();
-        return new SAMSequenceRecord(sequenceName, sequenceLength);
-    }
-}
--- a/java/src/net/sf/samtools/BlockTestHarness.java
+++ b/java/src/net/sf/samtools/BlockTestHarness.java
@ -1,56 +0,0 @@
-package net.sf.samtools;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.Collections;
-
-/**
- * Test harness for playing with sharding by BGZF block.
- *
- * @author mhanna
- * @version 0.1
- */
-public class BlockTestHarness {
-    private static void usage() {
-        System.out.println("Usage: ChunkTestHarness <filename>.bam");
-        System.exit(1);
-    }
-
-    public static void main(String args[]) throws IOException {
-        if(args.length == 0)
-            usage();
-
-        String bamFileName = args[0];
-        if(!bamFileName.endsWith(".bam"))
-            usage();
-
-        File bamFile = new File(bamFileName);
-        if(!bamFile.exists())
-            usage();
-
-        Chunk headerLocation = BAMFileHeaderLoader.load(bamFile).getLocation();
-        System.out.printf("Header location = %s%n", headerLocation);
-
-        BAMChunkIterator chunkIterator = new BAMChunkIterator(new BAMBlockIterator(bamFile),
-                                                              Collections.singletonList(headerLocation));
-        while(chunkIterator.hasNext()) {
-            Chunk chunk = chunkIterator.next();
-        }
-
-        // The following test code blocks a bunch of files together.
-        /*
-        BAMBlockIterator blockIterator = new BAMBlockIterator(bamFile);
-        long blockCount = 0;
-
-        long startTime = System.currentTimeMillis();
-        while(blockIterator.hasNext()) {
-            Block block = blockIterator.next();
-            blockCount++;
-            System.out.println(block);
-        }
-        long endTime = System.currentTimeMillis();
-
-        System.out.printf("Number of chunks: %d; elapsed time: %dms%n", blockCount, endTime-startTime);
-        */
-    }
-}
--- a/java/src/net/sf/samtools/Chunk.java
+++ b/java/src/net/sf/samtools/Chunk.java
@ -30,7 +30,7 @@ public class Chunk implements Cloneable,Comparable<Chunk> {
        return mChunkStart;
    }

-    void setChunkStart(final long value) {
+    public void setChunkStart(final long value) {
        mChunkStart = value;
    }

@ -38,7 +38,7 @@ public class Chunk implements Cloneable,Comparable<Chunk> {
        return mChunkEnd;
    }

-    void setChunkEnd(final long value) {
+    public void setChunkEnd(final long value) {
        mChunkEnd = value;
    }

--- a/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/BAMFileStat.java
@ -47,8 +47,7 @@ public class BAMFileStat extends CommandLineProgram {

        switch(command) {
            case ShowBlocks:
-                showBlocks(new File(bamFileName),startPosition,stopPosition);
-                break;
+                throw new StingException("The BAM block inspector has been disabled.");
            case ShowIndex:
                showIndexBins(new File(bamFileName+".bai"));
                break;
@ -70,26 +69,6 @@ public class BAMFileStat extends CommandLineProgram {
        }
    }

-    private void showBlocks(File bamFile, Integer startPosition, Integer stopPosition) {
-        int blockNumber = 0;
-
-        try {
-            BAMBlockIterator iterator = new BAMBlockIterator(bamFile);
-            while(iterator.hasNext()) {
-                Block block = iterator.next();
-                blockNumber++;
-
-                if(startPosition != null && startPosition > blockNumber) continue;
-                if(stopPosition != null && stopPosition < blockNumber) break;
-
-                System.out.printf("Block number = %d; position = %d; compressed size = %d; uncompressed size = %d%n", blockNumber, block.position, block.compressedBlockSize, block.uncompressedBlockSize);
-            }
-        }
-        catch(IOException ex) {
-            throw new StingException("Unable to open BAM file");
-        }
-    }
-
    private void showIndexBins(File bamFile) {
        BAMFileIndexContentInspector inspector = new BAMFileIndexContentInspector(bamFile);
        inspector.inspect(System.out,null,null);
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BlockDelimitedReadShard.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/BlockDelimitedReadShard.java
@ -34,9 +34,16 @@ public class BlockDelimitedReadShard extends ReadShard implements BAMFormatAware
     */
    private final Collection<SAMRecord> reads = new ArrayList<SAMRecord>(BlockDelimitedReadShardStrategy.MAX_READS);

-    public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks) {
+    /**
+     * An BlockDelimitedLocusShard can be used either for READ or READ shard types.
+     * Track which type is being used.
+     */
+    private final Shard.ShardType shardType;        
+
+    public BlockDelimitedReadShard(Reads sourceInfo, Map<SAMFileReader2,List<Chunk>> chunks, Shard.ShardType shardType) {
        this.sourceInfo = sourceInfo;
        this.chunks = chunks;
+        this.shardType = shardType;
    }

    /**
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/BlockDelimitedReadShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/BlockDelimitedReadShardStrategy.java
@ -5,6 +5,7 @@ import net.sf.samtools.*;
 import java.util.*;

 import org.broadinstitute.sting.utils.StingException;
+import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;

@ -26,6 +27,9 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
     */
    protected final BlockDrivenSAMDataSource dataSource;

+    /** our storage of the genomic locations they'd like to shard over */
+    private final List<FilePointer> filePointers = new ArrayList<FilePointer>();
+
    /**
     * Position of the last shard in the file.
     */
@ -40,12 +44,14 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
     * Create a new read shard strategy, loading read shards from the given BAM file.
     * @param dataSource Data source from which to load shards.
     */
-    public BlockDelimitedReadShardStrategy(SAMDataSource dataSource) {
+    public BlockDelimitedReadShardStrategy(SAMDataSource dataSource, GenomeLocSortedSet locations) {
        if(!(dataSource instanceof BlockDrivenSAMDataSource))
            throw new StingException("Block-delimited read shard strategy cannot work without a block-driven data source.");

        this.dataSource = (BlockDrivenSAMDataSource)dataSource;
-        this.position = this.dataSource.getCurrentPosition();        
+        this.position = this.dataSource.getCurrentPosition();
+        if(locations != null)
+            filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList(),this.dataSource.getNumIndexLevels()-1));
    }

    /**
@ -65,17 +71,41 @@ public class BlockDelimitedReadShardStrategy extends ReadShardStrategy {
        if(!hasNext())
            throw new NoSuchElementException("No such element available: SAM reader has arrived at last shard.");

-        // TODO: This level of processing should not be necessary.
-        Map<SAMFileReader2,List<Chunk>> boundedPosition = new HashMap<SAMFileReader2,List<Chunk>>();
-        for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
-            boundedPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
+        Map<SAMFileReader2,List<Chunk>> shardPosition = null;
+        if(!filePointers.isEmpty()) {
+            boolean foundData = false;
+            for(FilePointer filePointer: filePointers) {
+                shardPosition = dataSource.getFilePointersBounding(filePointer.bin);
+                for(SAMFileReader2 reader: shardPosition.keySet()) {
+                    List<Chunk> chunks = shardPosition.get(reader);
+                    Chunk filePosition = position.get(reader);
+                    for(Chunk chunk: chunks) {
+                        if(filePosition.getChunkStart() > chunk.getChunkEnd())
+                            chunks.remove(chunk);
+                        else {
+                            if(filePosition.getChunkStart() > chunk.getChunkStart())
+                                chunk.setChunkStart(filePosition.getChunkStart());
+                            foundData = true;
+                        }
+                    }
+                }
+                if(foundData)
+                    break;
+            }
+        }
+        else {
+            // TODO: This level of processing should not be necessary.
+            shardPosition = new HashMap<SAMFileReader2,List<Chunk>>();
+            for(Map.Entry<SAMFileReader2,Chunk> entry: position.entrySet())
+                shardPosition.put(entry.getKey(),Collections.singletonList(entry.getValue()));
+        }

-        BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),boundedPosition);
+        BAMFormatAwareShard shard = new BlockDelimitedReadShard(dataSource.getReadsInfo(),shardPosition,Shard.ShardType.READ);
        atEndOfStream = dataSource.fillShard(shard);

        this.position = dataSource.getCurrentPosition();

-        return shard;
+        return shard;        
    }

    /**
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IndexDelimitedLocusShardStrategy.java
@ -1,16 +1,13 @@
 package org.broadinstitute.sting.gatk.datasources.shards;

-import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocSortedSet;
 import org.broadinstitute.sting.utils.StingException;
-import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;

 import java.util.*;

 import net.sf.samtools.Chunk;
-import net.sf.samtools.Bin;
 import net.sf.samtools.SAMFileReader2;

 /*
@ -46,7 +43,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
    /**
     * The data source to use when performing this sharding.
     */
-    private final BlockDrivenSAMDataSource blockDrivenDataSource;
+    private final BlockDrivenSAMDataSource dataSource;

    /** our storage of the genomic locations they'd like to shard over */
    private final List<FilePointer> filePointers = new ArrayList<FilePointer>();
@ -65,108 +62,11 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
        if(!(dataSource instanceof BlockDrivenSAMDataSource))
            throw new StingException("Cannot power an IndexDelimitedLocusShardStrategy with this data source.");

-        blockDrivenDataSource = (BlockDrivenSAMDataSource)dataSource;
-        filePointers.addAll(batchLociIntoBins(locations.toList(),blockDrivenDataSource.getNumIndexLevels()-1));
+        this.dataSource = (BlockDrivenSAMDataSource)dataSource;
+        filePointers.addAll(IntervalSharder.shardIntervals(this.dataSource,locations.toList(),this.dataSource.getNumIndexLevels()-1));
        filePointerIterator = filePointers.iterator();
    }

-    private List<FilePointer> batchLociIntoBins(final List<GenomeLoc> loci, final int binsDeeperThan) {
-        // Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
-        List<FilePointer> filePointers = new ArrayList<FilePointer>();
-        FilePointer filePointer = null;
-
-        for(GenomeLoc location: loci) {
-            int locationStart = (int)location.getStart();
-            final int locationStop = (int)location.getStop();
-
-            List<Bin> bins = findBinsAtLeastAsDeepAs(blockDrivenDataSource.getOverlappingBins(location),binsDeeperThan);
-
-            // Recursive stopping condition -- algorithm is at the zero point and no bins have been found.
-            if(binsDeeperThan == 0 && bins.size() == 0) {
-                filePointers.add(new FilePointer(location));
-                continue;
-            }
-
-            // No bins found; step up a level and search again.
-            if(bins.size() == 0) {
-                if(filePointer != null && filePointer.locations.size() > 0) {
-                    filePointers.add(filePointer);
-                    filePointer = null;
-                }                
-
-                filePointers.addAll(batchLociIntoBins(Collections.singletonList(location),binsDeeperThan-1));
-                continue;
-            }
-
-            // Bins found; try to match bins with locations.
-            Collections.sort(bins);
-            Iterator<Bin> binIterator = bins.iterator();
-
-            while(locationStop >= locationStart) {
-                int binStart = filePointer!=null ? blockDrivenDataSource.getFirstLocusInBin(filePointer.bin) : 0;
-                int binStop = filePointer!=null ? blockDrivenDataSource.getLastLocusInBin(filePointer.bin) : 0;
-
-                while(binStop < locationStart && binIterator.hasNext()) {
-                    if(filePointer != null && filePointer.locations.size() > 0)
-                        filePointers.add(filePointer);
-
-                    filePointer = new FilePointer(binIterator.next());
-                    binStart = blockDrivenDataSource.getFirstLocusInBin(filePointer.bin);
-                    binStop = blockDrivenDataSource.getLastLocusInBin(filePointer.bin);
-                }
-
-                if(locationStart < binStart) {
-                    // The region starts before the first bin in the sequence.  Add the region occurring before the sequence.
-                    if(filePointer != null && filePointer.locations.size() > 0) {
-                        filePointers.add(filePointer);
-                        filePointer = null;
-                    }
-
-                    final int regionStop = Math.min(locationStop,binStart-1);
-
-                    GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop);
-                    filePointers.addAll(batchLociIntoBins(Collections.singletonList(subset),binsDeeperThan-1));
-
-                    locationStart = regionStop + 1;
-                }
-                else if(locationStart > binStop) {
-                    // The region starts after the last bin in the sequence.  Add the region occurring after the sequence.
-                    if(filePointer != null && filePointer.locations.size() > 0) {
-                        filePointers.add(filePointer);
-                        filePointer = null;
-                    }
-
-                    GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop);
-                    filePointers.addAll(batchLociIntoBins(Collections.singletonList(subset),binsDeeperThan-1));
-
-                    locationStart = locationStop + 1;
-                }
-                else {
-                    // The start of the region overlaps the bin.  Add the overlapping subset.
-                    final int regionStop = Math.min(locationStop,binStop);
-                    filePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),
-                            locationStart,
-                            regionStop));
-                    locationStart = regionStop + 1;
-                }
-            }
-        }
-
-        if(filePointer != null && filePointer.locations.size() > 0)
-            filePointers.add(filePointer);
-
-        return filePointers;
-    }
-
-    private List<Bin> findBinsAtLeastAsDeepAs(final List<Bin> bins, final int deepestBinLevel) {
-        List<Bin> deepestBins = new ArrayList<Bin>();
-        for(Bin bin: bins) {
-            if(blockDrivenDataSource.getLevelForBin(bin) >= deepestBinLevel)
-                deepestBins.add(bin);
-        }
-        return deepestBins;
-    }
-
    /**
     * returns true if there are additional shards
     *
@ -183,7 +83,7 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
     */
    public IndexDelimitedLocusShard next() {
        FilePointer nextFilePointer = filePointerIterator.next();
-        Map<SAMFileReader2,List<Chunk>> chunksBounding = blockDrivenDataSource.getFilePointersBounding(nextFilePointer.bin);
+        Map<SAMFileReader2,List<Chunk>> chunksBounding = dataSource.getFilePointersBounding(nextFilePointer.bin);
        return new IndexDelimitedLocusShard(nextFilePointer.locations,chunksBounding,Shard.ShardType.LOCUS_INTERVAL);
    }

@ -200,28 +100,4 @@ public class IndexDelimitedLocusShardStrategy implements ShardStrategy {
    public Iterator<Shard> iterator() {
        return this;
    }
-
-    /**
-     * Represents a small section of a BAM file, and every associated interval.
-     */
-    private class FilePointer {
-        private final Bin bin;
-        private final List<GenomeLoc> locations;
-
-        public FilePointer(Bin bin) {
-            this.bin = bin;
-            this.locations = new ArrayList<GenomeLoc>();
-        }
-
-        public FilePointer(GenomeLoc location) {
-            bin = null;
-            locations = Collections.singletonList(location);
-        }
-
-        public void addLocation(GenomeLoc location) {
-            locations.add(location);
-        }
-        
-    }
-
 }
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/IntervalSharder.java
@ -0,0 +1,142 @@
+package org.broadinstitute.sting.gatk.datasources.shards;
+
+import org.broadinstitute.sting.utils.GenomeLoc;
+import org.broadinstitute.sting.utils.GenomeLocParser;
+import org.broadinstitute.sting.gatk.datasources.simpleDataSources.BlockDrivenSAMDataSource;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+
+import net.sf.samtools.Bin;
+
+/**
+ * Shard intervals based on position within the BAM file.
+ *
+ * @author mhanna
+ * @version 0.1
+ */
+public class IntervalSharder {
+    protected static List<FilePointer> shardIntervals(final BlockDrivenSAMDataSource dataSource, final List<GenomeLoc> loci, final int binsDeeperThan) {
+        // Gather bins for the given loci, splitting loci as necessary so that each falls into exactly one lowest-level bin.
+        List<FilePointer> filePointers = new ArrayList<FilePointer>();
+        FilePointer filePointer = null;
+
+        for(GenomeLoc location: loci) {
+            int locationStart = (int)location.getStart();
+            final int locationStop = (int)location.getStop();
+
+            List<Bin> bins = findBinsAtLeastAsDeepAs(dataSource,dataSource.getOverlappingBins(location),binsDeeperThan);
+
+            // Recursive stopping condition -- algorithm is at the zero point and no bins have been found.
+            if(binsDeeperThan == 0 && bins.size() == 0) {
+                filePointers.add(new FilePointer(location));
+                continue;
+            }
+
+            // No bins found; step up a level and search again.
+            if(bins.size() == 0) {
+                if(filePointer != null && filePointer.locations.size() > 0) {
+                    filePointers.add(filePointer);
+                    filePointer = null;
+                }
+
+                filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(location),binsDeeperThan-1));
+                continue;
+            }
+
+            // Bins found; try to match bins with locations.
+            Collections.sort(bins);
+            Iterator<Bin> binIterator = bins.iterator();
+
+            while(locationStop >= locationStart) {
+                int binStart = filePointer!=null ? dataSource.getFirstLocusInBin(filePointer.bin) : 0;
+                int binStop = filePointer!=null ? dataSource.getLastLocusInBin(filePointer.bin) : 0;
+
+                while(binStop < locationStart && binIterator.hasNext()) {
+                    if(filePointer != null && filePointer.locations.size() > 0)
+                        filePointers.add(filePointer);
+
+                    filePointer = new FilePointer(binIterator.next());
+                    binStart = dataSource.getFirstLocusInBin(filePointer.bin);
+                    binStop = dataSource.getLastLocusInBin(filePointer.bin);
+                }
+
+                if(locationStart < binStart) {
+                    // The region starts before the first bin in the sequence.  Add the region occurring before the sequence.
+                    if(filePointer != null && filePointer.locations.size() > 0) {
+                        filePointers.add(filePointer);
+                        filePointer = null;
+                    }
+
+                    final int regionStop = Math.min(locationStop,binStart-1);
+
+                    GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,regionStop);
+                    filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(subset),binsDeeperThan-1));
+
+                    locationStart = regionStop + 1;
+                }
+                else if(locationStart > binStop) {
+                    // The region starts after the last bin in the sequence.  Add the region occurring after the sequence.
+                    if(filePointer != null && filePointer.locations.size() > 0) {
+                        filePointers.add(filePointer);
+                        filePointer = null;
+                    }
+
+                    GenomeLoc subset = GenomeLocParser.createGenomeLoc(location.getContig(),locationStart,locationStop);
+                    filePointers.addAll(shardIntervals(dataSource,Collections.singletonList(subset),binsDeeperThan-1));
+
+                    locationStart = locationStop + 1;
+                }
+                else {
+                    // The start of the region overlaps the bin.  Add the overlapping subset.
+                    final int regionStop = Math.min(locationStop,binStop);
+                    filePointer.addLocation(GenomeLocParser.createGenomeLoc(location.getContig(),
+                            locationStart,
+                            regionStop));
+                    locationStart = regionStop + 1;
+                }
+            }
+        }
+
+        if(filePointer != null && filePointer.locations.size() > 0)
+            filePointers.add(filePointer);
+
+        return filePointers;
+    }
+
+    private static List<Bin> findBinsAtLeastAsDeepAs(final BlockDrivenSAMDataSource dataSource, final List<Bin> bins, final int deepestBinLevel) {
+        List<Bin> deepestBins = new ArrayList<Bin>();
+        for(Bin bin: bins) {
+            if(dataSource.getLevelForBin(bin) >= deepestBinLevel)
+                deepestBins.add(bin);
+        }
+        return deepestBins;
+    }
+}
+
+/**
+ * Represents a small section of a BAM file, and every associated interval.
+ */
+class FilePointer {
+    protected final Bin bin;
+    protected final List<GenomeLoc> locations;
+
+    public FilePointer(Bin bin) {
+        this.bin = bin;
+        this.locations = new ArrayList<GenomeLoc>();
+    }
+
+    public FilePointer(GenomeLoc location) {
+        bin = null;
+        locations = Collections.singletonList(location);
+    }
+
+    public void addLocation(GenomeLoc location) {
+        locations.add(location);
+    }
+
+}
+
+
--- a/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java
+++ b/java/src/org/broadinstitute/sting/gatk/datasources/shards/ShardStrategyFactory.java
@ -78,7 +78,7 @@ public class ShardStrategyFactory {
            case LOCUS_EXPERIMENTAL:
                throw new UnsupportedOperationException("Cannot do experimental locus sharding without intervals");
            case READS_EXPERIMENTAL:
-                return new BlockDelimitedReadShardStrategy(dataSource);
+                return new BlockDelimitedReadShardStrategy(dataSource,null);
            default:
                throw new StingException("Strategy: " + strat + " isn't implemented for this type of shatter request");
        }
@ -118,7 +118,7 @@ public class ShardStrategyFactory {
            case LOCUS_EXPERIMENTAL:
                return new IndexDelimitedLocusShardStrategy(dataSource,lst);
            case READS_EXPERIMENTAL:
-                throw new UnsupportedOperationException("Cannot do experimental read sharding with intervals");
+                return new BlockDelimitedReadShardStrategy(dataSource,lst);
            default:
                throw new StingException("Strategy: " + strat + " isn't implemented");
        }
--- a/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
+++ b/java/src/org/broadinstitute/sting/gatk/executive/LinearMicroScheduler.java
@ -3,7 +3,6 @@ package org.broadinstitute.sting.gatk.executive;
 import org.broadinstitute.sting.gatk.datasources.providers.ShardDataProvider;
 import org.broadinstitute.sting.gatk.datasources.shards.Shard;
 import org.broadinstitute.sting.gatk.datasources.shards.ShardStrategy;
-import org.broadinstitute.sting.gatk.datasources.shards.ReadShard;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.ReferenceOrderedDataSource;
 import org.broadinstitute.sting.gatk.datasources.simpleDataSources.SAMDataSource;
 import org.broadinstitute.sting.gatk.walkers.Walker;
@ -11,7 +10,6 @@ import org.broadinstitute.sting.gatk.io.DirectOutputTracker;
 import org.broadinstitute.sting.gatk.io.OutputTracker;
 import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
 import org.broadinstitute.sting.utils.fasta.IndexedFastaSequenceFile;
-import org.broadinstitute.sting.utils.GenomeLocSortedSet;

 import java.util.Collection;