diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexBinIterator.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexBinIterator.java index fbc7b8c3e..ef6f5f65e 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexBinIterator.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BAMIndexBinIterator.java @@ -66,6 +66,16 @@ public class BAMIndexBinIterator { */ private final int referenceSequence; + /** + * Position of the linear index in the BAM on disk for the given reference sequence. + */ + private final long linearIndexPosition; + + /** + * Size of the linear index for the given reference sequence. + */ + private final int linearIndexSize; + /** * Size of a long in bytes. */ @@ -95,28 +105,33 @@ public class BAMIndexBinIterator { FileChannel metaIndexChannel = metaIndex.getChannel(); // zero out the contents of the file. Arrays of primitives in java are always zeroed out by default. - byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*(Long.SIZE/8)]; // byte array is zeroed out by default. + byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*LONG_SIZE_IN_BYTES]; // byte array is zeroed out by default. metaIndexChannel.write(ByteBuffer.wrap(emptyContents)); - ByteBuffer binPositionBuffer = ByteBuffer.allocate(emptyContents.length); - binPositionBuffer.order(ByteOrder.LITTLE_ENDIAN); + ByteBuffer positionBuffer = ByteBuffer.allocate(emptyContents.length); + positionBuffer.order(ByteOrder.LITTLE_ENDIAN); + // Write the positions of the bins in the index for (int binNumber = 0; binNumber < binCount; binNumber++) { long position = index.position(); final int indexBin = index.readInteger(); metaIndexChannel.position(indexBin*LONG_SIZE_IN_BYTES); - binPositionBuffer.putLong(position); - binPositionBuffer.flip(); + positionBuffer.putLong(position); + positionBuffer.flip(); - metaIndexChannel.write(binPositionBuffer); - binPositionBuffer.flip(); + metaIndexChannel.write(positionBuffer); + positionBuffer.flip(); final int nChunks = index.readInteger(); index.skipBytes(16 * nChunks); } + // Read the size of the linear index and the first position. + linearIndexSize = index.readInteger(); + linearIndexPosition = index.position(); + metaIndexChannel.close(); metaIndex.close(); } @@ -133,6 +148,55 @@ public class BAMIndexBinIterator { return new LevelIterator(level); } + /** + * Gets the linear index entry for the specified genomic start. + * @param genomicStart Starting location for which to search. + * @return Linear index entry corresponding to this genomic start. + */ + public long getLinearIndexEntry(final int genomicStart) { + final int indexOffset = (genomicStart-1 >> 14); + // If the number of bins is exceeded, don't perform any trimming. + if(indexOffset >= linearIndexSize) + return 0L; + + FileInputStream indexInputStream; + try { + indexInputStream = new FileInputStream(indexFile); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to open index file for reading"); + } + + try { + indexInputStream.getChannel().position(linearIndexPosition+(indexOffset*LONG_SIZE_IN_BYTES)); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to position index file for reading"); + } + + ByteBuffer indexReader = ByteBuffer.allocate(LONG_SIZE_IN_BYTES); + indexReader.order(ByteOrder.LITTLE_ENDIAN); + + try { + indexInputStream.getChannel().read(indexReader); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to position index file for reading"); + } + + indexReader.flip(); + final long linearIndexEntry = indexReader.getLong(); + + try { + indexInputStream.close(); + } + catch(IOException ex) { + throw new ReviewedStingException("Unable to close index file."); + } + + return linearIndexEntry; + } + private class LevelIterator implements CloseableIterator { /** * The raw BAM index file with unordered bins. diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BinTree.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BinTree.java index 5f3e246d1..5d3a6238d 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/BinTree.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/BinTree.java @@ -53,10 +53,16 @@ public class BinTree { */ private final int binTreeStop; - public BinTree(final int binTreeStart, final int binTreeStop,final GATKBin[] bins) { + /** + * Linear index entry associated with this location. + */ + private final long linearIndexEntry; + + public BinTree(final int binTreeStart, final int binTreeStop,final GATKBin[] bins, final long linearIndexEntry) { this.binTreeStart = binTreeStart; this.binTreeStop = binTreeStop; this.bins = bins; + this.linearIndexEntry = linearIndexEntry; } /** @@ -91,6 +97,14 @@ public class BinTree { return binTreeStop; } + /** + * The linear index entry associated with this bin tree. + * @return Linear index entry. + */ + public long getLinearIndexEntry() { + return linearIndexEntry; + } + /** * Returns true if the location of this bin tree is before the given position. * @param locus Locus to test. @@ -127,6 +141,11 @@ class BinTreeIterator implements Iterator { */ private final GATKBAMIndex index; + /** + * Master iterator over the BAM index. + */ + private final BAMIndexBinIterator binIterator; + /** * Iterators over each individual level. */ @@ -146,7 +165,7 @@ class BinTreeIterator implements Iterator { public BinTreeIterator(final GATKBAMIndex index, final File indexFile, final int referenceSequence) { this.index = index; - BAMIndexBinIterator binIterator = new BAMIndexBinIterator(index,indexFile,referenceSequence); + binIterator = new BAMIndexBinIterator(index,indexFile,referenceSequence); levelIterators = new PeekableIterator[GATKBAMIndex.getNumIndexLevels()]; for(int level = 0; level < GATKBAMIndex.getNumIndexLevels(); level++) levelIterators[level] = new PeekableIterator(binIterator.getIteratorOverLevel(level)); @@ -158,6 +177,11 @@ class BinTreeIterator implements Iterator { advance(); } + public void close() { + for(PeekableIterator levelIterator: levelIterators) + levelIterator.close(); + } + public boolean hasNext() { return nextBinTree != null; } @@ -212,7 +236,10 @@ class BinTreeIterator implements Iterator { for(int level = 0; level <= lowestLevel; level++) { if(bins[level] != null) { Bin lowestLevelBin = new Bin(bins[level].getReferenceSequence(),currentBinInLowestLevel); - nextBinTree = new BinTree(index.getFirstLocusInBin(lowestLevelBin),index.getLastLocusInBin(lowestLevelBin),bins); + final int firstLocusInBin = index.getFirstLocusInBin(lowestLevelBin); + final int lastLocusInBin = index.getLastLocusInBin(lowestLevelBin); + final long linearIndexEntry = binIterator.getLinearIndexEntry(firstLocusInBin); + nextBinTree = new BinTree(firstLocusInBin,lastLocusInBin,bins,linearIndexEntry); break; } } diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java index 10d73216e..de02c9311 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/LowMemoryIntervalSharder.java @@ -31,6 +31,7 @@ import net.sf.samtools.GATKChunk; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocSortedSet; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.util.ArrayList; import java.util.Arrays; @@ -203,9 +204,13 @@ public class LowMemoryIntervalSharder implements Iterator { for(GATKChunk chunk: bin.getChunkList()) chunks.add(chunk.clone()); } + final long referenceLinearIndexMinimumOffset = index.getLinearIndex(initialRegion.getContigIndex()).getMinimumOffset(initialRegion.getStart()); + final long linearIndexMinimumOffset = binTree.getLinearIndexEntry(); + if(linearIndexMinimumOffset != referenceLinearIndexMinimumOffset) + throw new ReviewedStingException(String.format("Loaded linear index offset is incorrect; is %d, should be %d",linearIndexMinimumOffset,referenceLinearIndexMinimumOffset)); // Optimize the chunk list with a linear index optimization - chunks = index.optimizeChunkList(chunks,index.getLinearIndex(initialRegion.getContigIndex()).getMinimumOffset(initialRegion.getStart())); + chunks = index.optimizeChunkList(chunks,linearIndexMinimumOffset); GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunks.toArray(new GATKChunk[chunks.size()]));