Load the linear index without forcing the index for the entire contig to be
loaded into memory. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5349 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
a20ce1436d
commit
43567b7fe3
|
|
@ -66,6 +66,16 @@ public class BAMIndexBinIterator {
|
|||
*/
|
||||
private final int referenceSequence;
|
||||
|
||||
/**
|
||||
* Position of the linear index in the BAM on disk for the given reference sequence.
|
||||
*/
|
||||
private final long linearIndexPosition;
|
||||
|
||||
/**
|
||||
* Size of the linear index for the given reference sequence.
|
||||
*/
|
||||
private final int linearIndexSize;
|
||||
|
||||
/**
|
||||
* Size of a long in bytes.
|
||||
*/
|
||||
|
|
@ -95,28 +105,33 @@ public class BAMIndexBinIterator {
|
|||
FileChannel metaIndexChannel = metaIndex.getChannel();
|
||||
|
||||
// zero out the contents of the file. Arrays of primitives in java are always zeroed out by default.
|
||||
byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*(Long.SIZE/8)]; // byte array is zeroed out by default.
|
||||
byte[] emptyContents = new byte[GATKBAMIndex.MAX_BINS*LONG_SIZE_IN_BYTES]; // byte array is zeroed out by default.
|
||||
metaIndexChannel.write(ByteBuffer.wrap(emptyContents));
|
||||
|
||||
ByteBuffer binPositionBuffer = ByteBuffer.allocate(emptyContents.length);
|
||||
binPositionBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
ByteBuffer positionBuffer = ByteBuffer.allocate(emptyContents.length);
|
||||
positionBuffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
// Write the positions of the bins in the index
|
||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||
long position = index.position();
|
||||
|
||||
final int indexBin = index.readInteger();
|
||||
|
||||
metaIndexChannel.position(indexBin*LONG_SIZE_IN_BYTES);
|
||||
binPositionBuffer.putLong(position);
|
||||
binPositionBuffer.flip();
|
||||
positionBuffer.putLong(position);
|
||||
positionBuffer.flip();
|
||||
|
||||
metaIndexChannel.write(binPositionBuffer);
|
||||
binPositionBuffer.flip();
|
||||
metaIndexChannel.write(positionBuffer);
|
||||
positionBuffer.flip();
|
||||
|
||||
final int nChunks = index.readInteger();
|
||||
index.skipBytes(16 * nChunks);
|
||||
}
|
||||
|
||||
// Read the size of the linear index and the first position.
|
||||
linearIndexSize = index.readInteger();
|
||||
linearIndexPosition = index.position();
|
||||
|
||||
metaIndexChannel.close();
|
||||
metaIndex.close();
|
||||
}
|
||||
|
|
@ -133,6 +148,55 @@ public class BAMIndexBinIterator {
|
|||
return new LevelIterator(level);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the linear index entry for the specified genomic start.
|
||||
* @param genomicStart Starting location for which to search.
|
||||
* @return Linear index entry corresponding to this genomic start.
|
||||
*/
|
||||
public long getLinearIndexEntry(final int genomicStart) {
|
||||
final int indexOffset = (genomicStart-1 >> 14);
|
||||
// If the number of bins is exceeded, don't perform any trimming.
|
||||
if(indexOffset >= linearIndexSize)
|
||||
return 0L;
|
||||
|
||||
FileInputStream indexInputStream;
|
||||
try {
|
||||
indexInputStream = new FileInputStream(indexFile);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to open index file for reading");
|
||||
}
|
||||
|
||||
try {
|
||||
indexInputStream.getChannel().position(linearIndexPosition+(indexOffset*LONG_SIZE_IN_BYTES));
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to position index file for reading");
|
||||
}
|
||||
|
||||
ByteBuffer indexReader = ByteBuffer.allocate(LONG_SIZE_IN_BYTES);
|
||||
indexReader.order(ByteOrder.LITTLE_ENDIAN);
|
||||
|
||||
try {
|
||||
indexInputStream.getChannel().read(indexReader);
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to position index file for reading");
|
||||
}
|
||||
|
||||
indexReader.flip();
|
||||
final long linearIndexEntry = indexReader.getLong();
|
||||
|
||||
try {
|
||||
indexInputStream.close();
|
||||
}
|
||||
catch(IOException ex) {
|
||||
throw new ReviewedStingException("Unable to close index file.");
|
||||
}
|
||||
|
||||
return linearIndexEntry;
|
||||
}
|
||||
|
||||
private class LevelIterator implements CloseableIterator<GATKBin> {
|
||||
/**
|
||||
* The raw BAM index file with unordered bins.
|
||||
|
|
|
|||
|
|
@ -53,10 +53,16 @@ public class BinTree {
|
|||
*/
|
||||
private final int binTreeStop;
|
||||
|
||||
public BinTree(final int binTreeStart, final int binTreeStop,final GATKBin[] bins) {
|
||||
/**
|
||||
* Linear index entry associated with this location.
|
||||
*/
|
||||
private final long linearIndexEntry;
|
||||
|
||||
public BinTree(final int binTreeStart, final int binTreeStop,final GATKBin[] bins, final long linearIndexEntry) {
|
||||
this.binTreeStart = binTreeStart;
|
||||
this.binTreeStop = binTreeStop;
|
||||
this.bins = bins;
|
||||
this.linearIndexEntry = linearIndexEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -91,6 +97,14 @@ public class BinTree {
|
|||
return binTreeStop;
|
||||
}
|
||||
|
||||
/**
|
||||
* The linear index entry associated with this bin tree.
|
||||
* @return Linear index entry.
|
||||
*/
|
||||
public long getLinearIndexEntry() {
|
||||
return linearIndexEntry;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the location of this bin tree is before the given position.
|
||||
* @param locus Locus to test.
|
||||
|
|
@ -127,6 +141,11 @@ class BinTreeIterator implements Iterator<BinTree> {
|
|||
*/
|
||||
private final GATKBAMIndex index;
|
||||
|
||||
/**
|
||||
* Master iterator over the BAM index.
|
||||
*/
|
||||
private final BAMIndexBinIterator binIterator;
|
||||
|
||||
/**
|
||||
* Iterators over each individual level.
|
||||
*/
|
||||
|
|
@ -146,7 +165,7 @@ class BinTreeIterator implements Iterator<BinTree> {
|
|||
public BinTreeIterator(final GATKBAMIndex index, final File indexFile, final int referenceSequence) {
|
||||
this.index = index;
|
||||
|
||||
BAMIndexBinIterator binIterator = new BAMIndexBinIterator(index,indexFile,referenceSequence);
|
||||
binIterator = new BAMIndexBinIterator(index,indexFile,referenceSequence);
|
||||
levelIterators = new PeekableIterator[GATKBAMIndex.getNumIndexLevels()];
|
||||
for(int level = 0; level < GATKBAMIndex.getNumIndexLevels(); level++)
|
||||
levelIterators[level] = new PeekableIterator<GATKBin>(binIterator.getIteratorOverLevel(level));
|
||||
|
|
@ -158,6 +177,11 @@ class BinTreeIterator implements Iterator<BinTree> {
|
|||
advance();
|
||||
}
|
||||
|
||||
public void close() {
|
||||
for(PeekableIterator<GATKBin> levelIterator: levelIterators)
|
||||
levelIterator.close();
|
||||
}
|
||||
|
||||
public boolean hasNext() {
|
||||
return nextBinTree != null;
|
||||
}
|
||||
|
|
@ -212,7 +236,10 @@ class BinTreeIterator implements Iterator<BinTree> {
|
|||
for(int level = 0; level <= lowestLevel; level++) {
|
||||
if(bins[level] != null) {
|
||||
Bin lowestLevelBin = new Bin(bins[level].getReferenceSequence(),currentBinInLowestLevel);
|
||||
nextBinTree = new BinTree(index.getFirstLocusInBin(lowestLevelBin),index.getLastLocusInBin(lowestLevelBin),bins);
|
||||
final int firstLocusInBin = index.getFirstLocusInBin(lowestLevelBin);
|
||||
final int lastLocusInBin = index.getLastLocusInBin(lowestLevelBin);
|
||||
final long linearIndexEntry = binIterator.getLinearIndexEntry(firstLocusInBin);
|
||||
nextBinTree = new BinTree(firstLocusInBin,lastLocusInBin,bins,linearIndexEntry);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import net.sf.samtools.GATKChunk;
|
|||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
||||
import org.broadinstitute.sting.utils.GenomeLocSortedSet;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
|
|
@ -203,9 +204,13 @@ public class LowMemoryIntervalSharder implements Iterator<FilePointer> {
|
|||
for(GATKChunk chunk: bin.getChunkList())
|
||||
chunks.add(chunk.clone());
|
||||
}
|
||||
final long referenceLinearIndexMinimumOffset = index.getLinearIndex(initialRegion.getContigIndex()).getMinimumOffset(initialRegion.getStart());
|
||||
final long linearIndexMinimumOffset = binTree.getLinearIndexEntry();
|
||||
if(linearIndexMinimumOffset != referenceLinearIndexMinimumOffset)
|
||||
throw new ReviewedStingException(String.format("Loaded linear index offset is incorrect; is %d, should be %d",linearIndexMinimumOffset,referenceLinearIndexMinimumOffset));
|
||||
|
||||
// Optimize the chunk list with a linear index optimization
|
||||
chunks = index.optimizeChunkList(chunks,index.getLinearIndex(initialRegion.getContigIndex()).getMinimumOffset(initialRegion.getStart()));
|
||||
chunks = index.optimizeChunkList(chunks,linearIndexMinimumOffset);
|
||||
|
||||
GATKBAMFileSpan fileSpan = new GATKBAMFileSpan(chunks.toArray(new GATKChunk[chunks.size()]));
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue