Performance enhancements in GATKBAMIndex. I am not sure these will help in a
normal use case, but they cut startup times and memory-allocation noise in
the profiler, making my profiling time more productive.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5726 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-05-02 20:48:16 +00:00
parent 422d4ceeea
commit 411980a50a
1 changed files with 58 additions and 26 deletions

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin;
import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKChunk;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.channels.FileChannel;
@ -84,8 +86,7 @@ public class GATKBAMIndex {
// Verify the magic number.
seek(fileChannel,0);
final byte[] buffer = new byte[4];
readBytes(fileChannel,buffer);
final byte[] buffer = readBytes(fileChannel,4);
if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
throw new RuntimeException("Invalid file header in BAM index " + mFile +
": " + new String(buffer));
@ -104,10 +105,12 @@ public class GATKBAMIndex {
for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger(fileChannel);
final int nChunks = readInteger(fileChannel);
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
long[] rawChunkData = readLongs(fileChannel,nChunks*2);
for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong(fileChannel);
final long chunkEnd = readLong(fileChannel);
final long chunkBegin = rawChunkData[ci*2];
final long chunkEnd = rawChunkData[ci*2+1];
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
}
GATKBin bin = new GATKBin(referenceSequence, indexBin);
@ -118,9 +121,7 @@ public class GATKBAMIndex {
}
final int nLinearBins = readInteger(fileChannel);
long[] linearIndexEntries = new long[nLinearBins];
for(int linearIndexOffset = 0; linearIndexOffset < nLinearBins; linearIndexOffset++)
linearIndexEntries[linearIndexOffset] = readLong(fileChannel);
long[] linearIndexEntries = readLongs(fileChannel,nLinearBins);
linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
@ -293,8 +294,42 @@ public class GATKBAMIndex {
}
}
private void readBytes(final FileChannel fileChannel, final byte[] bytes) {
ByteBuffer buffer = ByteBuffer.wrap(bytes);
// Width of an int in bytes; Integer.SIZE is expressed in bits, hence the division by 8.
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
// Width of a long in bytes; Long.SIZE is expressed in bits, hence the division by 8.
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
/**
 * Reads raw bytes from the file channel into a freshly allocated array.
 * The bytes are staged in the buffer handed out by getBuffer and then copied,
 * so the returned array does not alias that reusable buffer.
 * @param fileChannel The channel to read from.
 * @param count Number of bytes requested.
 * @return A new array of length count holding the bytes that were read.
 */
private byte[] readBytes(final FileChannel fileChannel, int count) {
    final ByteBuffer scratch = getBuffer(count);
    read(fileChannel, scratch);
    // Switch from filling to draining before copying the contents out.
    scratch.flip();
    final byte[] copy = new byte[count];
    // Relative bulk get: throws BufferUnderflowException if fewer than count bytes were staged.
    scratch.get(copy, 0, copy.length);
    return copy;
}
/**
 * Reads a single int from the file channel.
 * The value is decoded using the byte order of the buffer returned by getBuffer.
 * @param fileChannel The channel to read from.
 * @return The decoded int.
 */
private int readInteger(final FileChannel fileChannel) {
    final ByteBuffer scratch = getBuffer(INT_SIZE_IN_BYTES);
    read(fileChannel, scratch);
    // Switch from filling to draining before decoding.
    scratch.flip();
    final int value = scratch.getInt();
    return value;
}
/**
 * Reads {@code count} consecutive longs from the file channel.
 * The raw bytes are staged in the buffer handed out by getBuffer and decoded
 * with that buffer's byte order.
 * @param fileChannel The channel to read from.
 * @param count Number of longs to read.
 * @return A newly allocated array of length {@code count} holding the decoded values.
 */
private long[] readLongs(final FileChannel fileChannel, final int count) {
    final ByteBuffer scratch = getBuffer(count*LONG_SIZE_IN_BYTES);
    read(fileChannel, scratch);
    // Switch from filling to draining before decoding.
    scratch.flip();
    final long[] values = new long[count];
    // Bulk decode through a long view; the view inherits the byte buffer's byte order and,
    // like the element-wise getLong() loop, throws BufferUnderflowException on a short read.
    scratch.asLongBuffer().get(values);
    return values;
}
private void read(final FileChannel fileChannel, final ByteBuffer buffer) {
try {
fileChannel.read(buffer);
}
@ -303,27 +338,24 @@ public class GATKBAMIndex {
}
}
// Width of an int in bytes; Integer.SIZE is expressed in bits, hence the division by 8.
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
// Width of a long in bytes; Long.SIZE is expressed in bits, hence the division by 8.
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
private ByteBuffer wrapBuffer(final byte[] bytes) {
ByteBuffer buffer = ByteBuffer.wrap(bytes);
buffer.order(ByteOrder.LITTLE_ENDIAN);
/**
 * Scratch buffer shared by the read helpers of this index; lazily created and
 * replaced only when a larger capacity is requested.
 * TODO: Should this be a SoftReference?
 */
private ByteBuffer buffer = null;

/**
 * Hands out the shared scratch buffer, ready to be filled with exactly size bytes.
 * The same instance is returned across calls whenever its capacity suffices, so
 * callers must copy out anything they need before the next call.
 * @param size Number of bytes the caller intends to read; becomes the buffer's limit.
 * @return The shared little-endian buffer with position 0 and limit size.
 */
private ByteBuffer getBuffer(final int size) {
    final boolean needsAllocation = buffer == null || buffer.capacity() < size;
    if(needsAllocation) {
        // Use a non-direct buffer for now so the storage lives on the heap, which is easier to debug.
        buffer = ByteBuffer.allocate(size).order(ByteOrder.LITTLE_ENDIAN);
    }
    // Reset position and mark, then cap the readable/writable window at the requested size.
    buffer.clear().limit(size);
    return buffer;
}
/**
 * Reads a single int from the file channel.
 * Allocates a new INT_SIZE_IN_BYTES-long array on every call, fills it through
 * readBytes, and decodes it via wrapBuffer, which wraps the array in a
 * little-endian ByteBuffer.
 * NOTE(review): another readInteger with the same signature, built on getBuffer,
 * appears earlier in this listing; this looks like the superseded variant -- confirm.
 * @param fileChannel The channel to read from.
 * @return The decoded int.
 */
private int readInteger(final FileChannel fileChannel) {
byte[] bytes = new byte[INT_SIZE_IN_BYTES];
readBytes(fileChannel,bytes);
return wrapBuffer(bytes).getInt();
}
/**
 * Reads a single long from the file channel.
 * Allocates a new LONG_SIZE_IN_BYTES-long array on every call, fills it through
 * readBytes, and decodes it via wrapBuffer, which wraps the array in a
 * little-endian ByteBuffer.
 * NOTE(review): the visible callers in this listing use readLongs for bulk reads;
 * this looks like the superseded single-value variant -- confirm.
 * @param fileChannel The channel to read from.
 * @return The decoded long.
 */
private long readLong(final FileChannel fileChannel) {
byte[] bytes = new byte[LONG_SIZE_IN_BYTES];
readBytes(fileChannel,bytes);
return wrapBuffer(bytes).getLong();
}
private void skipBytes(final FileChannel fileChannel, final int count) {
try {
fileChannel.position(fileChannel.position() + count);