Performance enhancements in GATKBAMIndex. Not sure these will assist in a
normal use case, but they cut startup times and memory allocation noise in
the profiler, making my profiling time more productive.


git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5726 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
hanna 2011-05-02 20:48:16 +00:00
parent 422d4ceeea
commit 411980a50a
1 changed files with 58 additions and 26 deletions

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.datasources.reads; package org.broadinstitute.sting.gatk.datasources.reads;
import net.sf.samtools.Bin; import net.sf.samtools.Bin;
import net.sf.samtools.GATKBAMFileSpan; import net.sf.samtools.GATKBAMFileSpan;
import net.sf.samtools.GATKBin; import net.sf.samtools.GATKBin;
import net.sf.samtools.GATKChunk; import net.sf.samtools.GATKChunk;
@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.IOException; import java.io.IOException;
import java.lang.ref.SoftReference;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.ByteOrder; import java.nio.ByteOrder;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
@ -84,8 +86,7 @@ public class GATKBAMIndex {
// Verify the magic number. // Verify the magic number.
seek(fileChannel,0); seek(fileChannel,0);
final byte[] buffer = new byte[4]; final byte[] buffer = readBytes(fileChannel,4);
readBytes(fileChannel,buffer);
if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) { if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
throw new RuntimeException("Invalid file header in BAM index " + mFile + throw new RuntimeException("Invalid file header in BAM index " + mFile +
": " + new String(buffer)); ": " + new String(buffer));
@ -104,10 +105,12 @@ public class GATKBAMIndex {
for (int binNumber = 0; binNumber < binCount; binNumber++) { for (int binNumber = 0; binNumber < binCount; binNumber++) {
final int indexBin = readInteger(fileChannel); final int indexBin = readInteger(fileChannel);
final int nChunks = readInteger(fileChannel); final int nChunks = readInteger(fileChannel);
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks); List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
long[] rawChunkData = readLongs(fileChannel,nChunks*2);
for (int ci = 0; ci < nChunks; ci++) { for (int ci = 0; ci < nChunks; ci++) {
final long chunkBegin = readLong(fileChannel); final long chunkBegin = rawChunkData[ci*2];
final long chunkEnd = readLong(fileChannel); final long chunkEnd = rawChunkData[ci*2+1];
chunks.add(new GATKChunk(chunkBegin, chunkEnd)); chunks.add(new GATKChunk(chunkBegin, chunkEnd));
} }
GATKBin bin = new GATKBin(referenceSequence, indexBin); GATKBin bin = new GATKBin(referenceSequence, indexBin);
@ -118,9 +121,7 @@ public class GATKBAMIndex {
} }
final int nLinearBins = readInteger(fileChannel); final int nLinearBins = readInteger(fileChannel);
long[] linearIndexEntries = new long[nLinearBins]; long[] linearIndexEntries = readLongs(fileChannel,nLinearBins);
for(int linearIndexOffset = 0; linearIndexOffset < nLinearBins; linearIndexOffset++)
linearIndexEntries[linearIndexOffset] = readLong(fileChannel);
linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries); linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
@ -293,8 +294,42 @@ public class GATKBAMIndex {
} }
} }
private void readBytes(final FileChannel fileChannel, final byte[] bytes) { private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
ByteBuffer buffer = ByteBuffer.wrap(bytes); private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
/**
 * Reads {@code count} bytes from the current position of the file channel.
 * The bytes are staged in the buffer handed out by getBuffer and then copied into a
 * newly allocated array, so the result does not alias the staging buffer.
 * @param fileChannel The file channel to read from.
 * @param count Number of bytes to read.
 * @return A new array of length {@code count} holding the bytes read.
 * @throws RuntimeException if fewer than {@code count} bytes were available.
 */
private byte[] readBytes(final FileChannel fileChannel, int count) {
    final ByteBuffer buffer = getBuffer(count);
    read(fileChannel,buffer);
    buffer.flip();
    // After flip(), remaining() is the number of bytes actually read. A short read
    // (e.g. a truncated index) would otherwise surface as a bare BufferUnderflowException.
    if(buffer.remaining() < count) {
        throw new RuntimeException("Unexpected end of BAM index: expected " + count +
                " bytes but only " + buffer.remaining() + " were available");
    }
    final byte[] contents = new byte[count];
    buffer.get(contents);
    return contents;
}
/**
 * Reads a single int from the current position of the file channel.
 * The value is decoded from the buffer handed out by getBuffer, using that buffer's byte order.
 * @param fileChannel The file channel to read from.
 * @return The int that was read.
 * @throws RuntimeException if fewer than INT_SIZE_IN_BYTES bytes were available.
 */
private int readInteger(final FileChannel fileChannel) {
    final ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
    read(fileChannel,buffer);
    buffer.flip();
    // After flip(), remaining() is the number of bytes actually read. A short read
    // (e.g. a truncated index) would otherwise surface as a bare BufferUnderflowException.
    if(buffer.remaining() < INT_SIZE_IN_BYTES) {
        throw new RuntimeException("Unexpected end of BAM index: expected " + INT_SIZE_IN_BYTES +
                " bytes but only " + buffer.remaining() + " were available");
    }
    return buffer.getInt();
}
/**
 * Reads an array of {@code count} longs from the file channel in a single read,
 * returning the results as an array.
 * @param fileChannel The file channel to read from.
 * @param count Number of longs to read.
 * @return A new array of exactly {@code count} longs, in the order they were read.
 * @throws RuntimeException if fewer than {@code count} longs were available.
 */
private long[] readLongs(final FileChannel fileChannel, final int count) {
    final int bytesNeeded = count*LONG_SIZE_IN_BYTES;
    final ByteBuffer buffer = getBuffer(bytesNeeded);
    read(fileChannel,buffer);
    buffer.flip();
    // After flip(), remaining() is the number of bytes actually read. A short read
    // (e.g. a truncated index) would otherwise surface as a bare BufferUnderflowException.
    if(buffer.remaining() < bytesNeeded) {
        throw new RuntimeException("Unexpected end of BAM index: expected " + bytesNeeded +
                " bytes but only " + buffer.remaining() + " were available");
    }
    final long[] result = new long[count];
    for(int i = 0; i < count; i++) {
        result[i] = buffer.getLong();
    }
    return result;
}
private void read(final FileChannel fileChannel, final ByteBuffer buffer) {
try { try {
fileChannel.read(buffer); fileChannel.read(buffer);
} }
@ -303,27 +338,24 @@ public class GATKBAMIndex {
} }
} }
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
private ByteBuffer wrapBuffer(final byte[] bytes) { /**
ByteBuffer buffer = ByteBuffer.wrap(bytes); * A reusable buffer for use by this index generator.
buffer.order(ByteOrder.LITTLE_ENDIAN); * TODO: Should this be a SoftReference?
*/
private ByteBuffer buffer = null;
private ByteBuffer getBuffer(final int size) {
if(buffer == null || buffer.capacity() < size) {
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
buffer = ByteBuffer.allocate(size);
buffer.order(ByteOrder.LITTLE_ENDIAN);
}
buffer.clear();
buffer.limit(size);
return buffer; return buffer;
} }
/**
 * Reads a single int from the file channel.
 * Allocates a fresh array of INT_SIZE_IN_BYTES bytes, fills it via readBytes and
 * decodes it through wrapBuffer.
 * @param fileChannel The file channel to read from.
 * @return The int decoded from the bytes that were read.
 */
private int readInteger(final FileChannel fileChannel) {
byte[] bytes = new byte[INT_SIZE_IN_BYTES];
readBytes(fileChannel,bytes);
return wrapBuffer(bytes).getInt();
}
/**
 * Reads a single long from the file channel.
 * Allocates a fresh array of LONG_SIZE_IN_BYTES bytes, fills it via readBytes and
 * decodes it through wrapBuffer.
 * @param fileChannel The file channel to read from.
 * @return The long decoded from the bytes that were read.
 */
private long readLong(final FileChannel fileChannel) {
byte[] bytes = new byte[LONG_SIZE_IN_BYTES];
readBytes(fileChannel,bytes);
return wrapBuffer(bytes).getLong();
}
private void skipBytes(final FileChannel fileChannel, final int count) { private void skipBytes(final FileChannel fileChannel, final int count) {
try { try {
fileChannel.position(fileChannel.position() + count); fileChannel.position(fileChannel.position() + count);