Performance enhancements in GATKBAMIndex. It is not clear that these will help in a normal use case, but they cut startup time and reduce memory-allocation noise in the profiler, which makes profiling sessions more productive.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5726 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
422d4ceeea
commit
411980a50a
|
|
@ -24,6 +24,7 @@
|
||||||
package org.broadinstitute.sting.gatk.datasources.reads;
|
package org.broadinstitute.sting.gatk.datasources.reads;
|
||||||
|
|
||||||
import net.sf.samtools.Bin;
|
import net.sf.samtools.Bin;
|
||||||
|
|
||||||
import net.sf.samtools.GATKBAMFileSpan;
|
import net.sf.samtools.GATKBAMFileSpan;
|
||||||
import net.sf.samtools.GATKBin;
|
import net.sf.samtools.GATKBin;
|
||||||
import net.sf.samtools.GATKChunk;
|
import net.sf.samtools.GATKChunk;
|
||||||
|
|
@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.lang.ref.SoftReference;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.ByteOrder;
|
import java.nio.ByteOrder;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
|
|
@ -84,8 +86,7 @@ public class GATKBAMIndex {
|
||||||
|
|
||||||
// Verify the magic number.
|
// Verify the magic number.
|
||||||
seek(fileChannel,0);
|
seek(fileChannel,0);
|
||||||
final byte[] buffer = new byte[4];
|
final byte[] buffer = readBytes(fileChannel,4);
|
||||||
readBytes(fileChannel,buffer);
|
|
||||||
if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
|
if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) {
|
||||||
throw new RuntimeException("Invalid file header in BAM index " + mFile +
|
throw new RuntimeException("Invalid file header in BAM index " + mFile +
|
||||||
": " + new String(buffer));
|
": " + new String(buffer));
|
||||||
|
|
@ -104,10 +105,12 @@ public class GATKBAMIndex {
|
||||||
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
for (int binNumber = 0; binNumber < binCount; binNumber++) {
|
||||||
final int indexBin = readInteger(fileChannel);
|
final int indexBin = readInteger(fileChannel);
|
||||||
final int nChunks = readInteger(fileChannel);
|
final int nChunks = readInteger(fileChannel);
|
||||||
|
|
||||||
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
List<GATKChunk> chunks = new ArrayList<GATKChunk>(nChunks);
|
||||||
|
long[] rawChunkData = readLongs(fileChannel,nChunks*2);
|
||||||
for (int ci = 0; ci < nChunks; ci++) {
|
for (int ci = 0; ci < nChunks; ci++) {
|
||||||
final long chunkBegin = readLong(fileChannel);
|
final long chunkBegin = rawChunkData[ci*2];
|
||||||
final long chunkEnd = readLong(fileChannel);
|
final long chunkEnd = rawChunkData[ci*2+1];
|
||||||
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
chunks.add(new GATKChunk(chunkBegin, chunkEnd));
|
||||||
}
|
}
|
||||||
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
GATKBin bin = new GATKBin(referenceSequence, indexBin);
|
||||||
|
|
@ -118,9 +121,7 @@ public class GATKBAMIndex {
|
||||||
}
|
}
|
||||||
|
|
||||||
final int nLinearBins = readInteger(fileChannel);
|
final int nLinearBins = readInteger(fileChannel);
|
||||||
long[] linearIndexEntries = new long[nLinearBins];
|
long[] linearIndexEntries = readLongs(fileChannel,nLinearBins);
|
||||||
for(int linearIndexOffset = 0; linearIndexOffset < nLinearBins; linearIndexOffset++)
|
|
||||||
linearIndexEntries[linearIndexOffset] = readLong(fileChannel);
|
|
||||||
|
|
||||||
linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries);
|
||||||
|
|
||||||
|
|
@ -293,8 +294,42 @@ public class GATKBAMIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readBytes(final FileChannel fileChannel, final byte[] bytes) {
|
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
||||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
||||||
|
|
||||||
|
private byte[] readBytes(final FileChannel fileChannel, int count) {
|
||||||
|
ByteBuffer buffer = getBuffer(count);
|
||||||
|
read(fileChannel,buffer);
|
||||||
|
buffer.flip();
|
||||||
|
byte[] contents = new byte[count];
|
||||||
|
buffer.get(contents);
|
||||||
|
return contents;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int readInteger(final FileChannel fileChannel) {
|
||||||
|
ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES);
|
||||||
|
read(fileChannel,buffer);
|
||||||
|
buffer.flip();
|
||||||
|
return buffer.getInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads an array of <count> longs from the file channel, returning the results as an array.
|
||||||
|
* @param fileChannel The file backing the schedule.
|
||||||
|
* @param count Number of longs to read.
|
||||||
|
* @return An array of longs. Size of array should match count.
|
||||||
|
*/
|
||||||
|
private long[] readLongs(final FileChannel fileChannel, final int count) {
|
||||||
|
ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES);
|
||||||
|
read(fileChannel,buffer);
|
||||||
|
buffer.flip();
|
||||||
|
long[] result = new long[count];
|
||||||
|
for(int i = 0; i < count; i++)
|
||||||
|
result[i] = buffer.getLong();
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void read(final FileChannel fileChannel, final ByteBuffer buffer) {
|
||||||
try {
|
try {
|
||||||
fileChannel.read(buffer);
|
fileChannel.read(buffer);
|
||||||
}
|
}
|
||||||
|
|
@ -303,27 +338,24 @@ public class GATKBAMIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8;
|
|
||||||
private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8;
|
|
||||||
|
|
||||||
private ByteBuffer wrapBuffer(final byte[] bytes) {
|
/**
|
||||||
ByteBuffer buffer = ByteBuffer.wrap(bytes);
|
* A reusable buffer for use by this index generator.
|
||||||
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
* TODO: Should this be a SoftReference?
|
||||||
|
*/
|
||||||
|
private ByteBuffer buffer = null;
|
||||||
|
|
||||||
|
private ByteBuffer getBuffer(final int size) {
|
||||||
|
if(buffer == null || buffer.capacity() < size) {
|
||||||
|
// Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging.
|
||||||
|
buffer = ByteBuffer.allocate(size);
|
||||||
|
buffer.order(ByteOrder.LITTLE_ENDIAN);
|
||||||
|
}
|
||||||
|
buffer.clear();
|
||||||
|
buffer.limit(size);
|
||||||
return buffer;
|
return buffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
private int readInteger(final FileChannel fileChannel) {
|
|
||||||
byte[] bytes = new byte[INT_SIZE_IN_BYTES];
|
|
||||||
readBytes(fileChannel,bytes);
|
|
||||||
return wrapBuffer(bytes).getInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
private long readLong(final FileChannel fileChannel) {
|
|
||||||
byte[] bytes = new byte[LONG_SIZE_IN_BYTES];
|
|
||||||
readBytes(fileChannel,bytes);
|
|
||||||
return wrapBuffer(bytes).getLong();
|
|
||||||
}
|
|
||||||
|
|
||||||
private void skipBytes(final FileChannel fileChannel, final int count) {
|
private void skipBytes(final FileChannel fileChannel, final int count) {
|
||||||
try {
|
try {
|
||||||
fileChannel.position(fileChannel.position() + count);
|
fileChannel.position(fileChannel.position() + count);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue