From 411980a50a46aed7b54c36cc02509213505a894f Mon Sep 17 00:00:00 2001 From: hanna Date: Mon, 2 May 2011 20:48:16 +0000 Subject: [PATCH] Performance enhancements in GATKBAMIndex. Not sure these will assist in a normal use case, but they cut startup times and memory allocation noise in the profiler, making my profiling time more productive. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5726 348d0f76-0448-11de-a6fe-93d51630548a --- .../gatk/datasources/reads/GATKBAMIndex.java | 84 +++++++++++++------ 1 file changed, 58 insertions(+), 26 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java index ad758719e..46d6d97de 100644 --- a/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java +++ b/java/src/org/broadinstitute/sting/gatk/datasources/reads/GATKBAMIndex.java @@ -24,6 +24,7 @@ package org.broadinstitute.sting.gatk.datasources.reads; import net.sf.samtools.Bin; + import net.sf.samtools.GATKBAMFileSpan; import net.sf.samtools.GATKBin; import net.sf.samtools.GATKChunk; @@ -35,6 +36,7 @@ import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; +import java.lang.ref.SoftReference; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.channels.FileChannel; @@ -84,8 +86,7 @@ public class GATKBAMIndex { // Verify the magic number. seek(fileChannel,0); - final byte[] buffer = new byte[4]; - readBytes(fileChannel,buffer); + final byte[] buffer = readBytes(fileChannel,4); if (!Arrays.equals(buffer, GATKBAMFileConstants.BAM_INDEX_MAGIC)) { throw new RuntimeException("Invalid file header in BAM index " + mFile + ": " + new String(buffer)); @@ -104,10 +105,12 @@ public class GATKBAMIndex { for (int binNumber = 0; binNumber < binCount; binNumber++) { final int indexBin = readInteger(fileChannel); final int nChunks = readInteger(fileChannel); + List chunks = new ArrayList(nChunks); + long[] rawChunkData = readLongs(fileChannel,nChunks*2); for (int ci = 0; ci < nChunks; ci++) { - final long chunkBegin = readLong(fileChannel); - final long chunkEnd = readLong(fileChannel); + final long chunkBegin = rawChunkData[ci*2]; + final long chunkEnd = rawChunkData[ci*2+1]; chunks.add(new GATKChunk(chunkBegin, chunkEnd)); } GATKBin bin = new GATKBin(referenceSequence, indexBin); @@ -118,9 +121,7 @@ public class GATKBAMIndex { } final int nLinearBins = readInteger(fileChannel); - long[] linearIndexEntries = new long[nLinearBins]; - for(int linearIndexOffset = 0; linearIndexOffset < nLinearBins; linearIndexOffset++) - linearIndexEntries[linearIndexOffset] = readLong(fileChannel); + long[] linearIndexEntries = readLongs(fileChannel,nLinearBins); linearIndex = new LinearIndex(referenceSequence,0,linearIndexEntries); @@ -293,8 +294,42 @@ public class GATKBAMIndex { } } - private void readBytes(final FileChannel fileChannel, final byte[] bytes) { - ByteBuffer buffer = ByteBuffer.wrap(bytes); + private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8; + private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8; + + private byte[] readBytes(final FileChannel fileChannel, int count) { + ByteBuffer buffer = getBuffer(count); + read(fileChannel,buffer); + buffer.flip(); + byte[] contents = new byte[count]; + buffer.get(contents); + return contents; + } + + private int readInteger(final FileChannel fileChannel) { + ByteBuffer buffer = getBuffer(INT_SIZE_IN_BYTES); + read(fileChannel,buffer); + buffer.flip(); + return buffer.getInt(); + } + + /** + * Reads an array of longs from the file channel, returning the results as an array. + * @param fileChannel The file backing the schedule. + * @param count Number of longs to read. + * @return An array of longs. Size of array should match count. + */ + private long[] readLongs(final FileChannel fileChannel, final int count) { + ByteBuffer buffer = getBuffer(count*LONG_SIZE_IN_BYTES); + read(fileChannel,buffer); + buffer.flip(); + long[] result = new long[count]; + for(int i = 0; i < count; i++) + result[i] = buffer.getLong(); + return result; + } + + private void read(final FileChannel fileChannel, final ByteBuffer buffer) { try { fileChannel.read(buffer); } @@ -303,27 +338,24 @@ public class GATKBAMIndex { } } - private static final int INT_SIZE_IN_BYTES = Integer.SIZE / 8; - private static final int LONG_SIZE_IN_BYTES = Long.SIZE / 8; - private ByteBuffer wrapBuffer(final byte[] bytes) { - ByteBuffer buffer = ByteBuffer.wrap(bytes); - buffer.order(ByteOrder.LITTLE_ENDIAN); + /** + * A reusable buffer for use by this index generator. + * TODO: Should this be a SoftReference? + */ + private ByteBuffer buffer = null; + + private ByteBuffer getBuffer(final int size) { + if(buffer == null || buffer.capacity() < size) { + // Allocate a new byte buffer. For now, make it indirect to make sure it winds up on the heap for easier debugging. + buffer = ByteBuffer.allocate(size); + buffer.order(ByteOrder.LITTLE_ENDIAN); + } + buffer.clear(); + buffer.limit(size); return buffer; } - private int readInteger(final FileChannel fileChannel) { - byte[] bytes = new byte[INT_SIZE_IN_BYTES]; - readBytes(fileChannel,bytes); - return wrapBuffer(bytes).getInt(); - } - - private long readLong(final FileChannel fileChannel) { - byte[] bytes = new byte[LONG_SIZE_IN_BYTES]; - readBytes(fileChannel,bytes); - return wrapBuffer(bytes).getLong(); - } - private void skipBytes(final FileChannel fileChannel, final int count) { try { fileChannel.position(fileChannel.position() + count);