BCF2 optimizations

-- Added a write(int, OutputStream) method to each BCF2Type that writes an int value directly to the output stream as raw bytes.  Deleted BCF2Utils.encodeRawBytes(int, BCF2Type, OutputStream), which it replaces
-- encodeTypeDescriptor semi-inlined into encodeType so that the tests for overflow are done in just one place
-- Faster implementation of determineIntegerType for int[] values
This commit is contained in:
Mark DePristo 2012-08-09 15:34:07 -04:00
parent c6bd9b15ff
commit 06258c8a01
5 changed files with 35 additions and 55 deletions

View File

@ -295,7 +295,7 @@ public final class BCF2Decoder {
return decodeIntArray(size, type, null);
}
public final double rawFloatToFloat(final int rawFloat) {
private double rawFloatToFloat(final int rawFloat) {
return (double)Float.intBitsToFloat(rawFloat);
}

View File

@ -57,7 +57,7 @@ public enum BCF2Type {
/**
 * Writes {@code value} to {@code out} as a single byte.
 *
 * @param value the int whose low-order 8 bits are written
 * @param out   the stream that receives the byte
 * @throws IOException if the underlying stream write fails
 */
@Override
public void write(final int value, final OutputStream out) throws IOException {
//To change body of implemented methods use File | Settings | File Templates.
// NOTE(review): OutputStream.write(int) is specified to use only the low-order 8 bits of its
// argument, so the explicit 0xFF mask should be redundant -- confirm before removing.
out.write(0xFF & value); // TODO -- do we need this operation?
}
},
@ -71,7 +71,9 @@ public enum BCF2Type {
/**
 * Writes the low-order 16 bits of {@code value} to {@code out} as two bytes,
 * least-significant byte first.
 *
 * @param value the int whose low-order 16 bits are written
 * @param out   the stream that receives the two bytes
 * @throws IOException if the underlying stream write fails
 */
@Override
public void write(final int value, final OutputStream out) throws IOException {
//To change body of implemented methods use File | Settings | File Templates.
// TODO -- optimization -- should we put this in a local buffer?
// bits 0-7 first ...
out.write((0x00FF & value));
// ... then bits 8-15, shifted down into the low byte
out.write((0xFF00 & value) >> 8);
}
},
@ -87,7 +89,10 @@ public enum BCF2Type {
/**
 * Writes all 32 bits of {@code value} to {@code out} as four bytes,
 * least-significant byte first.
 *
 * @param value the int to write
 * @param out   the stream that receives the four bytes
 * @throws IOException if the underlying stream write fails
 */
@Override
public void write(final int value, final OutputStream out) throws IOException {
//To change body of implemented methods use File | Settings | File Templates.
// one write per byte, from bits 0-7 up to bits 24-31
out.write((0x000000FF & value));
out.write((0x0000FF00 & value) >> 8);
out.write((0x00FF0000 & value) >> 16);
// >> is an arithmetic shift, so this expression is negative when bit 31 of value is set;
// OutputStream.write(int) uses only the low-order 8 bits, so the byte emitted is still bits 24-31.
out.write((0xFF000000 & value) >> 24);
}
},
@ -99,7 +104,7 @@ public enum BCF2Type {
/**
 * Writes {@code value} using exactly the same byte layout as {@code INT32},
 * by delegating to {@code INT32.write}.
 *
 * NOTE(review): the enum constant this override belongs to is not visible in this hunk;
 * callers presumably pass the raw int bit pattern of the value -- confirm against callers.
 *
 * @param value the int to write
 * @param out   the stream that receives the bytes
 * @throws IOException if the underlying stream write fails
 */
@Override
public void write(final int value, final OutputStream out) throws IOException {
//To change body of implemented methods use File | Settings | File Templates.
INT32.write(value, out);
}
},
@ -111,7 +116,7 @@ public enum BCF2Type {
/**
 * Writes {@code value} using exactly the same byte layout as {@code INT8},
 * by delegating to {@code INT8.write}.
 *
 * NOTE(review): the enum constant this override belongs to is not visible in this hunk.
 *
 * @param value the int to write
 * @param out   the stream that receives the byte
 * @throws IOException if the underlying stream write fails
 */
@Override
public void write(final int value, final OutputStream out) throws IOException {
//To change body of implemented methods use File | Settings | File Templates.
INT8.write(value, out);
}
};

View File

@ -95,10 +95,9 @@ public final class BCF2Utils {
return dict;
}
@Requires({"nElements >= 0", "type != null"})
@Requires({"nElements >= 0", "nElements <= OVERFLOW_ELEMENT_MARKER", "type != null"})
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements;
return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
return (byte)((0x0F & nElements) << 4 | (type.getID() & 0x0F));
}
@Ensures("result >= 0")
@ -216,18 +215,18 @@ public final class BCF2Utils {
@Ensures("result.isIntegerType()")
public static BCF2Type determineIntegerType(final int[] values) {
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
BCF2Type maxType = BCF2Type.INT8;
for ( final int value : values ) {
final BCF2Type type1 = determineIntegerType(value);
switch ( type1 ) {
case INT8: break;
case INT16: maxType = BCF2Type.INT16; break;
case INT32: return BCF2Type.INT32; // fast path for largest possible value
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
}
// find the min and max values in the array
int max = 0, min = 0;
for ( final int v : values ) {
if ( v > max ) max = v;
if ( v < min ) min = v;
}
return maxType;
final BCF2Type maxType = determineIntegerType(max);
final BCF2Type minType = determineIntegerType(min);
// INT8 < INT16 < INT32 so this returns the larger of the two
return maxType.compareTo(minType) >= 0 ? maxType : minType;
}
/**
@ -281,31 +280,4 @@ public final class BCF2Utils {
else if ( o instanceof List ) return (List<Object>)o;
else return Collections.singletonList(o);
}
/**
 * Writes the low-order {@code type.getSizeInBytes()} bytes of {@code value} to
 * {@code encodeStream}, least-significant byte first.
 *
 * Only sizes of 1, 2 and 4 bytes are handled.
 *
 * @param value        the int whose low-order bytes are written
 * @param type         the BCF2 type whose byte size selects how many bytes are written
 * @param encodeStream the stream that receives the bytes
 * @throws IOException if the underlying stream write fails
 * @throws ReviewedStingException if {@code type.getSizeInBytes()} is not 1, 2 or 4
 */
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
switch ( type.getSizeInBytes() ) {
case 1:
encodeStream.write(0xFF & value);
break;
case 2:
// low byte, then high byte
encodeStream.write((0x00FF & value));
encodeStream.write((0xFF00 & value) >> 8);
break;
case 4:
// four bytes from bits 0-7 up to bits 24-31; the final shift is arithmetic and may be
// negative, but OutputStream.write(int) uses only the low-order 8 bits of its argument
encodeStream.write((0x000000FF & value));
encodeStream.write((0x0000FF00 & value) >> 8);
encodeStream.write((0x00FF0000 & value) >> 16);
encodeStream.write((0xFF000000 & value) >> 24);
break;
default:
throw new ReviewedStingException("BUG: unexpected type size " + type);
}
// NOTE(review): the commented-out loop below counts i DOWN from the highest byte, so as
// written it would emit the most-significant byte first -- the opposite order from the
// switch above. Do not re-enable it without reversing the iteration.
// general case for reference
// for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
// final int shift = i * 8;
// int mask = 0xFF << shift;
// int byteValue = (mask & value) >> shift;
// encodeStream.write(byteValue);
// }
}
}

View File

@ -191,9 +191,12 @@ public final class BCF2Encoder {
@Requires("size >= 0")
@Ensures("encodeStream.size() > old(encodeStream.size())")
public final void encodeType(final int size, final BCF2Type type) throws IOException {
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
encodeStream.write(typeByte);
if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) {
if ( size <= BCF2Utils.MAX_INLINE_ELEMENTS ) {
final int typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
encodeStream.write(typeByte);
} else {
final int typeByte = BCF2Utils.encodeTypeDescriptor(BCF2Utils.OVERFLOW_ELEMENT_MARKER, type);
encodeStream.write(typeByte);
// write in the overflow size
encodeTypedInt(size);
}
@ -201,12 +204,12 @@ public final class BCF2Encoder {
@Ensures("encodeStream.size() > old(encodeStream.size())")
public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
BCF2Utils.encodeRawBytes(value, type, encodeStream);
type.write(value, encodeStream);
}
@Ensures("encodeStream.size() > old(encodeStream.size())")
public final void encodeRawBytes(final int value, final BCF2Type type) throws IOException {
BCF2Utils.encodeRawBytes(value, type, encodeStream);
type.write(value, encodeStream);
}
// --------------------------------------------------------------------------------

View File

@ -158,7 +158,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
final byte[] headerBytes = capture.toByteArray();
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
BCF2Type.INT32.write(headerBytes.length, outputStream);
outputStream.write(headerBytes);
} catch (IOException e) {
throw new UserException.CouldNotCreateOutputFile("BCF2 stream", "Got IOException while trying to write BCF2 header", e);
@ -359,8 +359,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
*/
@Requires({"infoBlock.length > 0", "genotypesBlock.length >= 0"})
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
BCF2Utils.encodeRawBytes(infoBlock.length, BCF2Type.INT32, outputStream);
BCF2Utils.encodeRawBytes(genotypesBlock.length, BCF2Type.INT32, outputStream);
BCF2Type.INT32.write(infoBlock.length, outputStream);
BCF2Type.INT32.write(genotypesBlock.length, outputStream);
outputStream.write(infoBlock);
outputStream.write(genotypesBlock);
}