BCF2 optimizations

-- All low-level reads now throw IOException instead of catching and wrapping it themselves.  This removes the try/catch from readByte, improving performance by roughly 5%
-- Optimize encodeTypeDescriptor with final variables.  Avoid using Math.min; do an inline comparison instead
-- Inlined willOverflow directly at its single call site
This commit is contained in:
Mark DePristo 2012-08-09 09:00:44 -04:00
parent 9887bc4410
commit 9a0dda71d4
7 changed files with 81 additions and 80 deletions

View File

@ -113,18 +113,22 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
@Override
public VariantContext decode( final PositionalBufferedStream inputStream ) {
recordNo++;
final VariantContextBuilder builder = new VariantContextBuilder();
try {
recordNo++;
final VariantContextBuilder builder = new VariantContextBuilder();
final int sitesBlockSize = decoder.readBlockSize(inputStream);
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
decoder.readNextBlock(sitesBlockSize, inputStream);
decodeSiteLoc(builder);
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
final int sitesBlockSize = decoder.readBlockSize(inputStream);
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
decoder.readNextBlock(sitesBlockSize, inputStream);
decodeSiteLoc(builder);
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
decoder.readNextBlock(genotypeBlockSize, inputStream);
createLazyGenotypesDecoder(info, builder);
return builder.fullyDecoded(true).make();
decoder.readNextBlock(genotypeBlockSize, inputStream);
createLazyGenotypesDecoder(info, builder);
return builder.fullyDecoded(true).make();
} catch ( IOException e ) {
throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e);
}
}
@Override
@ -234,7 +238,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
* @return
*/
@Requires({"builder != null"})
private final void decodeSiteLoc(final VariantContextBuilder builder) {
private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException {
final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
final String contig = lookupContigName(contigOffset);
builder.chr(contig);
@ -253,7 +257,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
*/
@Requires({"builder != null", "decoder != null"})
@Ensures({"result != null", "result.isValid()"})
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) {
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
if ( qual != null ) {
builder.log10PError(((Double)qual) / -10.0);
@ -309,7 +313,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
* Decode the id field in this BCF2 file and store it in the builder
* @param builder
*/
private void decodeID( final VariantContextBuilder builder ) {
private void decodeID( final VariantContextBuilder builder ) throws IOException {
final String id = (String)decoder.decodeTypedValue();
if ( id == null )
@ -326,7 +330,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
* @return the alleles
*/
@Requires("nAlleles > 0")
private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException {
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
List<Allele> alleles = new ArrayList<Allele>(nAlleles);
String ref = null;
@ -356,7 +360,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
* Decode the filter field of this BCF2 file and store the result in the builder
* @param builder
*/
private void decodeFilter( final VariantContextBuilder builder ) {
private void decodeFilter( final VariantContextBuilder builder ) throws IOException {
final Object value = decoder.decodeTypedValue();
if ( value == null )
@ -383,7 +387,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
* @param numInfoFields
*/
@Requires("numInfoFields >= 0")
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException {
if ( numInfoFields == 0 )
// fast path, don't bother doing any work if there are no fields
return;
@ -443,7 +447,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
}
@Ensures("result != null")
private final String getDictionaryString() {
private final String getDictionaryString() throws IOException {
return getDictionaryString((Integer) decoder.decodeTypedValue());
}

View File

@ -129,18 +129,18 @@ public final class BCF2Decoder {
//
// ----------------------------------------------------------------------
public final Object decodeTypedValue() {
public final Object decodeTypedValue() throws IOException {
final byte typeDescriptor = readTypeDescriptor();
return decodeTypedValue(typeDescriptor);
}
public final Object decodeTypedValue(final byte typeDescriptor) {
public final Object decodeTypedValue(final byte typeDescriptor) throws IOException {
final int size = decodeNumberOfElements(typeDescriptor);
return decodeTypedValue(typeDescriptor, size);
}
@Requires("size >= 0")
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException {
if ( size == 0 ) {
// missing value => null in java
return null;
@ -162,7 +162,7 @@ public final class BCF2Decoder {
}
}
public final Object decodeSingleValue(final BCF2Type type) {
public final Object decodeSingleValue(final BCF2Type type) throws IOException {
// TODO -- decodeTypedValue should integrate this routine
final int value = decodeInt(type);
@ -210,7 +210,7 @@ public final class BCF2Decoder {
}
@Ensures("result >= 0")
public final int decodeNumberOfElements(final byte typeDescriptor) {
public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException {
if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
// -1 ensures we explode immediately with a bad size if the result is missing
return decodeInt(readTypeDescriptor(), -1);
@ -228,14 +228,14 @@ public final class BCF2Decoder {
* @return
*/
@Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
public final int decodeInt(final byte typeDescriptor, final int missingValue) {
public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
final int i = decodeInt(type);
return i == type.getMissingBytes() ? missingValue : i;
}
@Requires("type != null")
public final int decodeInt(final BCF2Type type) {
public final int decodeInt(final BCF2Type type) throws IOException {
return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
}
@ -258,7 +258,7 @@ public final class BCF2Decoder {
* @return see description
*/
@Requires({"type != null", "type.isIntegerType()", "size >= 0"})
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException {
if ( size == 0 ) {
return null;
} else {
@ -290,7 +290,7 @@ public final class BCF2Decoder {
}
}
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
return decodeIntArray(size, type, null);
}
@ -311,7 +311,7 @@ public final class BCF2Decoder {
* @param inputStream
* @return
*/
public final int readBlockSize(final InputStream inputStream) {
public final int readBlockSize(final InputStream inputStream) throws IOException {
return BCF2Utils.readInt(4, inputStream);
}
@ -345,7 +345,7 @@ public final class BCF2Decoder {
}
}
public final byte readTypeDescriptor() {
public final byte readTypeDescriptor() throws IOException {
return BCF2Utils.readByte(recordStream);
}
}

View File

@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
import org.broadinstitute.sting.utils.variantcontext.Allele;
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
import java.io.IOException;
import java.util.*;
/**
@ -105,12 +106,12 @@ public class BCF2GenotypeFieldDecoders {
final BCF2Decoder decoder,
final byte typeDescriptor,
final int numElements,
final GenotypeBuilder[] gbs);
final GenotypeBuilder[] gbs) throws IOException;
}
private class GTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
else {
@ -135,7 +136,7 @@ public class BCF2GenotypeFieldDecoders {
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
final BCF2Decoder decoder,
final byte typeDescriptor,
final GenotypeBuilder[] gbs) {
final GenotypeBuilder[] gbs) throws IOException {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
final int nPossibleGenotypes = 3 * 3;
@ -177,7 +178,7 @@ public class BCF2GenotypeFieldDecoders {
final int ploidy,
final BCF2Decoder decoder,
final byte typeDescriptor,
final GenotypeBuilder[] gbs) {
final GenotypeBuilder[] gbs) throws IOException {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
// a single cache for the encoded genotypes, since we don't actually need this vector
@ -216,7 +217,7 @@ public class BCF2GenotypeFieldDecoders {
private class DPDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.DP(decoder.decodeInt(typeDescriptor, -1));
@ -226,7 +227,7 @@ public class BCF2GenotypeFieldDecoders {
private class GQDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
@ -236,7 +237,7 @@ public class BCF2GenotypeFieldDecoders {
private class ADDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
}
@ -245,7 +246,7 @@ public class BCF2GenotypeFieldDecoders {
private class PLDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
}
@ -254,7 +255,7 @@ public class BCF2GenotypeFieldDecoders {
private class GenericDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
if ( value != null ) { // don't add missing values
@ -273,7 +274,7 @@ public class BCF2GenotypeFieldDecoders {
private class FTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
assert value == null || value instanceof String;

View File

@ -26,9 +26,11 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.IOException;
import java.util.*;
/**
@ -64,33 +66,38 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
if ( logger.isDebugEnabled() )
logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
// load our byte[] data into the decoder
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
try {
for ( int i = 0; i < nSamples; i++ )
builders[i].reset(true);
// load our byte[] data into the decoder
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
for ( int i = 0; i < nFields; i++ ) {
// get the field name
final int offset = (Integer) decoder.decodeTypedValue();
final String field = codec.getDictionaryString(offset);
for ( int i = 0; i < nSamples; i++ )
builders[i].reset(true);
// the type of each element
final byte typeDescriptor = decoder.readTypeDescriptor();
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
try {
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value");
for ( int i = 0; i < nFields; i++ ) {
// get the field name
final int offset = (Integer) decoder.decodeTypedValue();
final String field = codec.getDictionaryString(offset);
// the type of each element
final byte typeDescriptor = decoder.readTypeDescriptor();
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
try {
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value");
}
}
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
for ( final GenotypeBuilder gb : builders )
genotypes.add(gb.make());
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
} catch ( IOException e ) {
throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e);
}
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
for ( final GenotypeBuilder gb : builders )
genotypes.add(gb.make());
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
}
}

View File

@ -97,9 +97,8 @@ public final class BCF2Utils {
@Requires({"nElements >= 0", "type != null"})
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
return typeByte;
final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements;
return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
}
@Ensures("result >= 0")
@ -121,18 +120,8 @@ public final class BCF2Utils {
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
}
@Requires("nElements >= 0")
public static boolean willOverflow(final long nElements) {
return nElements > MAX_INLINE_ELEMENTS;
}
public static byte readByte(final InputStream stream) {
// TODO -- shouldn't be capturing error here
try {
return (byte)(stream.read() & 0xFF);
} catch ( IOException e ) {
throw new ReviewedStingException("readByte failure", e);
}
public static byte readByte(final InputStream stream) throws IOException {
return (byte)(stream.read() & 0xFF);
}
/**
@ -295,7 +284,7 @@ public final class BCF2Utils {
@Requires({"stream != null", "bytesForEachInt > 0"})
public static int readInt(int bytesForEachInt, final InputStream stream) {
public static int readInt(int bytesForEachInt, final InputStream stream) throws IOException {
switch ( bytesForEachInt ) {
case 1: {
return (byte)(readByte(stream));

View File

@ -193,7 +193,7 @@ public final class BCF2Encoder {
public final void encodeType(final int size, final BCF2Type type) throws IOException {
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
encodeStream.write(typeByte);
if ( BCF2Utils.willOverflow(size) ) {
if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) {
// write in the overflow size
encodeTypedInt(size);
}

View File

@ -537,11 +537,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
return record;
}
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) {
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) throws IOException {
decodeRecord(toEncode, new BCF2Decoder(record));
}
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) {
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) throws IOException {
for ( final BCF2TypedValue tv : toEncode ) {
Assert.assertFalse(decoder.blockIsFullyDecoded());
final Object decoded = decoder.decodeTypedValue();