diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java index 3b9e86c8d..52a8ef0d0 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Codec.java @@ -113,18 +113,22 @@ public final class BCF2Codec implements FeatureCodec { @Override public VariantContext decode( final PositionalBufferedStream inputStream ) { - recordNo++; - final VariantContextBuilder builder = new VariantContextBuilder(); + try { + recordNo++; + final VariantContextBuilder builder = new VariantContextBuilder(); - final int sitesBlockSize = decoder.readBlockSize(inputStream); - final int genotypeBlockSize = decoder.readBlockSize(inputStream); - decoder.readNextBlock(sitesBlockSize, inputStream); - decodeSiteLoc(builder); - final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); + final int sitesBlockSize = decoder.readBlockSize(inputStream); + final int genotypeBlockSize = decoder.readBlockSize(inputStream); + decoder.readNextBlock(sitesBlockSize, inputStream); + decodeSiteLoc(builder); + final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder); - decoder.readNextBlock(genotypeBlockSize, inputStream); - createLazyGenotypesDecoder(info, builder); - return builder.fullyDecoded(true).make(); + decoder.readNextBlock(genotypeBlockSize, inputStream); + createLazyGenotypesDecoder(info, builder); + return builder.fullyDecoded(true).make(); + } catch ( IOException e ) { + throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e); + } } @Override @@ -234,7 +238,7 @@ public final class BCF2Codec implements FeatureCodec { * @return */ @Requires({"builder != null"}) - private final void decodeSiteLoc(final VariantContextBuilder builder) { + private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException { final int contigOffset = decoder.decodeInt(BCF2Type.INT32); final String contig = lookupContigName(contigOffset); builder.chr(contig); @@ -253,7 +257,7 @@ public final class BCF2Codec implements FeatureCodec { */ @Requires({"builder != null", "decoder != null"}) @Ensures({"result != null", "result.isValid()"}) - private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) { + private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException { final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT); if ( qual != null ) { builder.log10PError(((Double)qual) / -10.0); @@ -309,7 +313,7 @@ public final class BCF2Codec implements FeatureCodec { * Decode the id field in this BCF2 file and store it in the builder * @param builder */ - private void decodeID( final VariantContextBuilder builder ) { + private void decodeID( final VariantContextBuilder builder ) throws IOException { final String id = (String)decoder.decodeTypedValue(); if ( id == null ) @@ -326,7 +330,7 @@ public final class BCF2Codec implements FeatureCodec { * @return the alleles */ @Requires("nAlleles > 0") - private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) { + private List decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException { // TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes List alleles = new ArrayList(nAlleles); String ref = null; @@ -356,7 +360,7 @@ public final class BCF2Codec implements FeatureCodec { * Decode the filter field of this BCF2 file and store the result in the builder * @param builder */ - private void decodeFilter( final VariantContextBuilder builder ) { + private void decodeFilter( final VariantContextBuilder builder ) throws IOException { final Object value = decoder.decodeTypedValue(); if ( value == null ) @@ -383,7 +387,7 @@ public final class BCF2Codec implements FeatureCodec { * @param numInfoFields */ @Requires("numInfoFields >= 0") - private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) { + private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException { if ( numInfoFields == 0 ) // fast path, don't bother doing any work if there are no fields return; @@ -443,7 +447,7 @@ public final class BCF2Codec implements FeatureCodec { } @Ensures("result != null") - private final String getDictionaryString() { + private final String getDictionaryString() throws IOException { return getDictionaryString((Integer) decoder.decodeTypedValue()); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 2619a4dae..73137c794 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -129,18 +129,18 @@ public final class BCF2Decoder { // // ---------------------------------------------------------------------- - public final Object decodeTypedValue() { + public final Object decodeTypedValue() throws IOException { final byte typeDescriptor = readTypeDescriptor(); return decodeTypedValue(typeDescriptor); } - public final Object decodeTypedValue(final byte typeDescriptor) { + public final Object decodeTypedValue(final byte typeDescriptor) throws IOException { final int size = decodeNumberOfElements(typeDescriptor); return decodeTypedValue(typeDescriptor, size); } @Requires("size >= 0") - public final Object decodeTypedValue(final byte typeDescriptor, final int size) { + public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException { if ( size == 0 ) { // missing value => null in java return null; @@ -162,7 +162,7 @@ public final class BCF2Decoder { } } - public final Object decodeSingleValue(final BCF2Type type) { + public final Object decodeSingleValue(final BCF2Type type) throws IOException { // TODO -- decodeTypedValue should integrate this routine final int value = decodeInt(type); @@ -210,7 +210,7 @@ public final class BCF2Decoder { } @Ensures("result >= 0") - public final int decodeNumberOfElements(final byte typeDescriptor) { + public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException { if ( BCF2Utils.sizeIsOverflow(typeDescriptor) ) // -1 ensures we explode immediately with a bad size if the result is missing return decodeInt(readTypeDescriptor(), -1); @@ -228,14 +228,14 @@ public final class BCF2Decoder { * @return */ @Requires("BCF2Utils.decodeSize(typeDescriptor) == 1") - public final int decodeInt(final byte typeDescriptor, final int missingValue) { + public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int i = decodeInt(type); return i == type.getMissingBytes() ? missingValue : i; } @Requires("type != null") - public final int decodeInt(final BCF2Type type) { + public final int decodeInt(final BCF2Type type) throws IOException { return BCF2Utils.readInt(type.getSizeInBytes(), recordStream); } @@ -258,7 +258,7 @@ public final class BCF2Decoder { * @return see description */ @Requires({"type != null", "type.isIntegerType()", "size >= 0"}) - public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) { + public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException { if ( size == 0 ) { return null; } else { @@ -290,7 +290,7 @@ public final class BCF2Decoder { } } - public final int[] decodeIntArray(final byte typeDescriptor, final int size) { + public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); return decodeIntArray(size, type, null); } @@ -311,7 +311,7 @@ public final class BCF2Decoder { * @param inputStream * @return */ - public final int readBlockSize(final InputStream inputStream) { + public final int readBlockSize(final InputStream inputStream) throws IOException { return BCF2Utils.readInt(4, inputStream); } @@ -345,7 +345,7 @@ public final class BCF2Decoder { } } - public final byte readTypeDescriptor() { + public final byte readTypeDescriptor() throws IOException { return BCF2Utils.readByte(recordStream); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 0dadc49f9..e4ae96262 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; import org.broadinstitute.sting.utils.variantcontext.Allele; import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder; +import java.io.IOException; import java.util.*; /** @@ -105,12 +106,12 @@ public class BCF2GenotypeFieldDecoders { final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, - final GenotypeBuilder[] gbs); + final GenotypeBuilder[] gbs) throws IOException; } private class GTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs); else { @@ -135,7 +136,7 @@ public class BCF2GenotypeFieldDecoders { private final void fastBiallelicDiploidDecode(final List siteAlleles, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); final int nPossibleGenotypes = 3 * 3; @@ -177,7 +178,7 @@ public class BCF2GenotypeFieldDecoders { final int ploidy, final BCF2Decoder decoder, final byte typeDescriptor, - final GenotypeBuilder[] gbs) { + final GenotypeBuilder[] gbs) throws IOException { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); // a single cache for the encoded genotypes, since we don't actually need this vector @@ -216,7 +217,7 @@ public class BCF2GenotypeFieldDecoders { private class DPDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.DP(decoder.decodeInt(typeDescriptor, -1)); @@ -226,7 +227,7 @@ public class BCF2GenotypeFieldDecoders { private class GQDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.GQ(decoder.decodeInt(typeDescriptor, -1)); @@ -236,7 +237,7 @@ public class BCF2GenotypeFieldDecoders { private class ADDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { gb.AD(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -245,7 +246,7 @@ public class BCF2GenotypeFieldDecoders { private class PLDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { gb.PL(decoder.decodeIntArray(typeDescriptor, numElements)); } @@ -254,7 +255,7 @@ public class BCF2GenotypeFieldDecoders { private class GenericDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); if ( value != null ) { // don't add missing values @@ -273,7 +274,7 @@ public class BCF2GenotypeFieldDecoders { private class FTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException { for ( final GenotypeBuilder gb : gbs ) { Object value = decoder.decodeTypedValue(typeDescriptor, numElements); assert value == null || value instanceof String; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index 35fb2e97a..cf34a8b48 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -26,9 +26,11 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import com.google.java.contract.Requires; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.exceptions.UserException; import org.broadinstitute.sting.utils.variantcontext.*; +import java.io.IOException; import java.util.*; /** @@ -64,33 +66,38 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { if ( logger.isDebugEnabled() ) logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each"); - // load our byte[] data into the decoder - final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); + try { - for ( int i = 0; i < nSamples; i++ ) - builders[i].reset(true); + // load our byte[] data into the decoder + final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes); - for ( int i = 0; i < nFields; i++ ) { - // get the field name - final int offset = (Integer) decoder.decodeTypedValue(); - final String field = codec.getDictionaryString(offset); + for ( int i = 0; i < nSamples; i++ ) + builders[i].reset(true); - // the type of each element - final byte typeDescriptor = decoder.readTypeDescriptor(); - final int numElements = decoder.decodeNumberOfElements(typeDescriptor); - final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); - try { - fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); - } catch ( ClassCastException e ) { - throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field - + " inconsistent with the value observed in the decoded value"); + for ( int i = 0; i < nFields; i++ ) { + // get the field name + final int offset = (Integer) decoder.decodeTypedValue(); + final String field = codec.getDictionaryString(offset); + + // the type of each element + final byte typeDescriptor = decoder.readTypeDescriptor(); + final int numElements = decoder.decodeNumberOfElements(typeDescriptor); + final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); + try { + fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); + } catch ( ClassCastException e ) { + throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field + + " inconsistent with the value observed in the decoded value"); + } } + + final ArrayList genotypes = new ArrayList(nSamples); + for ( final GenotypeBuilder gb : builders ) + genotypes.add(gb.make()); + + return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); + } catch ( IOException e ) { + throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e); } - - final ArrayList genotypes = new ArrayList(nSamples); - for ( final GenotypeBuilder gb : builders ) - genotypes.add(gb.make()); - - return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset()); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index c79abe2ae..3454d0c3c 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -97,9 +97,8 @@ public final class BCF2Utils { @Requires({"nElements >= 0", "type != null"}) public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) { - int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER); - byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); - return typeByte; + final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements; + return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F)); } @Ensures("result >= 0") @@ -121,18 +120,8 @@ public final class BCF2Utils { return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER; } - @Requires("nElements >= 0") - public static boolean willOverflow(final long nElements) { - return nElements > MAX_INLINE_ELEMENTS; - } - - public static byte readByte(final InputStream stream) { - // TODO -- shouldn't be capturing error here - try { - return (byte)(stream.read() & 0xFF); - } catch ( IOException e ) { - throw new ReviewedStingException("readByte failure", e); - } + public static byte readByte(final InputStream stream) throws IOException { + return (byte)(stream.read() & 0xFF); } /** @@ -295,7 +284,7 @@ public final class BCF2Utils { @Requires({"stream != null", "bytesForEachInt > 0"}) - public static int readInt(int bytesForEachInt, final InputStream stream) { + public static int readInt(int bytesForEachInt, final InputStream stream) throws IOException { switch ( bytesForEachInt ) { case 1: { return (byte)(readByte(stream)); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 2c1d99546..a2bbfb391 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -193,7 +193,7 @@ public final class BCF2Encoder { public final void encodeType(final int size, final BCF2Type type) throws IOException { final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type); encodeStream.write(typeByte); - if ( BCF2Utils.willOverflow(size) ) { + if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) { // write in the overflow size encodeTypedInt(size); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index 7569ce90d..77050c069 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -537,11 +537,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { return record; } - private final void decodeRecord(final List toEncode, final byte[] record) { + private final void decodeRecord(final List toEncode, final byte[] record) throws IOException { decodeRecord(toEncode, new BCF2Decoder(record)); } - private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) { + private final void decodeRecord(final List toEncode, final BCF2Decoder decoder) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { Assert.assertFalse(decoder.blockIsFullyDecoded()); final Object decoded = decoder.decodeTypedValue();