BCF2 optimizations
-- All low-level reads now throw IOException instead of catching it directly. This lets us drop the try/catch in readByte, improving performance by about 5% -- Optimize encodeTypeDescriptor with final variables. Avoid using Math.min; do an inline comparison instead -- Inline willOverflow directly at its single use
This commit is contained in:
parent
9887bc4410
commit
9a0dda71d4
|
|
@ -113,18 +113,22 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public VariantContext decode( final PositionalBufferedStream inputStream ) {
|
public VariantContext decode( final PositionalBufferedStream inputStream ) {
|
||||||
recordNo++;
|
try {
|
||||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
recordNo++;
|
||||||
|
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||||
|
|
||||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
||||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||||
decodeSiteLoc(builder);
|
decodeSiteLoc(builder);
|
||||||
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
|
final SitesInfoForDecoding info = decodeSitesExtendedInfo(builder);
|
||||||
|
|
||||||
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||||
createLazyGenotypesDecoder(info, builder);
|
createLazyGenotypesDecoder(info, builder);
|
||||||
return builder.fullyDecoded(true).make();
|
return builder.fullyDecoded(true).make();
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new UserException.CouldNotReadInputFile("Failed to read BCF file", e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -234,7 +238,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Requires({"builder != null"})
|
@Requires({"builder != null"})
|
||||||
private final void decodeSiteLoc(final VariantContextBuilder builder) {
|
private final void decodeSiteLoc(final VariantContextBuilder builder) throws IOException {
|
||||||
final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
|
final int contigOffset = decoder.decodeInt(BCF2Type.INT32);
|
||||||
final String contig = lookupContigName(contigOffset);
|
final String contig = lookupContigName(contigOffset);
|
||||||
builder.chr(contig);
|
builder.chr(contig);
|
||||||
|
|
@ -253,7 +257,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
*/
|
*/
|
||||||
@Requires({"builder != null", "decoder != null"})
|
@Requires({"builder != null", "decoder != null"})
|
||||||
@Ensures({"result != null", "result.isValid()"})
|
@Ensures({"result != null", "result.isValid()"})
|
||||||
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) {
|
private final SitesInfoForDecoding decodeSitesExtendedInfo(final VariantContextBuilder builder) throws IOException {
|
||||||
final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
|
final Object qual = decoder.decodeSingleValue(BCF2Type.FLOAT);
|
||||||
if ( qual != null ) {
|
if ( qual != null ) {
|
||||||
builder.log10PError(((Double)qual) / -10.0);
|
builder.log10PError(((Double)qual) / -10.0);
|
||||||
|
|
@ -309,7 +313,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
* Decode the id field in this BCF2 file and store it in the builder
|
* Decode the id field in this BCF2 file and store it in the builder
|
||||||
* @param builder
|
* @param builder
|
||||||
*/
|
*/
|
||||||
private void decodeID( final VariantContextBuilder builder ) {
|
private void decodeID( final VariantContextBuilder builder ) throws IOException {
|
||||||
final String id = (String)decoder.decodeTypedValue();
|
final String id = (String)decoder.decodeTypedValue();
|
||||||
|
|
||||||
if ( id == null )
|
if ( id == null )
|
||||||
|
|
@ -326,7 +330,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
* @return the alleles
|
* @return the alleles
|
||||||
*/
|
*/
|
||||||
@Requires("nAlleles > 0")
|
@Requires("nAlleles > 0")
|
||||||
private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
private List<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) throws IOException {
|
||||||
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
||||||
List<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
List<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
||||||
String ref = null;
|
String ref = null;
|
||||||
|
|
@ -356,7 +360,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
* Decode the filter field of this BCF2 file and store the result in the builder
|
* Decode the filter field of this BCF2 file and store the result in the builder
|
||||||
* @param builder
|
* @param builder
|
||||||
*/
|
*/
|
||||||
private void decodeFilter( final VariantContextBuilder builder ) {
|
private void decodeFilter( final VariantContextBuilder builder ) throws IOException {
|
||||||
final Object value = decoder.decodeTypedValue();
|
final Object value = decoder.decodeTypedValue();
|
||||||
|
|
||||||
if ( value == null )
|
if ( value == null )
|
||||||
|
|
@ -383,7 +387,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
* @param numInfoFields
|
* @param numInfoFields
|
||||||
*/
|
*/
|
||||||
@Requires("numInfoFields >= 0")
|
@Requires("numInfoFields >= 0")
|
||||||
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
|
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) throws IOException {
|
||||||
if ( numInfoFields == 0 )
|
if ( numInfoFields == 0 )
|
||||||
// fast path, don't bother doing any work if there are no fields
|
// fast path, don't bother doing any work if there are no fields
|
||||||
return;
|
return;
|
||||||
|
|
@ -443,7 +447,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
private final String getDictionaryString() {
|
private final String getDictionaryString() throws IOException {
|
||||||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -129,18 +129,18 @@ public final class BCF2Decoder {
|
||||||
//
|
//
|
||||||
// ----------------------------------------------------------------------
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
public final Object decodeTypedValue() {
|
public final Object decodeTypedValue() throws IOException {
|
||||||
final byte typeDescriptor = readTypeDescriptor();
|
final byte typeDescriptor = readTypeDescriptor();
|
||||||
return decodeTypedValue(typeDescriptor);
|
return decodeTypedValue(typeDescriptor);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
public final Object decodeTypedValue(final byte typeDescriptor) throws IOException {
|
||||||
final int size = decodeNumberOfElements(typeDescriptor);
|
final int size = decodeNumberOfElements(typeDescriptor);
|
||||||
return decodeTypedValue(typeDescriptor, size);
|
return decodeTypedValue(typeDescriptor, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("size >= 0")
|
@Requires("size >= 0")
|
||||||
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
public final Object decodeTypedValue(final byte typeDescriptor, final int size) throws IOException {
|
||||||
if ( size == 0 ) {
|
if ( size == 0 ) {
|
||||||
// missing value => null in java
|
// missing value => null in java
|
||||||
return null;
|
return null;
|
||||||
|
|
@ -162,7 +162,7 @@ public final class BCF2Decoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final Object decodeSingleValue(final BCF2Type type) {
|
public final Object decodeSingleValue(final BCF2Type type) throws IOException {
|
||||||
// TODO -- decodeTypedValue should integrate this routine
|
// TODO -- decodeTypedValue should integrate this routine
|
||||||
final int value = decodeInt(type);
|
final int value = decodeInt(type);
|
||||||
|
|
||||||
|
|
@ -210,7 +210,7 @@ public final class BCF2Decoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public final int decodeNumberOfElements(final byte typeDescriptor) {
|
public final int decodeNumberOfElements(final byte typeDescriptor) throws IOException {
|
||||||
if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
|
if ( BCF2Utils.sizeIsOverflow(typeDescriptor) )
|
||||||
// -1 ensures we explode immediately with a bad size if the result is missing
|
// -1 ensures we explode immediately with a bad size if the result is missing
|
||||||
return decodeInt(readTypeDescriptor(), -1);
|
return decodeInt(readTypeDescriptor(), -1);
|
||||||
|
|
@ -228,14 +228,14 @@ public final class BCF2Decoder {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
|
@Requires("BCF2Utils.decodeSize(typeDescriptor) == 1")
|
||||||
public final int decodeInt(final byte typeDescriptor, final int missingValue) {
|
public final int decodeInt(final byte typeDescriptor, final int missingValue) throws IOException {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
final int i = decodeInt(type);
|
final int i = decodeInt(type);
|
||||||
return i == type.getMissingBytes() ? missingValue : i;
|
return i == type.getMissingBytes() ? missingValue : i;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("type != null")
|
@Requires("type != null")
|
||||||
public final int decodeInt(final BCF2Type type) {
|
public final int decodeInt(final BCF2Type type) throws IOException {
|
||||||
return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
return BCF2Utils.readInt(type.getSizeInBytes(), recordStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -258,7 +258,7 @@ public final class BCF2Decoder {
|
||||||
* @return see description
|
* @return see description
|
||||||
*/
|
*/
|
||||||
@Requires({"type != null", "type.isIntegerType()", "size >= 0"})
|
@Requires({"type != null", "type.isIntegerType()", "size >= 0"})
|
||||||
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) throws IOException {
|
||||||
if ( size == 0 ) {
|
if ( size == 0 ) {
|
||||||
return null;
|
return null;
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -290,7 +290,7 @@ public final class BCF2Decoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
|
public final int[] decodeIntArray(final byte typeDescriptor, final int size) throws IOException {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
return decodeIntArray(size, type, null);
|
return decodeIntArray(size, type, null);
|
||||||
}
|
}
|
||||||
|
|
@ -311,7 +311,7 @@ public final class BCF2Decoder {
|
||||||
* @param inputStream
|
* @param inputStream
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public final int readBlockSize(final InputStream inputStream) {
|
public final int readBlockSize(final InputStream inputStream) throws IOException {
|
||||||
return BCF2Utils.readInt(4, inputStream);
|
return BCF2Utils.readInt(4, inputStream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -345,7 +345,7 @@ public final class BCF2Decoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final byte readTypeDescriptor() {
|
public final byte readTypeDescriptor() throws IOException {
|
||||||
return BCF2Utils.readByte(recordStream);
|
return BCF2Utils.readByte(recordStream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,7 @@ import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
import org.broadinstitute.sting.utils.variantcontext.GenotypeBuilder;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -105,12 +106,12 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
final BCF2Decoder decoder,
|
final BCF2Decoder decoder,
|
||||||
final byte typeDescriptor,
|
final byte typeDescriptor,
|
||||||
final int numElements,
|
final int numElements,
|
||||||
final GenotypeBuilder[] gbs);
|
final GenotypeBuilder[] gbs) throws IOException;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class GTDecoder implements Decoder {
|
private class GTDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||||
else {
|
else {
|
||||||
|
|
@ -135,7 +136,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
|
private final void fastBiallelicDiploidDecode(final List<Allele> siteAlleles,
|
||||||
final BCF2Decoder decoder,
|
final BCF2Decoder decoder,
|
||||||
final byte typeDescriptor,
|
final byte typeDescriptor,
|
||||||
final GenotypeBuilder[] gbs) {
|
final GenotypeBuilder[] gbs) throws IOException {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
|
|
||||||
final int nPossibleGenotypes = 3 * 3;
|
final int nPossibleGenotypes = 3 * 3;
|
||||||
|
|
@ -177,7 +178,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
final int ploidy,
|
final int ploidy,
|
||||||
final BCF2Decoder decoder,
|
final BCF2Decoder decoder,
|
||||||
final byte typeDescriptor,
|
final byte typeDescriptor,
|
||||||
final GenotypeBuilder[] gbs) {
|
final GenotypeBuilder[] gbs) throws IOException {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
|
|
||||||
// a single cache for the encoded genotypes, since we don't actually need this vector
|
// a single cache for the encoded genotypes, since we don't actually need this vector
|
||||||
|
|
@ -216,7 +217,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class DPDecoder implements Decoder {
|
private class DPDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
// the -1 is for missing
|
// the -1 is for missing
|
||||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||||
|
|
@ -226,7 +227,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class GQDecoder implements Decoder {
|
private class GQDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
// the -1 is for missing
|
// the -1 is for missing
|
||||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||||
|
|
@ -236,7 +237,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class ADDecoder implements Decoder {
|
private class ADDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
|
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||||
}
|
}
|
||||||
|
|
@ -245,7 +246,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class PLDecoder implements Decoder {
|
private class PLDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
|
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||||
}
|
}
|
||||||
|
|
@ -254,7 +255,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class GenericDecoder implements Decoder {
|
private class GenericDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||||
if ( value != null ) { // don't add missing values
|
if ( value != null ) { // don't add missing values
|
||||||
|
|
@ -273,7 +274,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class FTDecoder implements Decoder {
|
private class FTDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) throws IOException {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||||
assert value == null || value instanceof String;
|
assert value == null || value instanceof String;
|
||||||
|
|
|
||||||
|
|
@ -26,9 +26,11 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||||
|
|
||||||
import com.google.java.contract.Requires;
|
import com.google.java.contract.Requires;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -64,33 +66,38 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||||
if ( logger.isDebugEnabled() )
|
if ( logger.isDebugEnabled() )
|
||||||
logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
logger.debug("Decoding BCF genotypes for " + nSamples + " samples with " + nFields + " fields each");
|
||||||
|
|
||||||
// load our byte[] data into the decoder
|
try {
|
||||||
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
|
||||||
|
|
||||||
for ( int i = 0; i < nSamples; i++ )
|
// load our byte[] data into the decoder
|
||||||
builders[i].reset(true);
|
final BCF2Decoder decoder = new BCF2Decoder(((BCF2Codec.LazyData)data).bytes);
|
||||||
|
|
||||||
for ( int i = 0; i < nFields; i++ ) {
|
for ( int i = 0; i < nSamples; i++ )
|
||||||
// get the field name
|
builders[i].reset(true);
|
||||||
final int offset = (Integer) decoder.decodeTypedValue();
|
|
||||||
final String field = codec.getDictionaryString(offset);
|
|
||||||
|
|
||||||
// the type of each element
|
for ( int i = 0; i < nFields; i++ ) {
|
||||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
// get the field name
|
||||||
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
|
final int offset = (Integer) decoder.decodeTypedValue();
|
||||||
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
final String field = codec.getDictionaryString(offset);
|
||||||
try {
|
|
||||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
|
// the type of each element
|
||||||
} catch ( ClassCastException e ) {
|
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
|
||||||
+ " inconsistent with the value observed in the decoded value");
|
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||||
|
try {
|
||||||
|
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
|
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||||
|
+ " inconsistent with the value observed in the decoded value");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||||
|
for ( final GenotypeBuilder gb : builders )
|
||||||
|
genotypes.add(gb.make());
|
||||||
|
|
||||||
|
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
||||||
|
} catch ( IOException e ) {
|
||||||
|
throw new ReviewedStingException("Unexpected IOException parsing already read genotypes data block", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
final ArrayList<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
|
||||||
for ( final GenotypeBuilder gb : builders )
|
|
||||||
genotypes.add(gb.make());
|
|
||||||
|
|
||||||
return new LazyGenotypesContext.LazyData(genotypes, codec.getHeader().getSampleNamesInOrder(), codec.getHeader().getSampleNameToOffset());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -97,9 +97,8 @@ public final class BCF2Utils {
|
||||||
|
|
||||||
@Requires({"nElements >= 0", "type != null"})
|
@Requires({"nElements >= 0", "type != null"})
|
||||||
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
final int encodeSize = nElements > MAX_INLINE_ELEMENTS ? OVERFLOW_ELEMENT_MARKER : nElements;
|
||||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
return (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||||
return typeByte;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
|
|
@ -121,18 +120,8 @@ public final class BCF2Utils {
|
||||||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("nElements >= 0")
|
public static byte readByte(final InputStream stream) throws IOException {
|
||||||
public static boolean willOverflow(final long nElements) {
|
return (byte)(stream.read() & 0xFF);
|
||||||
return nElements > MAX_INLINE_ELEMENTS;
|
|
||||||
}
|
|
||||||
|
|
||||||
public static byte readByte(final InputStream stream) {
|
|
||||||
// TODO -- shouldn't be capturing error here
|
|
||||||
try {
|
|
||||||
return (byte)(stream.read() & 0xFF);
|
|
||||||
} catch ( IOException e ) {
|
|
||||||
throw new ReviewedStingException("readByte failure", e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -295,7 +284,7 @@ public final class BCF2Utils {
|
||||||
|
|
||||||
|
|
||||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||||
public static int readInt(int bytesForEachInt, final InputStream stream) {
|
public static int readInt(int bytesForEachInt, final InputStream stream) throws IOException {
|
||||||
switch ( bytesForEachInt ) {
|
switch ( bytesForEachInt ) {
|
||||||
case 1: {
|
case 1: {
|
||||||
return (byte)(readByte(stream));
|
return (byte)(readByte(stream));
|
||||||
|
|
|
||||||
|
|
@ -193,7 +193,7 @@ public final class BCF2Encoder {
|
||||||
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
||||||
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
||||||
encodeStream.write(typeByte);
|
encodeStream.write(typeByte);
|
||||||
if ( BCF2Utils.willOverflow(size) ) {
|
if ( size > BCF2Utils.MAX_INLINE_ELEMENTS ) {
|
||||||
// write in the overflow size
|
// write in the overflow size
|
||||||
encodeTypedInt(size);
|
encodeTypedInt(size);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -537,11 +537,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
||||||
return record;
|
return record;
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) {
|
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) throws IOException {
|
||||||
decodeRecord(toEncode, new BCF2Decoder(record));
|
decodeRecord(toEncode, new BCF2Decoder(record));
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) {
|
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) throws IOException {
|
||||||
for ( final BCF2TypedValue tv : toEncode ) {
|
for ( final BCF2TypedValue tv : toEncode ) {
|
||||||
Assert.assertFalse(decoder.blockIsFullyDecoded());
|
Assert.assertFalse(decoder.blockIsFullyDecoded());
|
||||||
final Object decoded = decoder.decodeTypedValue();
|
final Object decoded = decoder.decodeTypedValue();
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue