GenotypeLikelihood PLs are capped at Short.MAX_INT now
-- UserExceptions in BCF2 now where appropriate -- Asserts for code safety -- Public -> protected encode(Object v) method is for testing only
This commit is contained in:
parent
d52bc31a47
commit
6301572009
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
|
|||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
|
@ -96,7 +97,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
try {
|
||||
// note that this reads the magic as well, and so does double duty
|
||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||
throw new IllegalArgumentException("Input stream does not begin with BCF2 magic");
|
||||
throw new UserException.MalformedBCF2("Input stream does not begin with BCF2 magic");
|
||||
|
||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||
|
||||
|
|
@ -224,15 +225,13 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
private void decodeID( final VariantContextBuilder builder ) {
|
||||
final String id = (String)decoder.decodeTypedValue();
|
||||
|
||||
if ( id == null ) {
|
||||
if ( id == null )
|
||||
builder.noID();
|
||||
}
|
||||
else {
|
||||
else
|
||||
builder.id(id);
|
||||
}
|
||||
}
|
||||
|
||||
public static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||
|
|
@ -298,15 +297,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
||||
final int nSamples = siteInfo.nSamples;
|
||||
final int nFields = siteInfo.nFormatFields;
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
// all of the information we need for each genotype, with default values
|
||||
final String sampleName = samples.get(i);
|
||||
List<Allele> alleles = null;
|
||||
boolean isPhased = false;
|
||||
|
|
@ -318,31 +318,37 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
final List<Object> values = entry.getValue();
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)values.get(i));
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
final Integer value = (Integer)values.get(i);
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
log10PError = value / -10.0;
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final List<Integer> pls = (List<Integer>)values.get(i);
|
||||
if ( pls != null ) { // we have a PL field
|
||||
log10Likelihoods = new double[pls.size()];
|
||||
for ( int j = 0; j < log10Likelihoods.length; j++ )
|
||||
log10Likelihoods[j] = pls.get(j) / -10.0;
|
||||
}
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( values.get(i) != null ) { // don't add missing values
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
attributes.put(field, values.get(i));
|
||||
try {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)values.get(i));
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
final Integer value = (Integer)values.get(i);
|
||||
if ( value != BCF2Type.INT8.getMissingJavaValue() )
|
||||
log10PError = value / -10.0;
|
||||
} else if ( field.equals(VCFConstants.PHRED_GENOTYPE_LIKELIHOODS_KEY) ) {
|
||||
final List<Integer> pls = (List<Integer>)values.get(i);
|
||||
if ( pls != null ) { // we have a PL field
|
||||
log10Likelihoods = new double[pls.size()];
|
||||
for ( int j = 0; j < log10Likelihoods.length; j++ )
|
||||
log10Likelihoods[j] = pls.get(j) / -10.0;
|
||||
}
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( values.get(i) != null ) { // don't add missing values
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
attributes.put(field, values.get(i));
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value in the "
|
||||
+ " BCF file. Value was " + Utils.join(",", values));
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleles == null ) throw new ReviewedStingException("BUG: no alleles found");
|
||||
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||
|
||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
genotypes.add(g);
|
||||
|
|
@ -368,18 +374,24 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
}
|
||||
|
||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||
assert (nFields > 0 && nSamples > 0) || (nFields == 0 && nSamples == 0);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final String field = getDictionaryString();
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
map.put(field, values);
|
||||
if ( nFields == 0 ) // fast path exit for sites only file
|
||||
return Collections.emptyMap();
|
||||
else {
|
||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final String field = getDictionaryString();
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
map.put(field, values);
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private final String getDictionaryString() {
|
||||
|
|
@ -387,6 +399,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
}
|
||||
|
||||
private final String getDictionaryString(final int offset) {
|
||||
if ( offset >= dictionary.size() ) throw new UserException.MalformedBCF2("BUG: no dictionary field found at offset " + offset);
|
||||
final String field = dictionary.get(offset);
|
||||
return field;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -113,6 +113,8 @@ public class BCF2Decoder {
|
|||
* @param recordBytes
|
||||
*/
|
||||
public void setRecordBytes(final byte[] recordBytes) {
|
||||
assert recordBytes != null;
|
||||
|
||||
this.recordBytes = recordBytes;
|
||||
this.recordStream = new ByteArrayInputStream(recordBytes);
|
||||
}
|
||||
|
|
@ -145,7 +147,7 @@ public class BCF2Decoder {
|
|||
for ( int i = 0; i < size; i++ ) {
|
||||
ints.add(decodeSingleValue(type));
|
||||
}
|
||||
return ints.get(0) == null ? null : ints;
|
||||
return ints.get(0) == null ? null : ints; // return null when all of the values are null
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -175,6 +177,7 @@ public class BCF2Decoder {
|
|||
|
||||
private final Object decodeLiteralString(final int size) {
|
||||
assert size > 0;
|
||||
|
||||
// TODO -- assumes size > 0
|
||||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||
try {
|
||||
|
|
@ -227,6 +230,8 @@ public class BCF2Decoder {
|
|||
* @return
|
||||
*/
|
||||
private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
||||
assert blockSizeInBytes >= 0;
|
||||
|
||||
final byte[] record = new byte[blockSizeInBytes];
|
||||
try {
|
||||
final int bytesRead = inputStream.read(record);
|
||||
|
|
@ -239,6 +244,8 @@ public class BCF2Decoder {
|
|||
}
|
||||
|
||||
private final static void validateReadBytes(final int actuallyRead, final int expected) {
|
||||
assert expected >= 0;
|
||||
|
||||
if ( actuallyRead < expected ) {
|
||||
throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
|
||||
actuallyRead == -1 ?
|
||||
|
|
|
|||
|
|
@ -58,31 +58,6 @@ public class BCF2Encoder {
|
|||
return bytes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Super-high level interface
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Totally generic encoder that examines o, determines the best way to encode it, and encodes it
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final BCF2Type encode(final Object o) throws IOException {
|
||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||
|
||||
if ( o instanceof List ) {
|
||||
final BCF2Type type = determineBCFType(((List) o).get(0));
|
||||
encodeTyped((List) o, type);
|
||||
return type;
|
||||
} else {
|
||||
final BCF2Type type = determineBCFType(o);
|
||||
encodeTyped(o, type);
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Writing typed values (have type byte)
|
||||
|
|
@ -108,12 +83,6 @@ public class BCF2Encoder {
|
|||
encodeRawValues(v, type);
|
||||
}
|
||||
|
||||
public final BCF2Type encodeTypedIntOfBestSize(final int value) throws IOException {
|
||||
final BCF2Type type = determineIntegerType(value);
|
||||
encodeTyped(value, type);
|
||||
return type;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Writing raw values (don't have a type byte)
|
||||
|
|
@ -127,17 +96,21 @@ public class BCF2Encoder {
|
|||
}
|
||||
|
||||
public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
|
||||
if ( value == type.getMissingJavaValue() )
|
||||
encodeRawMissingValue(type);
|
||||
else {
|
||||
switch (type) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodePrimitive((Integer)value, type); break;
|
||||
case FLOAT: encodeRawFloat((Double) value, type); break;
|
||||
case CHAR: encodeRawChar((Byte) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
try {
|
||||
if ( value == type.getMissingJavaValue() )
|
||||
encodeRawMissingValue(type);
|
||||
else {
|
||||
switch (type) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodePrimitive((Integer)value, type); break;
|
||||
case FLOAT: encodeRawFloat((Double) value); break;
|
||||
case CHAR: encodeRawChar((Byte) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("BUG: invalid type cast to " + type + " from " + value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -146,6 +119,8 @@ public class BCF2Encoder {
|
|||
}
|
||||
|
||||
public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
|
||||
if ( size <= 0 ) throw new ReviewedStingException("BUG: size <= 0");
|
||||
|
||||
for ( int i = 0; i < size; i++ )
|
||||
encodeRawMissingValue(type);
|
||||
}
|
||||
|
|
@ -160,15 +135,19 @@ public class BCF2Encoder {
|
|||
encodeStream.write(c);
|
||||
}
|
||||
|
||||
public final void encodeRawFloat(final double value, final BCF2Type type) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits((float)value), type);
|
||||
public final void encodeRawFloat(final double value) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits((float)value), BCF2Type.FLOAT);
|
||||
}
|
||||
|
||||
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
||||
if ( size < 0 ) throw new ReviewedStingException("BUG: size < 0");
|
||||
|
||||
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
||||
encodeStream.write(typeByte);
|
||||
if ( BCF2Utils.willOverflow(size) )
|
||||
encodeTypedIntOfBestSize(size);
|
||||
if ( BCF2Utils.willOverflow(size) ) {
|
||||
// write in the overflow size
|
||||
encodeTyped(size, determineIntegerType(size));
|
||||
}
|
||||
}
|
||||
|
||||
public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
|
||||
|
|
@ -223,6 +202,28 @@ public class BCF2Encoder {
|
|||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Totally generic encoder that examines o, determines the best way to encode it, and encodes it
|
||||
*
|
||||
* This method is incredibly slow, but it's only used for UnitTests so it doesn't matter
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
protected final BCF2Type encode(final Object o) throws IOException {
|
||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||
|
||||
if ( o instanceof List ) {
|
||||
final BCF2Type type = determineBCFType(((List) o).get(0));
|
||||
encodeTyped((List) o, type);
|
||||
return type;
|
||||
} else {
|
||||
final BCF2Type type = determineBCFType(o);
|
||||
encodeTyped(o, type);
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
private final BCF2Type determineBCFType(final Object arg) {
|
||||
final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
|
||||
|
||||
|
|
@ -246,6 +247,8 @@ public class BCF2Encoder {
|
|||
}
|
||||
|
||||
private final List<Byte> stringToBytes(final String v) throws IOException {
|
||||
assert v != null && !v.equals("");
|
||||
|
||||
// TODO -- this needs to be optimized away for efficiency
|
||||
final byte[] bytes = v.getBytes();
|
||||
final List<Byte> l = new ArrayList<Byte>(bytes.length);
|
||||
|
|
|
|||
|
|
@ -33,6 +33,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
|||
import java.util.EnumMap;
|
||||
|
||||
public class GenotypeLikelihoods {
|
||||
public final static int MAX_PL = Short.MAX_VALUE;
|
||||
|
||||
//
|
||||
// There are two objects here because we are lazy in creating both representations
|
||||
// for this object: a vector of log10 Probs and the PL phred-scaled string. Supports
|
||||
|
|
@ -209,7 +211,7 @@ public class GenotypeLikelihoods {
|
|||
final double adjust = maxPL(GLs);
|
||||
|
||||
for ( int i = 0; i < GLs.length; i++ ) {
|
||||
pls[i] = (int)Math.round(-10 * (GLs[i] - adjust));
|
||||
pls[i] = (int)Math.round(Math.min(-10 * (GLs[i] - adjust), MAX_PL));
|
||||
}
|
||||
|
||||
return pls;
|
||||
|
|
|
|||
|
|
@ -146,7 +146,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
// qual
|
||||
if ( vc.hasLog10PError() )
|
||||
encoder.encodeRawFloat((float) vc.getPhredScaledQual(), BCF2Type.FLOAT);
|
||||
encoder.encodeRawFloat((float) vc.getPhredScaledQual());
|
||||
else
|
||||
encoder.encodeRawMissingValue(BCF2Type.FLOAT);
|
||||
|
||||
|
|
@ -183,7 +183,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
if ( vc.isFiltered() ) {
|
||||
encodeStringsByRef(vc.getFilters());
|
||||
} else {
|
||||
encoder.encodeTypedMissing(BCF2Type.INT32);
|
||||
encoder.encodeTypedMissing(BCF2Type.INT8);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -198,7 +198,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
// write size
|
||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
|
|
@ -219,6 +218,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
||||
assert metaData != null; // field is supposed to be in header
|
||||
|
||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||
|
|
@ -266,7 +267,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
throw new ReviewedStingException("BUG: header for " + field +
|
||||
" has inconsistent number of values " + numInFormatField +
|
||||
" compared to values in VariantContext " + ((List) val).size());
|
||||
final Collection<Object> vals = numInFormatField == 1 ? Collections.singleton(val) : (Collection)val;
|
||||
final List<Object> vals = numInFormatField == 1 ? Collections.singletonList(val) : (List<Object>)val;
|
||||
encoder.encodeRawValues(vals, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
|
|
@ -275,12 +276,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private final class VCFToBCFEncoding {
|
||||
VCFHeaderLineType vcfType;
|
||||
BCF2Type BCF2Type;
|
||||
List<Object> valuesToEncode;
|
||||
List<? extends Object> valuesToEncode;
|
||||
|
||||
private VCFToBCFEncoding(final VCFHeaderLineType vcfType, final BCF2Type BCF2Type, final List<? extends Object> valuesToEncode) {
|
||||
this.vcfType = vcfType;
|
||||
this.BCF2Type = BCF2Type;
|
||||
this.valuesToEncode = (List<Object>)valuesToEncode;
|
||||
this.valuesToEncode = valuesToEncode;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -292,33 +293,39 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
final boolean isList = value instanceof List;
|
||||
final Object toType = isList ? ((List)value).get(0) : value;
|
||||
|
||||
switch ( metaData.getType() ) {
|
||||
case Character:
|
||||
assert toType instanceof String;
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, Collections.singletonList(value));
|
||||
case Flag:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
||||
case String:
|
||||
final List<String> s = isList ? (List<String>)value : Collections.singletonList((String) value);
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, s);
|
||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||
List<Integer> l;
|
||||
BCF2Type intType;
|
||||
if ( isList ) {
|
||||
l = (List<Integer>)value;
|
||||
intType = encoder.determineIntegerType(l);
|
||||
} else if ( value != null ) {
|
||||
intType = encoder.determineIntegerType((Integer)value);
|
||||
l = Collections.singletonList((Integer)value);
|
||||
} else {
|
||||
intType = BCF2Type.INT8;
|
||||
l = Collections.singletonList((Integer) null);
|
||||
}
|
||||
return new VCFToBCFEncoding(metaData.getType(), intType, l);
|
||||
case Float:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.FLOAT, isList ? (List<Double>)value : Collections.singletonList(value));
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
try {
|
||||
switch ( metaData.getType() ) {
|
||||
case Character:
|
||||
assert toType instanceof String;
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, Collections.singletonList(value));
|
||||
case Flag:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
||||
case String:
|
||||
final List<String> s = isList ? (List<String>)value : Collections.singletonList((String) value);
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, s);
|
||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||
List<Integer> l;
|
||||
BCF2Type intType;
|
||||
if ( isList ) {
|
||||
l = (List<Integer>)value;
|
||||
intType = encoder.determineIntegerType(l);
|
||||
} else if ( value != null ) {
|
||||
intType = encoder.determineIntegerType((Integer)value);
|
||||
l = Collections.singletonList((Integer)value);
|
||||
} else {
|
||||
intType = BCF2Type.INT8;
|
||||
l = Collections.singletonList((Integer) null);
|
||||
}
|
||||
return new VCFToBCFEncoding(metaData.getType(), intType, l);
|
||||
case Float:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.FLOAT, isList ? (List<Double>)value : Collections.singletonList(value));
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Error computing VCF -> BCF encoding. Received cast class exception"
|
||||
+ " indicating that the VCF header for " + metaData + " is inconsistent with the" +
|
||||
" value seen in the VariantContext object = " + value, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -404,17 +411,26 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
* @throws IOException
|
||||
*/
|
||||
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
|
||||
assert infoBlock.length > 0;
|
||||
assert genotypesBlock.length >= 0;
|
||||
|
||||
BCF2Encoder.encodePrimitive(infoBlock.length, BCF2Type.INT32, outputStream);
|
||||
BCF2Encoder.encodePrimitive(genotypesBlock.length, BCF2Type.INT32, outputStream);
|
||||
outputStream.write(infoBlock);
|
||||
outputStream.write(genotypesBlock);
|
||||
}
|
||||
|
||||
public final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||
return encodeStringsByRef(Collections.singleton(string));
|
||||
// TODO -- obvious optimization case
|
||||
private final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||
assert string != null;
|
||||
|
||||
return encodeStringsByRef(Collections.singletonList(string));
|
||||
}
|
||||
|
||||
public final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
// TODO -- in size == 1 case branch to singleoton fast-path
|
||||
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
assert ! strings.isEmpty();
|
||||
|
||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||
BCF2Type maxType = BCF2Type.INT8; // start with the smallest size
|
||||
|
||||
|
|
@ -424,12 +440,15 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||
final int offset = got;
|
||||
offsets.add(offset);
|
||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: if ( maxType == BCF2Type.INT8 ) maxType = BCF2Type.INT16; break;
|
||||
case INT32: maxType = BCF2Type.INT32; break;
|
||||
default: throw new ReviewedStingException("Unexpected type " + type1);
|
||||
|
||||
if ( maxType != BCF2Type.INT32) { // don't bother looking if we already are at 32 bit ints
|
||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: if ( maxType == BCF2Type.INT8 ) maxType = BCF2Type.INT16; break;
|
||||
case INT32: maxType = BCF2Type.INT32; break;
|
||||
default: throw new ReviewedStingException("Unexpected type " + type1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -438,7 +457,10 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
return maxType;
|
||||
}
|
||||
|
||||
public final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
||||
private final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
||||
assert key != null && ! key.equals("");
|
||||
assert size >= 0;
|
||||
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeType(size, valueType);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue