GenotypeLikelihoods PLs are capped at Short.MAX_VALUE now
-- BCF2 code now throws UserExceptions where appropriate -- Added asserts for code safety -- encode(Object o) changed from public to protected, since the method is for testing only
This commit is contained in:
parent
d52bc31a47
commit
6301572009
|
|
@ -30,6 +30,7 @@ import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.FeatureCodecHeader;
|
import org.broad.tribble.FeatureCodecHeader;
|
||||||
import org.broad.tribble.readers.AsciiLineReader;
|
import org.broad.tribble.readers.AsciiLineReader;
|
||||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -96,7 +97,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
try {
|
try {
|
||||||
// note that this reads the magic as well, and so does double duty
|
// note that this reads the magic as well, and so does double duty
|
||||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
||||||
throw new IllegalArgumentException("Input stream does not begin with BCF2 magic");
|
throw new UserException.MalformedBCF2("Input stream does not begin with BCF2 magic");
|
||||||
|
|
||||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||||
|
|
||||||
|
|
@ -224,15 +225,13 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
private void decodeID( final VariantContextBuilder builder ) {
|
private void decodeID( final VariantContextBuilder builder ) {
|
||||||
final String id = (String)decoder.decodeTypedValue();
|
final String id = (String)decoder.decodeTypedValue();
|
||||||
|
|
||||||
if ( id == null ) {
|
if ( id == null )
|
||||||
builder.noID();
|
builder.noID();
|
||||||
}
|
else
|
||||||
else {
|
|
||||||
builder.id(id);
|
builder.id(id);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
public static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
protected static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||||
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
if ( ! AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||||
|
|
@ -298,15 +297,16 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
||||||
final int nSamples = siteInfo.nSamples;
|
final int nSamples = siteInfo.nSamples;
|
||||||
final int nFields = siteInfo.nFormatFields;
|
final int nFields = siteInfo.nFormatFields;
|
||||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
|
||||||
|
|
||||||
if ( samples.size() != nSamples )
|
if ( samples.size() != nSamples )
|
||||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||||
"different numbers of samples per record. Saw " + samples.size() +
|
"different numbers of samples per record. Saw " + samples.size() +
|
||||||
" samples in header but have a record with " + nSamples + " samples");
|
" samples in header but have a record with " + nSamples + " samples");
|
||||||
|
|
||||||
|
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||||
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||||
for ( int i = 0; i < nSamples; i++ ) {
|
for ( int i = 0; i < nSamples; i++ ) {
|
||||||
|
// all of the information we need for each genotype, with default values
|
||||||
final String sampleName = samples.get(i);
|
final String sampleName = samples.get(i);
|
||||||
List<Allele> alleles = null;
|
List<Allele> alleles = null;
|
||||||
boolean isPhased = false;
|
boolean isPhased = false;
|
||||||
|
|
@ -318,6 +318,7 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||||
final String field = entry.getKey();
|
final String field = entry.getKey();
|
||||||
final List<Object> values = entry.getValue();
|
final List<Object> values = entry.getValue();
|
||||||
|
try {
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)values.get(i));
|
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)values.get(i));
|
||||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||||
|
|
@ -340,9 +341,14 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
attributes.put(field, values.get(i));
|
attributes.put(field, values.get(i));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
|
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||||
|
+ " inconsistent with the value observed in the decoded value in the "
|
||||||
|
+ " BCF file. Value was " + Utils.join(",", values));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( alleles == null ) throw new ReviewedStingException("BUG: no alleles found");
|
if ( alleles == null ) throw new UserException.MalformedBCF2("BUG: no alleles found");
|
||||||
|
|
||||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||||
genotypes.add(g);
|
genotypes.add(g);
|
||||||
|
|
@ -368,6 +374,11 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
||||||
|
assert (nFields > 0 && nSamples > 0) || (nFields == 0 && nSamples == 0);
|
||||||
|
|
||||||
|
if ( nFields == 0 ) // fast path exit for sites only file
|
||||||
|
return Collections.emptyMap();
|
||||||
|
else {
|
||||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||||
|
|
||||||
for ( int i = 0; i < nFields; i++ ) {
|
for ( int i = 0; i < nFields; i++ ) {
|
||||||
|
|
@ -381,12 +392,14 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
|
|
||||||
return map;
|
return map;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private final String getDictionaryString() {
|
private final String getDictionaryString() {
|
||||||
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
return getDictionaryString((Integer) decoder.decodeTypedValue());
|
||||||
}
|
}
|
||||||
|
|
||||||
private final String getDictionaryString(final int offset) {
|
private final String getDictionaryString(final int offset) {
|
||||||
|
if ( offset >= dictionary.size() ) throw new UserException.MalformedBCF2("BUG: no dictionary field found at offset " + offset);
|
||||||
final String field = dictionary.get(offset);
|
final String field = dictionary.get(offset);
|
||||||
return field;
|
return field;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -113,6 +113,8 @@ public class BCF2Decoder {
|
||||||
* @param recordBytes
|
* @param recordBytes
|
||||||
*/
|
*/
|
||||||
public void setRecordBytes(final byte[] recordBytes) {
|
public void setRecordBytes(final byte[] recordBytes) {
|
||||||
|
assert recordBytes != null;
|
||||||
|
|
||||||
this.recordBytes = recordBytes;
|
this.recordBytes = recordBytes;
|
||||||
this.recordStream = new ByteArrayInputStream(recordBytes);
|
this.recordStream = new ByteArrayInputStream(recordBytes);
|
||||||
}
|
}
|
||||||
|
|
@ -145,7 +147,7 @@ public class BCF2Decoder {
|
||||||
for ( int i = 0; i < size; i++ ) {
|
for ( int i = 0; i < size; i++ ) {
|
||||||
ints.add(decodeSingleValue(type));
|
ints.add(decodeSingleValue(type));
|
||||||
}
|
}
|
||||||
return ints.get(0) == null ? null : ints;
|
return ints.get(0) == null ? null : ints; // return null when all of the values are null
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -175,6 +177,7 @@ public class BCF2Decoder {
|
||||||
|
|
||||||
private final Object decodeLiteralString(final int size) {
|
private final Object decodeLiteralString(final int size) {
|
||||||
assert size > 0;
|
assert size > 0;
|
||||||
|
|
||||||
// TODO -- assumes size > 0
|
// TODO -- assumes size > 0
|
||||||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||||
try {
|
try {
|
||||||
|
|
@ -227,6 +230,8 @@ public class BCF2Decoder {
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
||||||
|
assert blockSizeInBytes >= 0;
|
||||||
|
|
||||||
final byte[] record = new byte[blockSizeInBytes];
|
final byte[] record = new byte[blockSizeInBytes];
|
||||||
try {
|
try {
|
||||||
final int bytesRead = inputStream.read(record);
|
final int bytesRead = inputStream.read(record);
|
||||||
|
|
@ -239,6 +244,8 @@ public class BCF2Decoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static void validateReadBytes(final int actuallyRead, final int expected) {
|
private final static void validateReadBytes(final int actuallyRead, final int expected) {
|
||||||
|
assert expected >= 0;
|
||||||
|
|
||||||
if ( actuallyRead < expected ) {
|
if ( actuallyRead < expected ) {
|
||||||
throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
|
throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
|
||||||
actuallyRead == -1 ?
|
actuallyRead == -1 ?
|
||||||
|
|
|
||||||
|
|
@ -58,31 +58,6 @@ public class BCF2Encoder {
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// Super-high level interface
|
|
||||||
//
|
|
||||||
// --------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Totally generic encoder that examines o, determines the best way to encode it, and encodes it
|
|
||||||
* @param o
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public final BCF2Type encode(final Object o) throws IOException {
|
|
||||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
|
||||||
|
|
||||||
if ( o instanceof List ) {
|
|
||||||
final BCF2Type type = determineBCFType(((List) o).get(0));
|
|
||||||
encodeTyped((List) o, type);
|
|
||||||
return type;
|
|
||||||
} else {
|
|
||||||
final BCF2Type type = determineBCFType(o);
|
|
||||||
encodeTyped(o, type);
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Writing typed values (have type byte)
|
// Writing typed values (have type byte)
|
||||||
|
|
@ -108,12 +83,6 @@ public class BCF2Encoder {
|
||||||
encodeRawValues(v, type);
|
encodeRawValues(v, type);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final BCF2Type encodeTypedIntOfBestSize(final int value) throws IOException {
|
|
||||||
final BCF2Type type = determineIntegerType(value);
|
|
||||||
encodeTyped(value, type);
|
|
||||||
return type;
|
|
||||||
}
|
|
||||||
|
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
//
|
//
|
||||||
// Writing raw values (don't have a type byte)
|
// Writing raw values (don't have a type byte)
|
||||||
|
|
@ -127,6 +96,7 @@ public class BCF2Encoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
|
public final <T extends Object> void encodeRawValue(final T value, final BCF2Type type) throws IOException {
|
||||||
|
try {
|
||||||
if ( value == type.getMissingJavaValue() )
|
if ( value == type.getMissingJavaValue() )
|
||||||
encodeRawMissingValue(type);
|
encodeRawMissingValue(type);
|
||||||
else {
|
else {
|
||||||
|
|
@ -134,11 +104,14 @@ public class BCF2Encoder {
|
||||||
case INT8:
|
case INT8:
|
||||||
case INT16:
|
case INT16:
|
||||||
case INT32: encodePrimitive((Integer)value, type); break;
|
case INT32: encodePrimitive((Integer)value, type); break;
|
||||||
case FLOAT: encodeRawFloat((Double) value, type); break;
|
case FLOAT: encodeRawFloat((Double) value); break;
|
||||||
case CHAR: encodeRawChar((Byte) value); break;
|
case CHAR: encodeRawChar((Byte) value); break;
|
||||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
|
throw new ReviewedStingException("BUG: invalid type cast to " + type + " from " + value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
|
public final void encodeRawMissingValue(final BCF2Type type) throws IOException {
|
||||||
|
|
@ -146,6 +119,8 @@ public class BCF2Encoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
|
public final void encodeRawMissingValues(final int size, final BCF2Type type) throws IOException {
|
||||||
|
if ( size <= 0 ) throw new ReviewedStingException("BUG: size <= 0");
|
||||||
|
|
||||||
for ( int i = 0; i < size; i++ )
|
for ( int i = 0; i < size; i++ )
|
||||||
encodeRawMissingValue(type);
|
encodeRawMissingValue(type);
|
||||||
}
|
}
|
||||||
|
|
@ -160,15 +135,19 @@ public class BCF2Encoder {
|
||||||
encodeStream.write(c);
|
encodeStream.write(c);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void encodeRawFloat(final double value, final BCF2Type type) throws IOException {
|
public final void encodeRawFloat(final double value) throws IOException {
|
||||||
encodePrimitive(Float.floatToIntBits((float)value), type);
|
encodePrimitive(Float.floatToIntBits((float)value), BCF2Type.FLOAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
||||||
|
if ( size < 0 ) throw new ReviewedStingException("BUG: size < 0");
|
||||||
|
|
||||||
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
final byte typeByte = BCF2Utils.encodeTypeDescriptor(size, type);
|
||||||
encodeStream.write(typeByte);
|
encodeStream.write(typeByte);
|
||||||
if ( BCF2Utils.willOverflow(size) )
|
if ( BCF2Utils.willOverflow(size) ) {
|
||||||
encodeTypedIntOfBestSize(size);
|
// write in the overflow size
|
||||||
|
encodeTyped(size, determineIntegerType(size));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
|
public final void encodeRawInt(final int value, final BCF2Type type) throws IOException {
|
||||||
|
|
@ -223,6 +202,28 @@ public class BCF2Encoder {
|
||||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Totally generic encoder that examines o, determines the best way to encode it, and encodes it
|
||||||
|
*
|
||||||
|
* This method is incredibly slow, but it's only used for UnitTests so it doesn't matter
|
||||||
|
*
|
||||||
|
* @param o
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
protected final BCF2Type encode(final Object o) throws IOException {
|
||||||
|
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||||
|
|
||||||
|
if ( o instanceof List ) {
|
||||||
|
final BCF2Type type = determineBCFType(((List) o).get(0));
|
||||||
|
encodeTyped((List) o, type);
|
||||||
|
return type;
|
||||||
|
} else {
|
||||||
|
final BCF2Type type = determineBCFType(o);
|
||||||
|
encodeTyped(o, type);
|
||||||
|
return type;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private final BCF2Type determineBCFType(final Object arg) {
|
private final BCF2Type determineBCFType(final Object arg) {
|
||||||
final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
|
final Object toType = arg instanceof List ? ((List)arg).get(0) : arg;
|
||||||
|
|
||||||
|
|
@ -246,6 +247,8 @@ public class BCF2Encoder {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final List<Byte> stringToBytes(final String v) throws IOException {
|
private final List<Byte> stringToBytes(final String v) throws IOException {
|
||||||
|
assert v != null && !v.equals("");
|
||||||
|
|
||||||
// TODO -- this needs to be optimized away for efficiency
|
// TODO -- this needs to be optimized away for efficiency
|
||||||
final byte[] bytes = v.getBytes();
|
final byte[] bytes = v.getBytes();
|
||||||
final List<Byte> l = new ArrayList<Byte>(bytes.length);
|
final List<Byte> l = new ArrayList<Byte>(bytes.length);
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,8 @@ import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
import java.util.EnumMap;
|
import java.util.EnumMap;
|
||||||
|
|
||||||
public class GenotypeLikelihoods {
|
public class GenotypeLikelihoods {
|
||||||
|
public final static int MAX_PL = Short.MAX_VALUE;
|
||||||
|
|
||||||
//
|
//
|
||||||
// There are two objects here because we are lazy in creating both representations
|
// There are two objects here because we are lazy in creating both representations
|
||||||
// for this object: a vector of log10 Probs and the PL phred-scaled string. Supports
|
// for this object: a vector of log10 Probs and the PL phred-scaled string. Supports
|
||||||
|
|
@ -209,7 +211,7 @@ public class GenotypeLikelihoods {
|
||||||
final double adjust = maxPL(GLs);
|
final double adjust = maxPL(GLs);
|
||||||
|
|
||||||
for ( int i = 0; i < GLs.length; i++ ) {
|
for ( int i = 0; i < GLs.length; i++ ) {
|
||||||
pls[i] = (int)Math.round(-10 * (GLs[i] - adjust));
|
pls[i] = (int)Math.round(Math.min(-10 * (GLs[i] - adjust), MAX_PL));
|
||||||
}
|
}
|
||||||
|
|
||||||
return pls;
|
return pls;
|
||||||
|
|
|
||||||
|
|
@ -146,7 +146,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
|
|
||||||
// qual
|
// qual
|
||||||
if ( vc.hasLog10PError() )
|
if ( vc.hasLog10PError() )
|
||||||
encoder.encodeRawFloat((float) vc.getPhredScaledQual(), BCF2Type.FLOAT);
|
encoder.encodeRawFloat((float) vc.getPhredScaledQual());
|
||||||
else
|
else
|
||||||
encoder.encodeRawMissingValue(BCF2Type.FLOAT);
|
encoder.encodeRawMissingValue(BCF2Type.FLOAT);
|
||||||
|
|
||||||
|
|
@ -183,7 +183,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
if ( vc.isFiltered() ) {
|
if ( vc.isFiltered() ) {
|
||||||
encodeStringsByRef(vc.getFilters());
|
encodeStringsByRef(vc.getFilters());
|
||||||
} else {
|
} else {
|
||||||
encoder.encodeTypedMissing(BCF2Type.INT32);
|
encoder.encodeTypedMissing(BCF2Type.INT8);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -198,7 +198,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||||
// write size
|
|
||||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc);
|
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc);
|
||||||
for ( final String field : genotypeFields ) {
|
for ( final String field : genotypeFields ) {
|
||||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
|
|
@ -219,6 +218,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
|
|
||||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
||||||
|
assert metaData != null; // field is supposed to be in header
|
||||||
|
|
||||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||||
|
|
@ -266,7 +267,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
throw new ReviewedStingException("BUG: header for " + field +
|
throw new ReviewedStingException("BUG: header for " + field +
|
||||||
" has inconsistent number of values " + numInFormatField +
|
" has inconsistent number of values " + numInFormatField +
|
||||||
" compared to values in VariantContext " + ((List) val).size());
|
" compared to values in VariantContext " + ((List) val).size());
|
||||||
final Collection<Object> vals = numInFormatField == 1 ? Collections.singleton(val) : (Collection)val;
|
final List<Object> vals = numInFormatField == 1 ? Collections.singletonList(val) : (List<Object>)val;
|
||||||
encoder.encodeRawValues(vals, encoding.BCF2Type);
|
encoder.encodeRawValues(vals, encoding.BCF2Type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -275,12 +276,12 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
private final class VCFToBCFEncoding {
|
private final class VCFToBCFEncoding {
|
||||||
VCFHeaderLineType vcfType;
|
VCFHeaderLineType vcfType;
|
||||||
BCF2Type BCF2Type;
|
BCF2Type BCF2Type;
|
||||||
List<Object> valuesToEncode;
|
List<? extends Object> valuesToEncode;
|
||||||
|
|
||||||
private VCFToBCFEncoding(final VCFHeaderLineType vcfType, final BCF2Type BCF2Type, final List<? extends Object> valuesToEncode) {
|
private VCFToBCFEncoding(final VCFHeaderLineType vcfType, final BCF2Type BCF2Type, final List<? extends Object> valuesToEncode) {
|
||||||
this.vcfType = vcfType;
|
this.vcfType = vcfType;
|
||||||
this.BCF2Type = BCF2Type;
|
this.BCF2Type = BCF2Type;
|
||||||
this.valuesToEncode = (List<Object>)valuesToEncode;
|
this.valuesToEncode = valuesToEncode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -292,6 +293,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
final boolean isList = value instanceof List;
|
final boolean isList = value instanceof List;
|
||||||
final Object toType = isList ? ((List)value).get(0) : value;
|
final Object toType = isList ? ((List)value).get(0) : value;
|
||||||
|
|
||||||
|
try {
|
||||||
switch ( metaData.getType() ) {
|
switch ( metaData.getType() ) {
|
||||||
case Character:
|
case Character:
|
||||||
assert toType instanceof String;
|
assert toType instanceof String;
|
||||||
|
|
@ -320,6 +322,11 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Unexpected type for field " + field);
|
throw new ReviewedStingException("Unexpected type for field " + field);
|
||||||
}
|
}
|
||||||
|
} catch ( ClassCastException e ) {
|
||||||
|
throw new ReviewedStingException("Error computing VCF -> BCF encoding. Received cast class exception"
|
||||||
|
+ " indicating that the VCF header for " + metaData + " is inconsistent with the" +
|
||||||
|
" value seen in the VariantContext object = " + value, e);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void addGenotypeFilters(final VariantContext vc) throws IOException {
|
private final void addGenotypeFilters(final VariantContext vc) throws IOException {
|
||||||
|
|
@ -404,17 +411,26 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
|
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
|
||||||
|
assert infoBlock.length > 0;
|
||||||
|
assert genotypesBlock.length >= 0;
|
||||||
|
|
||||||
BCF2Encoder.encodePrimitive(infoBlock.length, BCF2Type.INT32, outputStream);
|
BCF2Encoder.encodePrimitive(infoBlock.length, BCF2Type.INT32, outputStream);
|
||||||
BCF2Encoder.encodePrimitive(genotypesBlock.length, BCF2Type.INT32, outputStream);
|
BCF2Encoder.encodePrimitive(genotypesBlock.length, BCF2Type.INT32, outputStream);
|
||||||
outputStream.write(infoBlock);
|
outputStream.write(infoBlock);
|
||||||
outputStream.write(genotypesBlock);
|
outputStream.write(genotypesBlock);
|
||||||
}
|
}
|
||||||
|
|
||||||
public final BCF2Type encodeStringByRef(final String string) throws IOException {
|
// TODO -- obvious optimization case
|
||||||
return encodeStringsByRef(Collections.singleton(string));
|
private final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||||
|
assert string != null;
|
||||||
|
|
||||||
|
return encodeStringsByRef(Collections.singletonList(string));
|
||||||
}
|
}
|
||||||
|
|
||||||
public final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
// TODO -- in size == 1 case branch to singleton fast-path
|
||||||
|
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||||
|
assert ! strings.isEmpty();
|
||||||
|
|
||||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||||
BCF2Type maxType = BCF2Type.INT8; // start with the smallest size
|
BCF2Type maxType = BCF2Type.INT8; // start with the smallest size
|
||||||
|
|
||||||
|
|
@ -424,6 +440,8 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
if ( got == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||||
final int offset = got;
|
final int offset = got;
|
||||||
offsets.add(offset);
|
offsets.add(offset);
|
||||||
|
|
||||||
|
if ( maxType != BCF2Type.INT32) { // don't bother looking if we already are at 32 bit ints
|
||||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||||
switch ( type1 ) {
|
switch ( type1 ) {
|
||||||
case INT8: break;
|
case INT8: break;
|
||||||
|
|
@ -432,13 +450,17 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
default: throw new ReviewedStingException("Unexpected type " + type1);
|
default: throw new ReviewedStingException("Unexpected type " + type1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// we've checked the types for all strings, so write them out
|
// we've checked the types for all strings, so write them out
|
||||||
encoder.encodeTyped(offsets, maxType);
|
encoder.encodeTyped(offsets, maxType);
|
||||||
return maxType;
|
return maxType;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
private final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
||||||
|
assert key != null && ! key.equals("");
|
||||||
|
assert size >= 0;
|
||||||
|
|
||||||
encodeStringByRef(key);
|
encodeStringByRef(key);
|
||||||
encoder.encodeType(size, valueType);
|
encoder.encodeType(size, valueType);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue