Clean up the interface for BCF2FieldEncoder
-- Now uses a much clearer approach. Updates all user classes to the new interface
This commit is contained in:
parent
dd6aee347a
commit
ea1b699778
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
import java.io.ByteArrayOutputStream;
|
||||||
|
|
@ -181,6 +182,7 @@ public final class BCF2Encoder {
|
||||||
//
|
//
|
||||||
// --------------------------------------------------------------------------------
|
// --------------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Requires({"s != null", "sizeToWrite >= 0"})
|
||||||
public void encodeString(final String s, final int sizeToWrite) throws IOException {
|
public void encodeString(final String s, final int sizeToWrite) throws IOException {
|
||||||
final byte[] bytes = s.getBytes();
|
final byte[] bytes = s.getBytes();
|
||||||
for ( int i = 0; i < sizeToWrite; i++ )
|
for ( int i = 0; i < sizeToWrite; i++ )
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,13 @@ public abstract class BCF2FieldEncoder {
|
||||||
final int dictionaryOffset;
|
final int dictionaryOffset;
|
||||||
final BCF2Type dictionaryOffsetType;
|
final BCF2Type dictionaryOffsetType;
|
||||||
|
|
||||||
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Constructor
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
||||||
this.headerLine = headerLine;
|
this.headerLine = headerLine;
|
||||||
this.fixedType = fixedType;
|
this.fixedType = fixedType;
|
||||||
|
|
||||||
|
|
@ -67,147 +73,218 @@ public abstract class BCF2FieldEncoder {
|
||||||
dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
|
dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
|
||||||
}
|
}
|
||||||
|
|
||||||
public VCFHeaderLineCount getCountType() {
|
// ----------------------------------------------------------------------
|
||||||
return headerLine.getCountType();
|
|
||||||
}
|
|
||||||
|
|
||||||
public VCFCompoundHeaderLine getHeaderLine() {
|
|
||||||
return headerLine;
|
|
||||||
}
|
|
||||||
|
|
||||||
public boolean hasFixedCount() { return getCountType() == VCFHeaderLineCount.INTEGER; }
|
|
||||||
public boolean hasUnboundedCount() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; }
|
|
||||||
public boolean hasContextDeterminedCount() { return ! hasFixedCount() && ! hasUnboundedCount(); }
|
|
||||||
|
|
||||||
//
|
//
|
||||||
// TODO -- this class should own two clean methods
|
// Basic accessors
|
||||||
//
|
|
||||||
// Tell us whether the type and size are static, determined by from the VC itself,
|
|
||||||
// or from the actual encoded values. If the last case, provide a function that tell us
|
|
||||||
// the encoding type and size of the underlying data, given a single value.
|
|
||||||
//
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
// TODO -- cleanup logic of counts
|
public final String getField() { return headerLine.getID(); }
|
||||||
// todo -- differentiate between the VCF header declared size and the encoded size
|
|
||||||
// TODO -- for example, getUnboundedCount should be getCountFromSizeOfValue()
|
|
||||||
//
|
|
||||||
// GenotypeEncoders need to inspect the size properties of the underlying encoder
|
|
||||||
// and determine how (and whether) they need to iterate once through the data to
|
|
||||||
// determine max size (for padding)
|
|
||||||
//
|
|
||||||
|
|
||||||
@Requires("hasFixedCount()")
|
/**
|
||||||
public int getFixedCount() {
|
* Write the field key (dictionary offset and type) into the BCF2Encoder stream
|
||||||
return headerLine.getCount();
|
*
|
||||||
|
* @param encoder where we write our dictionary offset
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
|
||||||
|
encoder.encodeTyped(dictionaryOffset, dictionaryOffsetType);
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getUnboundedCount(final Object value) {
|
|
||||||
return value instanceof List ? ((List) value).size() : 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getContextDeterminedCount(final VariantContext vc) {
|
|
||||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
|
||||||
if ( hasFixedCount() )
|
|
||||||
return getFixedCount();
|
|
||||||
else if ( hasUnboundedCount() )
|
|
||||||
return getUnboundedCount(value);
|
|
||||||
else
|
|
||||||
return getContextDeterminedCount(vc);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getField() { return headerLine.getID(); }
|
|
||||||
|
|
||||||
public int getDictionaryOffset() { return dictionaryOffset; }
|
|
||||||
public BCF2Type getDictionaryOffsetType() { return dictionaryOffsetType; }
|
|
||||||
|
|
||||||
public boolean isFixedTyped() { return ! isDynamicallyTyped(); }
|
|
||||||
public boolean isDynamicallyTyped() { return fixedType == null; }
|
|
||||||
public BCF2Type getType(final Object value) { return isDynamicallyTyped() ? getDynamicType(value) : getFixedType(); }
|
|
||||||
public BCF2Type getFixedType() {
|
|
||||||
if ( fixedType != null )
|
|
||||||
return fixedType;
|
|
||||||
else
|
|
||||||
throw new ReviewedStingException("Not a fixed type encoder: " + getField());
|
|
||||||
}
|
|
||||||
public BCF2Type getDynamicType(final Object value) { throw new ReviewedStingException("Function getDynamicType() not implemented"); }
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
||||||
}
|
}
|
||||||
|
|
||||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// methods to determine the number of encoded elements
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
protected final VCFHeaderLineCount getCountType() {
|
||||||
|
return headerLine.getCountType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())")
|
||||||
|
public boolean hasConstantNumElements() {
|
||||||
|
return getCountType() == VCFHeaderLineCount.INTEGER;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())")
|
||||||
|
public boolean hasValueDeterminedNumElements() {
|
||||||
|
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())")
|
||||||
|
public boolean hasContextDeterminedNumElements() {
|
||||||
|
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("hasConstantNumElements()")
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public int numElements() {
|
||||||
|
return headerLine.getCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("hasValueDeterminedNumElements()")
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public int numElements(final Object value) {
|
||||||
|
return numElementsFromValue(value);
|
||||||
|
//return value instanceof List ? ((List) value).size() : 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("hasContextDeterminedNumElements()")
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public int numElements(final VariantContext vc) {
|
||||||
|
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
public final int numElements(final VariantContext vc, final Object value) {
|
||||||
|
if ( hasConstantNumElements() ) return numElements();
|
||||||
|
else if ( hasContextDeterminedNumElements() ) return numElements(vc);
|
||||||
|
else return numElements(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a value, return the number of elements we will encode for it.
|
||||||
|
*
|
||||||
|
* A null value counts as 0 elements, a List counts as its size, and any other value counts as 1
|
||||||
|
*
|
||||||
|
* @param value the value whose elements are counted; may be null, a List, or a single object
|
||||||
|
* @return 0 if value is null, the size of the list if value is a List, otherwise 1
|
||||||
|
*/
|
||||||
|
@Requires("hasValueDeterminedNumElements()")
|
||||||
|
@Ensures("result >= 0")
|
||||||
|
protected int numElementsFromValue(final Object value) {
|
||||||
|
if ( value == null ) return 0;
|
||||||
|
else if ( value instanceof List ) return ((List) value).size();
|
||||||
|
else return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// methods to determine the BCF2 type of the encoded values
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Ensures("result || isDynamicallyTyped()")
|
||||||
|
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
|
||||||
|
|
||||||
|
@Ensures("result || isStaticallyTyped()")
|
||||||
|
public final boolean isDynamicallyTyped() { return fixedType == null; }
|
||||||
|
|
||||||
|
public final BCF2Type getType(final Object value) {
|
||||||
|
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("isStaticallyTyped()")
|
||||||
|
@Ensures("result != null")
|
||||||
|
public final BCF2Type getStaticType() {
|
||||||
|
return fixedType;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Requires("isDynamicallyTyped()")
|
||||||
|
@Ensures("result != null")
|
||||||
|
public BCF2Type getDynamicType(final Object value) {
|
||||||
|
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field");
|
||||||
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// methods to encode values, including the key abstract method
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||||
|
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||||
encodeValue(encoder, value, type, 0);
|
encodeValue(encoder, value, type, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"})
|
||||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
/**
|
//
|
||||||
* Helper function that takes an object and returns a list representation
|
// Subclass to encode Strings
|
||||||
* of it:
|
//
|
||||||
*
|
// ----------------------------------------------------------------------
|
||||||
* o == null => []
|
|
||||||
* o is a list => o
|
|
||||||
* else => [o]
|
|
||||||
*
|
|
||||||
* @param o
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
|
||||||
if ( o == null ) return Collections.emptyList();
|
|
||||||
else if ( o instanceof List ) return (List<T>)o;
|
|
||||||
else return Collections.singletonList((T)o);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static class StringOrCharacter extends BCF2FieldEncoder {
|
public static class StringOrCharacter extends BCF2FieldEncoder {
|
||||||
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||||
super(headerLine, encoder, dict, BCF2Type.CHAR);
|
super(headerLine, dict, BCF2Type.CHAR);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||||
if ( value != null ) {
|
final String s = javaStringToBCF2String(value);
|
||||||
final String s = encodeString(value);
|
encoder.encodeString(s, Math.max(s.length(), minValues));
|
||||||
encoder.encodeString(s, Math.max(s.length(), minValues));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
//
|
||||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
// Regardless of what the header says, BCF2 strings and characters are always encoded
|
||||||
return value == null ? 0 : encodeString(value).length();
|
// as arrays of CHAR type, which has a variable number of elements depending on the
|
||||||
|
// exact string being encoded
|
||||||
|
//
|
||||||
|
@Override public boolean hasConstantNumElements() { return false; }
|
||||||
|
@Override public boolean hasContextDeterminedNumElements() { return false; }
|
||||||
|
@Override public boolean hasValueDeterminedNumElements() { return true; }
|
||||||
|
@Override protected int numElementsFromValue(final Object value) {
|
||||||
|
return value == null ? 0 : javaStringToBCF2String(value).length();
|
||||||
}
|
}
|
||||||
|
|
||||||
private String encodeString(final Object value) {
|
/**
|
||||||
return value instanceof List ? BCF2Utils.collapseStringList((List<String>)value) : (String)value;
|
* Recode the incoming object to a String, compacting it into a
|
||||||
|
* BCF2 string if the value is a list.
|
||||||
|
*
|
||||||
|
* @param value a String or List<String> to encode, or null
|
||||||
|
* @return a non-null string to encode
|
||||||
|
*/
|
||||||
|
@Ensures("result != null")
|
||||||
|
private String javaStringToBCF2String(final Object value) {
|
||||||
|
return value == null
|
||||||
|
? ""
|
||||||
|
: (value instanceof List
|
||||||
|
? BCF2Utils.collapseStringList((List<String>)value)
|
||||||
|
: (String)value);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Subclass to encode FLAG
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
public static class Flag extends BCF2FieldEncoder {
|
public static class Flag extends BCF2FieldEncoder {
|
||||||
public Flag(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||||
super(headerLine, encoder, dict, BCF2Type.INT8);
|
super(headerLine, dict, BCF2Type.INT8);
|
||||||
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
||||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getFixedCount() {
|
public int numElements() {
|
||||||
return 1; // the header says 0 but we will write 1 value
|
return 1; // the header says 0 but we will write 1 value
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Requires("minValues <= 1")
|
@Requires("minValues <= 1")
|
||||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||||
encoder.encodePrimitive(1, getFixedType());
|
encoder.encodePrimitive(1, getStaticType());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Subclass to encode FLOAT
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
public static class Float extends BCF2FieldEncoder {
|
public static class Float extends BCF2FieldEncoder {
|
||||||
public Float(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
public Float(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||||
super(headerLine, encoder, dict, BCF2Type.FLOAT);
|
super(headerLine, dict, BCF2Type.FLOAT);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -222,13 +299,19 @@ public abstract class BCF2FieldEncoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
//
|
||||||
|
// Subclass to encode int[]
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
public static class IntArray extends BCF2FieldEncoder {
|
public static class IntArray extends BCF2FieldEncoder {
|
||||||
public IntArray(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
public IntArray(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||||
super(headerLine, encoder, dict, null);
|
super(headerLine, dict, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int getUnboundedCount(final Object value) {
|
protected int numElementsFromValue(final Object value) {
|
||||||
return value == null ? 0 : ((int[])value).length;
|
return value == null ? 0 : ((int[])value).length;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -250,9 +333,15 @@ public abstract class BCF2FieldEncoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class IntList extends BCF2FieldEncoder {
|
// ----------------------------------------------------------------------
|
||||||
public IntList(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
//
|
||||||
super(headerLine, encoder, dict, null);
|
// Subclass to encode List<Integer>
|
||||||
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
|
public static class GenericInts extends BCF2FieldEncoder {
|
||||||
|
public GenericInts(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||||
|
super(headerLine, dict, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -271,20 +360,27 @@ public abstract class BCF2FieldEncoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class AtomicInt extends BCF2FieldEncoder {
|
|
||||||
public AtomicInt(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
|
||||||
super(headerLine, encoder, dict, null);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
// ----------------------------------------------------------------------
|
||||||
public BCF2Type getDynamicType(final Object value) {
|
//
|
||||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
|
// Helper methods
|
||||||
}
|
//
|
||||||
|
// ----------------------------------------------------------------------
|
||||||
|
|
||||||
@Override
|
/**
|
||||||
@Requires("minValues <= 1") // 0 is ok as this means no values need to be encoded
|
* Helper function that takes an object and returns a list representation
|
||||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
* of it:
|
||||||
encoder.encodeRawInt(value == null ? type.getMissingBytes() : (Integer)value, type);
|
*
|
||||||
}
|
* o == null => []
|
||||||
|
* o is a list => o
|
||||||
|
* else => [o]
|
||||||
|
*
|
||||||
|
* @param o the object to convert; may be null, a List, or a single element
|
||||||
|
* @return an empty list if o is null, o itself if it is a List, otherwise a singleton list containing o
|
||||||
|
*/
|
||||||
|
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
||||||
|
if ( o == null ) return Collections.emptyList();
|
||||||
|
else if ( o instanceof List ) return (List<T>)o;
|
||||||
|
else return Collections.singletonList((T)o);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||||
|
|
||||||
|
import com.google.java.contract.Requires;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||||
|
|
@ -40,31 +41,10 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* [Short one sentence description of this walker]
|
|
||||||
* <p/>
|
|
||||||
* <p>
|
|
||||||
* [Functionality of this walker]
|
|
||||||
* </p>
|
|
||||||
* <p/>
|
|
||||||
* <h2>Input</h2>
|
|
||||||
* <p>
|
|
||||||
* [Input description]
|
|
||||||
* </p>
|
|
||||||
* <p/>
|
|
||||||
* <h2>Output</h2>
|
|
||||||
* <p>
|
|
||||||
* [Output description]
|
|
||||||
* </p>
|
|
||||||
* <p/>
|
|
||||||
* <h2>Examples</h2>
|
|
||||||
* <pre>
|
|
||||||
* java
|
|
||||||
* -jar GenomeAnalysisTK.jar
|
|
||||||
* -T $WalkerName
|
|
||||||
* </pre>
|
|
||||||
*
|
*
|
||||||
* @author Your Name
|
*
|
||||||
* @since Date created
|
* @author Mark DePristo
|
||||||
|
* @since 6/12
|
||||||
*/
|
*/
|
||||||
public abstract class BCF2FieldWriter {
|
public abstract class BCF2FieldWriter {
|
||||||
private final VCFHeader header;
|
private final VCFHeader header;
|
||||||
|
|
@ -82,7 +62,7 @@ public abstract class BCF2FieldWriter {
|
||||||
protected String getField() { return getFieldEncoder().getField(); }
|
protected String getField() { return getFieldEncoder().getField(); }
|
||||||
|
|
||||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||||
encoder.encodeTyped(fieldEncoder.getDictionaryOffset(), fieldEncoder.getDictionaryOffsetType());
|
fieldEncoder.writeFieldKey(encoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
|
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
|
||||||
|
|
@ -119,9 +99,9 @@ public abstract class BCF2FieldWriter {
|
||||||
// the value is missing, just write in null
|
// the value is missing, just write in null
|
||||||
encoder.encodeType(0, type);
|
encoder.encodeType(0, type);
|
||||||
} else {
|
} else {
|
||||||
final int valueCount = getFieldEncoder().getBCFFieldCount(vc, rawValue);
|
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
|
||||||
encoder.encodeType(valueCount, type);
|
encoder.encodeType(valueCount, type);
|
||||||
getFieldEncoder().encodeValue(encoder, rawValue, type);
|
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -139,8 +119,8 @@ public abstract class BCF2FieldWriter {
|
||||||
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||||
super(header, fieldEncoder);
|
super(header, fieldEncoder);
|
||||||
|
|
||||||
if ( fieldEncoder.hasFixedCount() ) {
|
if ( fieldEncoder.hasConstantNumElements() ) {
|
||||||
nValuesPerGenotype = getFieldEncoder().getFixedCount();
|
nValuesPerGenotype = getFieldEncoder().numElements();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -150,10 +130,10 @@ public abstract class BCF2FieldWriter {
|
||||||
super.start(encoder, vc);
|
super.start(encoder, vc);
|
||||||
|
|
||||||
// only update if we need to
|
// only update if we need to
|
||||||
if ( ! getFieldEncoder().hasFixedCount() ) {
|
if ( ! getFieldEncoder().hasConstantNumElements() ) {
|
||||||
if ( getFieldEncoder().hasContextDeterminedCount() )
|
if ( getFieldEncoder().hasContextDeterminedNumElements() )
|
||||||
// we are cheap -- just depends on genotype of allele counts
|
// we are cheap -- just depends on genotype of allele counts
|
||||||
nValuesPerGenotype = getFieldEncoder().getContextDeterminedCount(vc);
|
nValuesPerGenotype = getFieldEncoder().numElements(vc);
|
||||||
else
|
else
|
||||||
// we have to go fishing through the values themselves (expensive)
|
// we have to go fishing through the values themselves (expensive)
|
||||||
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
|
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
|
||||||
|
|
@ -167,27 +147,25 @@ public abstract class BCF2FieldWriter {
|
||||||
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
public Object getGenotypeValue(final Genotype g) {
|
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||||
return g.getAttribute(getField());
|
return getFieldEncoder().numElements(vc, g.getAttribute(getField()));
|
||||||
}
|
}
|
||||||
|
|
||||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
||||||
int size = -1;
|
int size = -1;
|
||||||
|
|
||||||
for ( final Genotype g : vc.getGenotypes() ) {
|
for ( final Genotype g : vc.getGenotypes() ) {
|
||||||
final Object o = getGenotypeValue(g);
|
size = Math.max(size, numElements(vc, g));
|
||||||
size = Math.max(size, getFieldEncoder().getBCFFieldCount(vc, o));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static class FixedTypeGenotypesWriter extends GenotypesWriter {
|
public static class StaticallyTypeGenotypesWriter extends GenotypesWriter {
|
||||||
public FixedTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
public StaticallyTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||||
super(header, fieldEncoder);
|
super(header, fieldEncoder);
|
||||||
|
encodingType = getFieldEncoder().getStaticType();
|
||||||
encodingType = getFieldEncoder().getFixedType();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -211,12 +189,6 @@ public abstract class BCF2FieldWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO TODO TODO TODO TODO
|
|
||||||
// TODO
|
|
||||||
// TODO THIS ROUTINE NEEDS TO BE OPTIMIZED. IT ACCOUNTS FOR A SIGNIFICANT AMOUNT OF THE
|
|
||||||
// TODO RUNTIME FOR WRITING OUT BCF FILES WITH MANY GENOTYPES
|
|
||||||
// TODO
|
|
||||||
// TODO TODO TODO TODO TODO
|
|
||||||
public static class IGFGenotypesWriter extends GenotypesWriter {
|
public static class IGFGenotypesWriter extends GenotypesWriter {
|
||||||
final IntGenotypeFieldAccessors.Accessor ige;
|
final IntGenotypeFieldAccessors.Accessor ige;
|
||||||
|
|
||||||
|
|
@ -248,19 +220,14 @@ public abstract class BCF2FieldWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Object getGenotypeValue(final Genotype g) {
|
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||||
return ige.getValues(g);
|
return ige.getSize(g);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO TODO TODO TODO TODO
|
|
||||||
// TODO
|
|
||||||
// TODO we should really have a fast path for encoding diploid genotypes where
|
|
||||||
// TODO we don't pay the overhead of creating the allele maps
|
|
||||||
// TODO
|
|
||||||
// TODO TODO TODO TODO TODO
|
|
||||||
public static class GTWriter extends GenotypesWriter {
|
public static class GTWriter extends GenotypesWriter {
|
||||||
Map<Allele, Integer> alleleMap = null;
|
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
|
||||||
|
Allele ref, alt1;
|
||||||
|
|
||||||
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||||
super(header, fieldEncoder);
|
super(header, fieldEncoder);
|
||||||
|
|
@ -274,20 +241,20 @@ public abstract class BCF2FieldWriter {
|
||||||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||||
|
|
||||||
encodingType = BCF2Type.INT8;
|
encodingType = BCF2Type.INT8;
|
||||||
alleleMap = buildAlleleMap(vc);
|
buildAlleleMap(vc);
|
||||||
nValuesPerGenotype = vc.getMaxPloidy();
|
nValuesPerGenotype = vc.getMaxPloidy();
|
||||||
super.start(encoder, vc); //To change body of overridden methods use File | Settings | File Templates.
|
|
||||||
|
super.start(encoder, vc);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||||
final List<Allele> alleles = g.getAlleles();
|
final int samplePloidy = g.getPloidy();
|
||||||
final int samplePloidy = alleles.size();
|
|
||||||
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
|
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
|
||||||
if ( i < samplePloidy ) {
|
if ( i < samplePloidy ) {
|
||||||
// we encode the actual allele
|
// we encode the actual allele
|
||||||
final Allele a = alleles.get(i);
|
final Allele a = g.getAllele(i);
|
||||||
final int offset = alleleMap.get(a);
|
final int offset = getAlleleOffset(a);
|
||||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||||
encoder.encodePrimitive(encoded, encodingType);
|
encoder.encodePrimitive(encoded, encodingType);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -297,16 +264,44 @@ public abstract class BCF2FieldWriter {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private final static Map<Allele, Integer> buildAlleleMap(final VariantContext vc) {
|
/**
|
||||||
final Map<Allele, Integer> alleleMap = new HashMap<Allele, Integer>(vc.getAlleles().size()+1);
|
* Fast path code to determine the offset.
|
||||||
alleleMap.put(Allele.NO_CALL, -1); // convenience for lookup
|
*
|
||||||
|
* Inline tests for == against ref (most common, first test)
|
||||||
final List<Allele> alleles = vc.getAlleles();
|
* == alt1 (second most common, second test)
|
||||||
for ( int i = 0; i < alleles.size(); i++ ) {
|
* == NO_CALL (third)
|
||||||
alleleMap.put(alleles.get(i), i);
|
* and finally in the map from allele => offset for all alt 2+ alleles
|
||||||
|
*
|
||||||
|
* @param a the allele whose offset we wish to determine
|
||||||
|
* @return the offset (from 0) of the allele in the list of variant context alleles (-1 means NO_CALL)
|
||||||
|
*/
|
||||||
|
@Requires("a != null")
|
||||||
|
private final int getAlleleOffset(final Allele a) {
|
||||||
|
if ( a == ref ) return 0;
|
||||||
|
else if ( a == alt1 ) return 1;
|
||||||
|
else if ( a == Allele.NO_CALL ) return -1;
|
||||||
|
else {
|
||||||
|
final Integer o = alleleMapForTriPlus.get(a);
|
||||||
|
if ( o == null ) throw new ReviewedStingException("BUG: Couldn't find allele offset for allele " + a);
|
||||||
|
return o;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return alleleMap;
|
private final void buildAlleleMap(final VariantContext vc) {
|
||||||
|
// these are fast path options to determine the offsets for
|
||||||
|
final int nAlleles = vc.getNAlleles();
|
||||||
|
ref = vc.getReference();
|
||||||
|
alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
|
||||||
|
|
||||||
|
if ( nAlleles > 2 ) {
|
||||||
|
// for multi-allelics we need to clear the map, and add additional looks
|
||||||
|
alleleMapForTriPlus.clear();
|
||||||
|
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||||
|
final List<Allele> alleles = vc.getAlleles();
|
||||||
|
for ( int i = 2; i < alleles.size(); i++ ) {
|
||||||
|
alleleMapForTriPlus.put(alleles.get(i), i);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -84,7 +84,7 @@ public class BCF2FieldWriterManager {
|
||||||
}
|
}
|
||||||
|
|
||||||
private final void log(final String field, final BCF2FieldWriter writer) {
|
private final void log(final String field, final BCF2FieldWriter writer) {
|
||||||
logger.info("Using writer " + writer);
|
logger.info(writer);
|
||||||
}
|
}
|
||||||
|
|
||||||
// -----------------------------------------------------------------
|
// -----------------------------------------------------------------
|
||||||
|
|
@ -109,23 +109,20 @@ public class BCF2FieldWriterManager {
|
||||||
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
||||||
if ( line.getType() != VCFHeaderLineType.Integer )
|
if ( line.getType() != VCFHeaderLineType.Integer )
|
||||||
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||||
return new BCF2FieldEncoder.IntArray(line, encoder, dict);
|
return new BCF2FieldEncoder.IntArray(line, dict);
|
||||||
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||||
} else {
|
} else {
|
||||||
switch ( line.getType() ) {
|
switch ( line.getType() ) {
|
||||||
case Character:
|
case Character:
|
||||||
case String:
|
case String:
|
||||||
return new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
|
return new BCF2FieldEncoder.StringOrCharacter(line, dict);
|
||||||
case Flag:
|
case Flag:
|
||||||
return new BCF2FieldEncoder.Flag(line, encoder, dict);
|
return new BCF2FieldEncoder.Flag(line, dict);
|
||||||
case Float:
|
case Float:
|
||||||
return new BCF2FieldEncoder.Float(line, encoder, dict);
|
return new BCF2FieldEncoder.Float(line, dict);
|
||||||
case Integer:
|
case Integer:
|
||||||
if ( line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1 )
|
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||||
return new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
|
|
||||||
else
|
|
||||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
|
||||||
default:
|
default:
|
||||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||||
}
|
}
|
||||||
|
|
@ -153,7 +150,7 @@ public class BCF2FieldWriterManager {
|
||||||
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
||||||
return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
|
return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
|
||||||
} else {
|
} else {
|
||||||
return new BCF2FieldWriter.FixedTypeGenotypesWriter(header, fieldEncoder);
|
return new BCF2FieldWriter.StaticallyTypeGenotypesWriter(header, fieldEncoder);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,8 +39,7 @@ import java.util.HashMap;
|
||||||
*/
|
*/
|
||||||
class IntGenotypeFieldAccessors {
|
class IntGenotypeFieldAccessors {
|
||||||
// initialized once per writer to allow parallel writers to work
|
// initialized once per writer to allow parallel writers to work
|
||||||
private final HashMap<String, Accessor> intGenotypeFieldEncoders =
|
private final HashMap<String, Accessor> intGenotypeFieldEncoders = new HashMap<String, Accessor>();
|
||||||
new HashMap<String, Accessor>();
|
|
||||||
|
|
||||||
public IntGenotypeFieldAccessors() {
|
public IntGenotypeFieldAccessors() {
|
||||||
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
||||||
|
|
@ -61,7 +60,7 @@ class IntGenotypeFieldAccessors {
|
||||||
public static abstract class Accessor {
|
public static abstract class Accessor {
|
||||||
public abstract int[] getValues(final Genotype g);
|
public abstract int[] getValues(final Genotype g);
|
||||||
|
|
||||||
public int getSize(final Genotype g) {
|
public final int getSize(final Genotype g) {
|
||||||
final int[] v = getValues(g);
|
final int[] v = getValues(g);
|
||||||
return v == null ? 0 : v.length;
|
return v == null ? 0 : v.length;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue