Clean up the interface for BCF2FieldEncoder
-- Now uses a much clearer approach. Updated all user classes to the new interface.
This commit is contained in:
parent
dd6aee347a
commit
ea1b699778
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
|
@ -181,6 +182,7 @@ public final class BCF2Encoder {
|
|||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Requires({"s != null", "sizeToWrite >= 0"})
|
||||
public void encodeString(final String s, final int sizeToWrite) throws IOException {
|
||||
final byte[] bytes = s.getBytes();
|
||||
for ( int i = 0; i < sizeToWrite; i++ )
|
||||
|
|
|
|||
|
|
@ -57,7 +57,13 @@ public abstract class BCF2FieldEncoder {
|
|||
final int dictionaryOffset;
|
||||
final BCF2Type dictionaryOffsetType;
|
||||
|
||||
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Constructor
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
||||
this.headerLine = headerLine;
|
||||
this.fixedType = fixedType;
|
||||
|
||||
|
|
@ -67,147 +73,218 @@ public abstract class BCF2FieldEncoder {
|
|||
dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
|
||||
}
|
||||
|
||||
public VCFHeaderLineCount getCountType() {
|
||||
return headerLine.getCountType();
|
||||
}
|
||||
|
||||
public VCFCompoundHeaderLine getHeaderLine() {
|
||||
return headerLine;
|
||||
}
|
||||
|
||||
public boolean hasFixedCount() { return getCountType() == VCFHeaderLineCount.INTEGER; }
|
||||
public boolean hasUnboundedCount() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; }
|
||||
public boolean hasContextDeterminedCount() { return ! hasFixedCount() && ! hasUnboundedCount(); }
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// TODO -- this class should own two clean methods
|
||||
//
|
||||
// Tell us whether the type and size are static, determined from the VC itself,
|
||||
// or from the actual encoded values. In the last case, provide a function that tells us
|
||||
// the encoding type and size of the underlying data, given a single value.
|
||||
// Basic accessors
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
// TODO -- cleanup logic of counts
|
||||
// todo -- differentiate between the VCF header declared size and the encoded size
|
||||
// TODO -- for example, getUnboundedCount should be getCountFromSizeOfValue()
|
||||
//
|
||||
// GenotypeEncoders need to inspect the size properties of the underlying encoder
|
||||
// and determine how (and whether) they need to iterate once through the data to
|
||||
// determine max size (for padding)
|
||||
//
|
||||
public final String getField() { return headerLine.getID(); }
|
||||
|
||||
@Requires("hasFixedCount()")
|
||||
public int getFixedCount() {
|
||||
return headerLine.getCount();
|
||||
/**
|
||||
* Write the field key (dictionary offset and type) into the BCF2Encoder stream
|
||||
*
|
||||
* @param encoder where we write our dictionary offset
|
||||
* @throws IOException
|
||||
*/
|
||||
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
|
||||
encoder.encodeTyped(dictionaryOffset, dictionaryOffsetType);
|
||||
}
|
||||
|
||||
public int getUnboundedCount(final Object value) {
|
||||
return value instanceof List ? ((List) value).size() : 1;
|
||||
}
|
||||
|
||||
public int getContextDeterminedCount(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
|
||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
||||
if ( hasFixedCount() )
|
||||
return getFixedCount();
|
||||
else if ( hasUnboundedCount() )
|
||||
return getUnboundedCount(value);
|
||||
else
|
||||
return getContextDeterminedCount(vc);
|
||||
}
|
||||
|
||||
public String getField() { return headerLine.getID(); }
|
||||
|
||||
public int getDictionaryOffset() { return dictionaryOffset; }
|
||||
public BCF2Type getDictionaryOffsetType() { return dictionaryOffsetType; }
|
||||
|
||||
public boolean isFixedTyped() { return ! isDynamicallyTyped(); }
|
||||
public boolean isDynamicallyTyped() { return fixedType == null; }
|
||||
public BCF2Type getType(final Object value) { return isDynamicallyTyped() ? getDynamicType(value) : getFixedType(); }
|
||||
public BCF2Type getFixedType() {
|
||||
if ( fixedType != null )
|
||||
return fixedType;
|
||||
else
|
||||
throw new ReviewedStingException("Not a fixed type encoder: " + getField());
|
||||
}
|
||||
public BCF2Type getDynamicType(final Object value) { throw new ReviewedStingException("Function getDynamicType() not implemented"); }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
||||
}
|
||||
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to determine the number of encoded elements
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
protected final VCFHeaderLineCount getCountType() {
|
||||
return headerLine.getCountType();
|
||||
}
|
||||
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasConstantNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.INTEGER;
|
||||
}
|
||||
|
||||
@Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasValueDeterminedNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
|
||||
}
|
||||
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())")
|
||||
public boolean hasContextDeterminedNumElements() {
|
||||
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
|
||||
}
|
||||
|
||||
@Requires("hasConstantNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements() {
|
||||
return headerLine.getCount();
|
||||
}
|
||||
|
||||
@Requires("hasValueDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final Object value) {
|
||||
return numElementsFromValue(value);
|
||||
//return value instanceof List ? ((List) value).size() : 1;
|
||||
}
|
||||
|
||||
@Requires("hasContextDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
|
||||
@Ensures("result >= 0")
|
||||
public final int numElements(final VariantContext vc, final Object value) {
|
||||
if ( hasConstantNumElements() ) return numElements();
|
||||
else if ( hasContextDeterminedNumElements() ) return numElements(vc);
|
||||
else return numElements(value);
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a value, return the number of elements we will encode for it.
|
||||
*
|
||||
* Assumes the value is encoded as a List
|
||||
*
|
||||
* @param value the value to count; may be null, a List, or a single object
|
||||
* @return by default, 0 if value is null, the list size if value is a List, otherwise 1
|
||||
*/
|
||||
@Requires("hasValueDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
protected int numElementsFromValue(final Object value) {
|
||||
if ( value == null ) return 0;
|
||||
else if ( value instanceof List ) return ((List) value).size();
|
||||
else return 1;
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to determine the BCF2 type of the encoded values
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Ensures("result || isDynamicallyTyped()")
|
||||
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
|
||||
|
||||
@Ensures("result || isStaticallyTyped()")
|
||||
public final boolean isDynamicallyTyped() { return fixedType == null; }
|
||||
|
||||
public final BCF2Type getType(final Object value) {
|
||||
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
|
||||
}
|
||||
|
||||
@Requires("isStaticallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public final BCF2Type getStaticType() {
|
||||
return fixedType;
|
||||
}
|
||||
|
||||
@Requires("isDynamicallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
throw new ReviewedStingException("BUG: cannot get dynamic type for statically typed BCF2 field");
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// methods to encode values, including the key abstract method
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"})
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o the object to convert; may be null, a List, or a single element
|
||||
* @return an empty list if o is null, o itself if it is a List, otherwise a singleton list containing o
|
||||
*/
|
||||
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<T>)o;
|
||||
else return Collections.singletonList((T)o);
|
||||
}
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode Strings
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class StringOrCharacter extends BCF2FieldEncoder {
|
||||
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.CHAR);
|
||||
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.CHAR);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
if ( value != null ) {
|
||||
final String s = encodeString(value);
|
||||
encoder.encodeString(s, Math.max(s.length(), minValues));
|
||||
}
|
||||
final String s = javaStringToBCF2String(value);
|
||||
encoder.encodeString(s, Math.max(s.length(), minValues));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
||||
return value == null ? 0 : encodeString(value).length();
|
||||
//
|
||||
// Regardless of what the header says, BCF2 strings and characters are always encoded
|
||||
// as arrays of CHAR type, which has a variable number of elements depending on the
|
||||
// exact string being encoded
|
||||
//
|
||||
@Override public boolean hasConstantNumElements() { return false; }
|
||||
@Override public boolean hasContextDeterminedNumElements() { return false; }
|
||||
@Override public boolean hasValueDeterminedNumElements() { return true; }
|
||||
@Override protected int numElementsFromValue(final Object value) {
|
||||
return value == null ? 0 : javaStringToBCF2String(value).length();
|
||||
}
|
||||
|
||||
private String encodeString(final Object value) {
|
||||
return value instanceof List ? BCF2Utils.collapseStringList((List<String>)value) : (String)value;
|
||||
/**
|
||||
* Recode the incoming object to a String, compacting it into a
|
||||
* BCF2 string if the value is a list.
|
||||
*
|
||||
* @param value a String or List<String> to encode, or null
|
||||
* @return a non-null string to encode
|
||||
*/
|
||||
@Ensures("result != null")
|
||||
private String javaStringToBCF2String(final Object value) {
|
||||
return value == null
|
||||
? ""
|
||||
: (value instanceof List
|
||||
? BCF2Utils.collapseStringList((List<String>)value)
|
||||
: (String)value);
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode FLAG
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class Flag extends BCF2FieldEncoder {
|
||||
public Flag(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.INT8);
|
||||
public Flag(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.INT8);
|
||||
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFixedCount() {
|
||||
public int numElements() {
|
||||
return 1; // the header says 0 but we will write 1 value
|
||||
}
|
||||
|
||||
@Override
|
||||
@Requires("minValues <= 1")
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodePrimitive(1, getFixedType());
|
||||
encoder.encodePrimitive(1, getStaticType());
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode FLOAT
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class Float extends BCF2FieldEncoder {
|
||||
public Float(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.FLOAT);
|
||||
public Float(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, BCF2Type.FLOAT);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -222,13 +299,19 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode int[]
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class IntArray extends BCF2FieldEncoder {
|
||||
public IntArray(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
public IntArray(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getUnboundedCount(final Object value) {
|
||||
protected int numElementsFromValue(final Object value) {
|
||||
return value == null ? 0 : ((int[])value).length;
|
||||
}
|
||||
|
||||
|
|
@ -250,9 +333,15 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
}
|
||||
|
||||
public static class IntList extends BCF2FieldEncoder {
|
||||
public IntList(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Subclass to encode List<Integer>
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public static class GenericInts extends BCF2FieldEncoder {
|
||||
public GenericInts(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict ) {
|
||||
super(headerLine, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
@ -271,20 +360,27 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
}
|
||||
|
||||
public static class AtomicInt extends BCF2FieldEncoder {
|
||||
public AtomicInt(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
|
||||
}
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Helper methods
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
@Requires("minValues <= 1") // 0 is ok as this means no values need to be encoded
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodeRawInt(value == null ? type.getMissingBytes() : (Integer)value, type);
|
||||
}
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o the object to convert; may be null, a List, or a single element
|
||||
* @return an empty list if o is null, o itself if it is a List, otherwise a singleton list containing o
|
||||
*/
|
||||
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<T>)o;
|
||||
else return Collections.singletonList((T)o);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
|
|
@ -40,31 +41,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
*/
|
||||
public abstract class BCF2FieldWriter {
|
||||
private final VCFHeader header;
|
||||
|
|
@ -82,7 +62,7 @@ public abstract class BCF2FieldWriter {
|
|||
protected String getField() { return getFieldEncoder().getField(); }
|
||||
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
encoder.encodeTyped(fieldEncoder.getDictionaryOffset(), fieldEncoder.getDictionaryOffsetType());
|
||||
fieldEncoder.writeFieldKey(encoder);
|
||||
}
|
||||
|
||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
|
||||
|
|
@ -119,9 +99,9 @@ public abstract class BCF2FieldWriter {
|
|||
// the value is missing, just write in null
|
||||
encoder.encodeType(0, type);
|
||||
} else {
|
||||
final int valueCount = getFieldEncoder().getBCFFieldCount(vc, rawValue);
|
||||
final int valueCount = getFieldEncoder().numElements(vc, rawValue);
|
||||
encoder.encodeType(valueCount, type);
|
||||
getFieldEncoder().encodeValue(encoder, rawValue, type);
|
||||
getFieldEncoder().encodeOneValue(encoder, rawValue, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -139,8 +119,8 @@ public abstract class BCF2FieldWriter {
|
|||
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
||||
if ( fieldEncoder.hasFixedCount() ) {
|
||||
nValuesPerGenotype = getFieldEncoder().getFixedCount();
|
||||
if ( fieldEncoder.hasConstantNumElements() ) {
|
||||
nValuesPerGenotype = getFieldEncoder().numElements();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -150,10 +130,10 @@ public abstract class BCF2FieldWriter {
|
|||
super.start(encoder, vc);
|
||||
|
||||
// only update if we need to
|
||||
if ( ! getFieldEncoder().hasFixedCount() ) {
|
||||
if ( getFieldEncoder().hasContextDeterminedCount() )
|
||||
if ( ! getFieldEncoder().hasConstantNumElements() ) {
|
||||
if ( getFieldEncoder().hasContextDeterminedNumElements() )
|
||||
// we are cheap -- just depends on genotype of allele counts
|
||||
nValuesPerGenotype = getFieldEncoder().getContextDeterminedCount(vc);
|
||||
nValuesPerGenotype = getFieldEncoder().numElements(vc);
|
||||
else
|
||||
// we have to go fishing through the values themselves (expensive)
|
||||
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
|
||||
|
|
@ -167,27 +147,25 @@ public abstract class BCF2FieldWriter {
|
|||
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||
}
|
||||
|
||||
public Object getGenotypeValue(final Genotype g) {
|
||||
return g.getAttribute(getField());
|
||||
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||
return getFieldEncoder().numElements(vc, g.getAttribute(getField()));
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
||||
int size = -1;
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final Object o = getGenotypeValue(g);
|
||||
size = Math.max(size, getFieldEncoder().getBCFFieldCount(vc, o));
|
||||
size = Math.max(size, numElements(vc, g));
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
public static class FixedTypeGenotypesWriter extends GenotypesWriter {
|
||||
public FixedTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
public static class StaticallyTypeGenotypesWriter extends GenotypesWriter {
|
||||
public StaticallyTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
||||
encodingType = getFieldEncoder().getFixedType();
|
||||
encodingType = getFieldEncoder().getStaticType();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -211,12 +189,6 @@ public abstract class BCF2FieldWriter {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO THIS ROUTINE NEEDS TO BE OPTIMIZED. IT ACCOUNTS FOR A SIGNIFICANT AMOUNT OF THE
|
||||
// TODO RUNTIME FOR WRITING OUT BCF FILES WITH MANY GENOTYPES
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
public static class IGFGenotypesWriter extends GenotypesWriter {
|
||||
final IntGenotypeFieldAccessors.Accessor ige;
|
||||
|
||||
|
|
@ -248,19 +220,14 @@ public abstract class BCF2FieldWriter {
|
|||
}
|
||||
|
||||
@Override
|
||||
public Object getGenotypeValue(final Genotype g) {
|
||||
return ige.getValues(g);
|
||||
protected int numElements(final VariantContext vc, final Genotype g) {
|
||||
return ige.getSize(g);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO we should really have a fast path for encoding diploid genotypes where
|
||||
// TODO we don't pay the overhead of creating the allele maps
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
public static class GTWriter extends GenotypesWriter {
|
||||
Map<Allele, Integer> alleleMap = null;
|
||||
final Map<Allele, Integer> alleleMapForTriPlus = new HashMap<Allele, Integer>(5);
|
||||
Allele ref, alt1;
|
||||
|
||||
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
|
@ -274,20 +241,20 @@ public abstract class BCF2FieldWriter {
|
|||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
encodingType = BCF2Type.INT8;
|
||||
alleleMap = buildAlleleMap(vc);
|
||||
buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
super.start(encoder, vc); //To change body of overridden methods use File | Settings | File Templates.
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
final List<Allele> alleles = g.getAlleles();
|
||||
final int samplePloidy = alleles.size();
|
||||
final int samplePloidy = g.getPloidy();
|
||||
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
|
||||
if ( i < samplePloidy ) {
|
||||
// we encode the actual allele
|
||||
final Allele a = alleles.get(i);
|
||||
final int offset = alleleMap.get(a);
|
||||
final Allele a = g.getAllele(i);
|
||||
final int offset = getAlleleOffset(a);
|
||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||
encoder.encodePrimitive(encoded, encodingType);
|
||||
} else {
|
||||
|
|
@ -297,16 +264,44 @@ public abstract class BCF2FieldWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private final static Map<Allele, Integer> buildAlleleMap(final VariantContext vc) {
|
||||
final Map<Allele, Integer> alleleMap = new HashMap<Allele, Integer>(vc.getAlleles().size()+1);
|
||||
alleleMap.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 0; i < alleles.size(); i++ ) {
|
||||
alleleMap.put(alleles.get(i), i);
|
||||
/**
|
||||
* Fast path code to determine the offset.
|
||||
*
|
||||
* Inline tests for == against ref (most common, first test)
|
||||
* == alt1 (second most common, second test)
|
||||
* == NO_CALL (third)
|
||||
* and finally in the map from allele => offset for all alt 2+ alleles
|
||||
*
|
||||
* @param a the allele whose offset we wish to determine
|
||||
* @return the offset (from 0) of the allele in the list of variant context alleles (-1 means NO_CALL)
|
||||
*/
|
||||
@Requires("a != null")
|
||||
private final int getAlleleOffset(final Allele a) {
|
||||
if ( a == ref ) return 0;
|
||||
else if ( a == alt1 ) return 1;
|
||||
else if ( a == Allele.NO_CALL ) return -1;
|
||||
else {
|
||||
final Integer o = alleleMapForTriPlus.get(a);
|
||||
if ( o == null ) throw new ReviewedStingException("BUG: Couldn't find allele offset for allele " + a);
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
return alleleMap;
|
||||
private final void buildAlleleMap(final VariantContext vc) {
|
||||
// these are fast path options to determine the offsets for the ref and first alt alleles
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
ref = vc.getReference();
|
||||
alt1 = nAlleles > 1 ? vc.getAlternateAllele(0) : null;
|
||||
|
||||
if ( nAlleles > 2 ) {
|
||||
// for multi-allelics we need to clear the map, and add additional lookups
|
||||
alleleMapForTriPlus.clear();
|
||||
alleleMapForTriPlus.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 2; i < alleles.size(); i++ ) {
|
||||
alleleMapForTriPlus.put(alleles.get(i), i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ public class BCF2FieldWriterManager {
|
|||
}
|
||||
|
||||
private final void log(final String field, final BCF2FieldWriter writer) {
|
||||
logger.info("Using writer " + writer);
|
||||
logger.info(writer);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
|
|
@ -109,23 +109,20 @@ public class BCF2FieldWriterManager {
|
|||
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
||||
if ( line.getType() != VCFHeaderLineType.Integer )
|
||||
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||
return new BCF2FieldEncoder.IntArray(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.IntArray(line, dict);
|
||||
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
} else {
|
||||
switch ( line.getType() ) {
|
||||
case Character:
|
||||
case String:
|
||||
return new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.StringOrCharacter(line, dict);
|
||||
case Flag:
|
||||
return new BCF2FieldEncoder.Flag(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.Flag(line, dict);
|
||||
case Float:
|
||||
return new BCF2FieldEncoder.Float(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.Float(line, dict);
|
||||
case Integer:
|
||||
if ( line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1 )
|
||||
return new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
|
||||
else
|
||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
return new BCF2FieldEncoder.GenericInts(line, dict);
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
|
|
@ -153,7 +150,7 @@ public class BCF2FieldWriterManager {
|
|||
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
||||
return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
|
||||
} else {
|
||||
return new BCF2FieldWriter.FixedTypeGenotypesWriter(header, fieldEncoder);
|
||||
return new BCF2FieldWriter.StaticallyTypeGenotypesWriter(header, fieldEncoder);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -39,8 +39,7 @@ import java.util.HashMap;
|
|||
*/
|
||||
class IntGenotypeFieldAccessors {
|
||||
// initialized once per writer to allow parallel writers to work
|
||||
private final HashMap<String, Accessor> intGenotypeFieldEncoders =
|
||||
new HashMap<String, Accessor>();
|
||||
private final HashMap<String, Accessor> intGenotypeFieldEncoders = new HashMap<String, Accessor>();
|
||||
|
||||
public IntGenotypeFieldAccessors() {
|
||||
intGenotypeFieldEncoders.put(VCFConstants.DEPTH_KEY, new IntGenotypeFieldAccessors.DPAccessor());
|
||||
|
|
@ -61,7 +60,7 @@ class IntGenotypeFieldAccessors {
|
|||
public static abstract class Accessor {
|
||||
public abstract int[] getValues(final Genotype g);
|
||||
|
||||
public int getSize(final Genotype g) {
|
||||
public final int getSize(final Genotype g) {
|
||||
final int[] v = getValues(g);
|
||||
return v == null ? 0 : v.length;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue