Genotype encoding uses the BCF2FieldEncoder system
This commit is contained in:
parent
9ac4203254
commit
dd6aee347a
|
|
@ -220,6 +220,7 @@ public final class BCF2Utils {
|
|||
return new File( path + ".bcf" );
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
|
|
@ -229,6 +230,7 @@ public final class BCF2Utils {
|
|||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
|
|
@ -244,6 +246,27 @@ public final class BCF2Utils {
|
|||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the maximum BCF2 integer size of t1 and t2
|
||||
*
|
||||
* For example, if t1 == INT8 and t2 == INT16 returns INT16
|
||||
*
|
||||
* @param t1
|
||||
* @param t2
|
||||
* @return
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"})
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||
case INT32: return t1;
|
||||
default: throw new ReviewedStingException("BUG: unexpected BCF2Type " + t1);
|
||||
}
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
|
|
@ -257,4 +280,21 @@ public final class BCF2Utils {
|
|||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,8 +56,9 @@ public abstract class VCFCompoundHeaderLine extends VCFHeaderLine implements VCF
|
|||
public String getDescription() { return description; }
|
||||
public VCFHeaderLineType getType() { return type; }
|
||||
public VCFHeaderLineCount getCountType() { return countType; }
|
||||
public boolean isFixedCount() { return countType == VCFHeaderLineCount.INTEGER; }
|
||||
public int getCount() {
|
||||
if ( countType != VCFHeaderLineCount.INTEGER )
|
||||
if ( ! isFixedCount() )
|
||||
throw new ReviewedStingException("Asking for header line count when type is not an integer");
|
||||
return count;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,8 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
|
|
@ -44,6 +46,11 @@ import java.util.Map;
|
|||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
@Invariant({
|
||||
"headerLine != null",
|
||||
"BCF2Type.INTEGERS.contains(dictionaryOffsetType)",
|
||||
"dictionaryOffset >= 0"
|
||||
})
|
||||
public abstract class BCF2FieldEncoder {
|
||||
final VCFCompoundHeaderLine headerLine;
|
||||
final BCF2Type fixedType;
|
||||
|
|
@ -72,16 +79,41 @@ public abstract class BCF2FieldEncoder {
|
|||
public boolean hasUnboundedCount() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; }
|
||||
public boolean hasContextDeterminedCount() { return ! hasFixedCount() && ! hasUnboundedCount(); }
|
||||
|
||||
//
|
||||
// TODO -- this class should own two clean methods
|
||||
//
|
||||
// Tell us whether the type and size are static, determined by from the VC itself,
|
||||
// or from the actual encoded values. If the last case, provide a function that tell us
|
||||
// the encoding type and size of the underlying data, given a single value.
|
||||
//
|
||||
|
||||
// TODO -- cleanup logic of counts
|
||||
// todo -- differentiate between the VCF header declared size and the encoded size
|
||||
// TODO -- for example, getUnboundedCount should be getCountFromSizeOfValue()
|
||||
//
|
||||
// GenotypeEncoders need to inspect the size properties of the underlying encoder
|
||||
// and determine how (and whether) they need to iterate once through the data to
|
||||
// determine max size (for padding)
|
||||
//
|
||||
|
||||
@Requires("hasFixedCount()")
|
||||
public int getFixedCount() { return headerLine.getCount(); }
|
||||
public int getFixedCount() {
|
||||
return headerLine.getCount();
|
||||
}
|
||||
|
||||
public int getUnboundedCount(final Object value) {
|
||||
return value instanceof List ? ((List) value).size() : 1;
|
||||
}
|
||||
|
||||
public int getContextDeterminedCount(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
|
||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
||||
if ( hasFixedCount() )
|
||||
return getFixedCount();
|
||||
else if ( hasUnboundedCount() )
|
||||
return value instanceof List ? ((List) value).size() : 1;
|
||||
return getUnboundedCount(value);
|
||||
else
|
||||
return getContextDeterminedCount(vc);
|
||||
}
|
||||
|
|
@ -107,7 +139,11 @@ public abstract class BCF2FieldEncoder {
|
|||
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
||||
}
|
||||
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException;
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
|
|
@ -133,10 +169,10 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
if ( value != null ) {
|
||||
final String s = encodeString(value);
|
||||
encoder.encodeString(s, s.length());
|
||||
encoder.encodeString(s, Math.max(s.length(), minValues));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -153,7 +189,7 @@ public abstract class BCF2FieldEncoder {
|
|||
public static class Flag extends BCF2FieldEncoder {
|
||||
public Flag(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.INT8);
|
||||
if ( getHeaderLine().getCount() != 0 )
|
||||
if ( ! headerLine.isFixedCount() || headerLine.getCount() != 0 )
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||
}
|
||||
|
||||
|
|
@ -163,7 +199,8 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
@Requires("minValues <= 1")
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodePrimitive(1, getFixedType());
|
||||
}
|
||||
}
|
||||
|
|
@ -174,10 +211,14 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
for ( final double d : doubles )
|
||||
int count = 0;
|
||||
for ( final double d : doubles ) {
|
||||
encoder.encodeRawFloat(d);
|
||||
count++;
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -186,15 +227,26 @@ public abstract class BCF2FieldEncoder {
|
|||
super(headerLine, encoder, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getUnboundedCount(final Object value) {
|
||||
return value == null ? 0 : ((int[])value).length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
for ( final int i : (int[])value )
|
||||
encoder.encodeRawInt(i, type);
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
if ( value != null ) {
|
||||
for ( final int i : (int[])value ) {
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
}
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -209,9 +261,13 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
for ( final int i : toList(Integer.class, value) )
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
int count = 0;
|
||||
for ( final int i : toList(Integer.class, value) ) {
|
||||
encoder.encodeRawInt(i, type);
|
||||
count++;
|
||||
}
|
||||
for ( ; count < minValues; count++ ) encoder.encodeRawMissingValue(type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -226,8 +282,9 @@ public abstract class BCF2FieldEncoder {
|
|||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encoder.encodeRawInt((Integer)value, type);
|
||||
@Requires("minValues <= 1") // 0 is ok as this means no values need to be encoded
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException {
|
||||
encoder.encodeRawInt(value == null ? type.getMissingBytes() : (Integer)value, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,18 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
|
|
@ -58,43 +67,53 @@ import java.io.IOException;
|
|||
* @since Date created
|
||||
*/
|
||||
public abstract class BCF2FieldWriter {
|
||||
private final VCFHeader header;
|
||||
private final BCF2FieldEncoder fieldEncoder;
|
||||
|
||||
protected BCF2FieldWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
protected BCF2FieldWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
this.header = header;
|
||||
this.fieldEncoder = fieldEncoder;
|
||||
}
|
||||
|
||||
protected VCFHeader getHeader() { return header; }
|
||||
protected BCF2FieldEncoder getFieldEncoder() {
|
||||
return fieldEncoder;
|
||||
}
|
||||
protected String getField() { return getFieldEncoder().getField(); }
|
||||
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
encoder.encodeTyped(fieldEncoder.getDictionaryOffset(), fieldEncoder.getDictionaryOffsetType());
|
||||
}
|
||||
|
||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { }
|
||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { } // TODO -- overload done so that we null out values and test for correctness
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldWriter " + getClass().getSimpleName() + " with encoder " + getFieldEncoder();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Sites writers
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static abstract class SiteWriter extends BCF2FieldWriter {
|
||||
protected SiteWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
super(fieldEncoder);
|
||||
protected SiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
|
||||
}
|
||||
|
||||
public static class GenericSiteWriter extends SiteWriter {
|
||||
public GenericSiteWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
super(fieldEncoder);
|
||||
public GenericSiteWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
final Object rawValue = vc.getAttribute(getFieldEncoder().getField(), null);
|
||||
final Object rawValue = vc.getAttribute(getField(), null);
|
||||
final BCF2Type type = getFieldEncoder().getType(rawValue);
|
||||
if ( rawValue == null ) {
|
||||
// the value is missing, just write in null
|
||||
|
|
@ -106,5 +125,189 @@ public abstract class BCF2FieldWriter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Genotypes writers
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public static abstract class GenotypesWriter extends BCF2FieldWriter {
|
||||
int nValuesPerGenotype = -1;
|
||||
BCF2Type encodingType = null;
|
||||
|
||||
protected GenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
||||
if ( fieldEncoder.hasFixedCount() ) {
|
||||
nValuesPerGenotype = getFieldEncoder().getFixedCount();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// writes the key information
|
||||
super.start(encoder, vc);
|
||||
|
||||
// only update if we need to
|
||||
if ( ! getFieldEncoder().hasFixedCount() ) {
|
||||
if ( getFieldEncoder().hasContextDeterminedCount() )
|
||||
// we are cheap -- just depends on genotype of allele counts
|
||||
nValuesPerGenotype = getFieldEncoder().getContextDeterminedCount(vc);
|
||||
else
|
||||
// we have to go fishing through the values themselves (expensive)
|
||||
nValuesPerGenotype = computeMaxSizeOfGenotypeFieldFromValues(vc);
|
||||
}
|
||||
|
||||
encoder.encodeType(nValuesPerGenotype, encodingType);
|
||||
}
|
||||
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
final Object fieldValue = g.getAttribute(getField(), null);
|
||||
getFieldEncoder().encodeValue(encoder, fieldValue, encodingType, nValuesPerGenotype);
|
||||
}
|
||||
|
||||
public Object getGenotypeValue(final Genotype g) {
|
||||
return g.getAttribute(getField());
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final VariantContext vc) {
|
||||
int size = -1;
|
||||
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final Object o = getGenotypeValue(g);
|
||||
size = Math.max(size, getFieldEncoder().getBCFFieldCount(vc, o));
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
}
|
||||
|
||||
public static class FixedTypeGenotypesWriter extends GenotypesWriter {
|
||||
public FixedTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
|
||||
encodingType = getFieldEncoder().getFixedType();
|
||||
}
|
||||
}
|
||||
|
||||
public static class IntegerTypeGenotypesWriter extends GenotypesWriter {
|
||||
public IntegerTypeGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
// the only value that is dynamic are integers
|
||||
final List<Integer> values = new ArrayList<Integer>(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
for ( final Object i : BCF2Utils.toList(g.getAttribute(getField(), null)) ) {
|
||||
values.add((Integer)i); // we know they are all integers
|
||||
}
|
||||
}
|
||||
|
||||
encodingType = BCF2Utils.determineIntegerType(values);
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO THIS ROUTINE NEEDS TO BE OPTIMIZED. IT ACCOUNTS FOR A SIGNIFICANT AMOUNT OF THE
|
||||
// TODO RUNTIME FOR WRITING OUT BCF FILES WITH MANY GENOTYPES
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
public static class IGFGenotypesWriter extends GenotypesWriter {
|
||||
final IntGenotypeFieldAccessors.Accessor ige;
|
||||
|
||||
public IGFGenotypesWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
super(header, fieldEncoder);
|
||||
this.ige = ige;
|
||||
|
||||
if ( ! (fieldEncoder instanceof BCF2FieldEncoder.IntArray) )
|
||||
throw new ReviewedStingException("BUG: IntGenotypesWriter requires IntArray encoder for field " + getField());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
encodingType = BCF2Type.INT8;
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
final int[] pls = ige.getValues(g);
|
||||
final BCF2Type plsType = getFieldEncoder().getType(pls);
|
||||
encodingType = BCF2Utils.maxIntegerType(encodingType, plsType);
|
||||
if ( encodingType == BCF2Type.INT32 )
|
||||
break; // stop early
|
||||
}
|
||||
|
||||
super.start(encoder, vc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
getFieldEncoder().encodeValue(encoder, ige.getValues(g), encodingType, nValuesPerGenotype);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getGenotypeValue(final Genotype g) {
|
||||
return ige.getValues(g);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO we should really have a fast path for encoding diploid genotypes where
|
||||
// TODO we don't pay the overhead of creating the allele maps
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
public static class GTWriter extends GenotypesWriter {
|
||||
Map<Allele, Integer> alleleMap = null;
|
||||
|
||||
public GTWriter(final VCFHeader header, final BCF2FieldEncoder fieldEncoder) {
|
||||
super(header, fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
|
||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
|
||||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
encodingType = BCF2Type.INT8;
|
||||
alleleMap = buildAlleleMap(vc);
|
||||
nValuesPerGenotype = vc.getMaxPloidy();
|
||||
super.start(encoder, vc); //To change body of overridden methods use File | Settings | File Templates.
|
||||
}
|
||||
|
||||
@Override
|
||||
public void addGenotype(final BCF2Encoder encoder, final VariantContext vc, final Genotype g) throws IOException {
|
||||
final List<Allele> alleles = g.getAlleles();
|
||||
final int samplePloidy = alleles.size();
|
||||
for ( int i = 0; i < nValuesPerGenotype; i++ ) {
|
||||
if ( i < samplePloidy ) {
|
||||
// we encode the actual allele
|
||||
final Allele a = alleles.get(i);
|
||||
final int offset = alleleMap.get(a);
|
||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||
encoder.encodePrimitive(encoded, encodingType);
|
||||
} else {
|
||||
// we need to pad with missing as we have ploidy < max for this sample
|
||||
encoder.encodePrimitive(encodingType.getMissingBytes(), encodingType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final static Map<Allele, Integer> buildAlleleMap(final VariantContext vc) {
|
||||
final Map<Allele, Integer> alleleMap = new HashMap<Allele, Integer>(vc.getAlleles().size()+1);
|
||||
alleleMap.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 0; i < alleles.size(); i++ ) {
|
||||
alleleMap.put(alleles.get(i), i);
|
||||
}
|
||||
|
||||
return alleleMap;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,10 +26,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
|
|
@ -65,6 +62,8 @@ import java.util.Map;
|
|||
public class BCF2FieldWriterManager {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
|
||||
final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
|
||||
final Map<String, BCF2FieldWriter.GenotypesWriter> genotypesWriters = new HashMap<String, BCF2FieldWriter.GenotypesWriter>();
|
||||
final IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
|
||||
public BCF2FieldWriterManager() { }
|
||||
|
||||
|
|
@ -72,41 +71,108 @@ public class BCF2FieldWriterManager {
|
|||
for (final VCFHeaderLine line : header.getMetaData()) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
final String field = ((VCFInfoHeaderLine) line).getID();
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter((VCFInfoHeaderLine)line, encoder, dictionary);
|
||||
logger.info("Installing for field " + field + " field writer " + writer);
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, dictionary);
|
||||
log(field, writer);
|
||||
siteWriters.put(field, writer);
|
||||
} else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
final String field = ((VCFFormatHeaderLine) line).getID();
|
||||
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, dictionary);
|
||||
log(field, writer);
|
||||
genotypesWriters.put(field, writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFInfoHeaderLine line, final BCF2Encoder encoder, final Map<String, Integer> dict) {
|
||||
BCF2FieldEncoder fieldEncoder = null;
|
||||
switch ( line.getType() ) {
|
||||
case Character:
|
||||
case String:
|
||||
fieldEncoder = new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
|
||||
break;
|
||||
case Flag:
|
||||
fieldEncoder = new BCF2FieldEncoder.Flag(line, encoder, dict);
|
||||
break;
|
||||
case Float:
|
||||
fieldEncoder = new BCF2FieldEncoder.Float(line, encoder, dict);
|
||||
break;
|
||||
case Integer:
|
||||
if ( line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1 )
|
||||
fieldEncoder = new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
|
||||
else
|
||||
fieldEncoder = new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
|
||||
return new BCF2FieldWriter.GenericSiteWriter(fieldEncoder);
|
||||
private final void log(final String field, final BCF2FieldWriter writer) {
|
||||
logger.info("Using writer " + writer);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Master routine to look at the header, a specific line, and
|
||||
// build an appropriate SiteWriter for that header element
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFHeader header,
|
||||
final VCFInfoHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict) {
|
||||
return new BCF2FieldWriter.GenericSiteWriter(header, createFieldEncoder(line, encoder, dict, false));
|
||||
}
|
||||
|
||||
private BCF2FieldEncoder createFieldEncoder(final VCFCompoundHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict,
|
||||
final boolean createGenotypesEncoders ) {
|
||||
|
||||
if ( createGenotypesEncoders && intGenotypeFieldAccessors.getAccessor(line.getID()) != null ) {
|
||||
if ( line.getType() != VCFHeaderLineType.Integer )
|
||||
logger.warn("Warning: field " + line.getID() + " expected to encode an integer but saw " + line.getType() + " for record " + line);
|
||||
return new BCF2FieldEncoder.IntArray(line, encoder, dict);
|
||||
} else if ( createGenotypesEncoders && line.getID().equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
} else {
|
||||
switch ( line.getType() ) {
|
||||
case Character:
|
||||
case String:
|
||||
return new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
|
||||
case Flag:
|
||||
return new BCF2FieldEncoder.Flag(line, encoder, dict);
|
||||
case Float:
|
||||
return new BCF2FieldEncoder.Float(line, encoder, dict);
|
||||
case Integer:
|
||||
if ( line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1 )
|
||||
return new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
|
||||
else
|
||||
return new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Master routine to look at the header, a specific line, and
|
||||
// build an appropriate Genotypes for that header element
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private BCF2FieldWriter.GenotypesWriter createGenotypesWriter(final VCFHeader header,
|
||||
final VCFFormatHeaderLine line,
|
||||
final BCF2Encoder encoder,
|
||||
final Map<String, Integer> dict) {
|
||||
final String field = line.getID();
|
||||
final BCF2FieldEncoder fieldEncoder = createFieldEncoder(line, encoder, dict, true);
|
||||
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
return new BCF2FieldWriter.GTWriter(header, fieldEncoder);
|
||||
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
||||
return new BCF2FieldWriter.IGFGenotypesWriter(header, fieldEncoder, intGenotypeFieldAccessors.getAccessor(field));
|
||||
} else if ( line.getType() == VCFHeaderLineType.Integer ) {
|
||||
return new BCF2FieldWriter.IntegerTypeGenotypesWriter(header, fieldEncoder);
|
||||
} else {
|
||||
return new BCF2FieldWriter.FixedTypeGenotypesWriter(header, fieldEncoder);
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Accessors to get site / genotype writers
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) {
|
||||
final BCF2FieldWriter.SiteWriter writer = siteWriters.get(key);
|
||||
return getWriter(key, siteWriters);
|
||||
}
|
||||
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String key) {
|
||||
return getWriter(key, genotypesWriters);
|
||||
}
|
||||
|
||||
public <T> T getWriter(final String key, final Map<String, T> map) {
|
||||
final T writer = map.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
return writer;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,7 +50,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private final boolean doNotWriteGenotypes;
|
||||
|
||||
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
|
||||
IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
|
||||
|
||||
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
|
||||
|
|
@ -233,251 +232,32 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
writer.start(encoder, vc);
|
||||
writer.site(encoder, vc);
|
||||
writer.done(encoder, vc);
|
||||
|
||||
// the old way of doing things
|
||||
// final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
|
||||
// encodeStringByRef(key);
|
||||
// encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Type matching routines between VCF and BCF
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
|
||||
private final class VCFToBCFEncoding {
|
||||
VCFHeaderLineType vcfType;
|
||||
BCF2Type BCF2Type;
|
||||
List<? extends Object> valuesToEncode;
|
||||
|
||||
private VCFToBCFEncoding(final VCFHeaderLineType vcfType, final BCF2Type BCF2Type, final List<? extends Object> valuesToEncode) {
|
||||
this.vcfType = vcfType;
|
||||
this.BCF2Type = BCF2Type;
|
||||
this.valuesToEncode = valuesToEncode;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO -- we really need explicit converters as first class objects
|
||||
// TODO -- need to generalize so we can enable vectors of compressed genotype ints
|
||||
// TODO -- no sense in allocating these over and over
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
private final VCFToBCFEncoding prepFieldValueForEncoding(final String field, final Object value) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
final boolean isList = value instanceof List;
|
||||
final Object toType = isList ? ((List)value).get(0) : value;
|
||||
|
||||
try {
|
||||
switch ( metaData.getType() ) {
|
||||
case Character:
|
||||
assert toType instanceof String;
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, Collections.singletonList(value));
|
||||
case Flag:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.INT8, Collections.singletonList(1));
|
||||
case String:
|
||||
final String s = isList ? BCF2Utils.collapseStringList((List<String>)value) : (String)value;
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.CHAR, Collections.singletonList(s));
|
||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||
List<Integer> l;
|
||||
BCF2Type intType;
|
||||
if ( isList ) {
|
||||
l = (List<Integer>)value;
|
||||
intType = BCF2Utils.determineIntegerType(l);
|
||||
} else if ( value != null ) {
|
||||
intType = BCF2Utils.determineIntegerType((Integer) value);
|
||||
l = Collections.singletonList((Integer)value);
|
||||
} else {
|
||||
intType = BCF2Type.INT8;
|
||||
l = Collections.singletonList((Integer) null);
|
||||
}
|
||||
return new VCFToBCFEncoding(metaData.getType(), intType, l);
|
||||
case Float:
|
||||
return new VCFToBCFEncoding(metaData.getType(), BCF2Type.FLOAT, isList ? (List<Double>)value : Collections.singletonList(value));
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Error computing VCF -> BCF encoding. Received cast class exception"
|
||||
+ " indicating that the VCF header for " + metaData + " is inconsistent with the" +
|
||||
" value seen in the VariantContext object = " + value, e);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Genotype field encoding
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
final BCF2Codec.LazyData lazyData = getLazyData(vc);
|
||||
if ( lazyData != null ) {
|
||||
// we never decoded any data from this BCF file, so just pass it back
|
||||
return lazyData.bytes;
|
||||
} else {
|
||||
// we have to do work to convert the VC into a BCF2 byte stream
|
||||
List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
final List<String> genotypeFields = VCFWriter.calcVCFGenotypeKeys(vc, header);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
addGenotypes(vc);
|
||||
} else if ( intGenotypeFieldAccessors.getAccessor(field) != null ) {
|
||||
addIntGenotypeField(field, intGenotypeFieldAccessors.getAccessor(field), vc);
|
||||
} else {
|
||||
addGenericGenotypeField(vc, field);
|
||||
final BCF2FieldWriter.GenotypesWriter writer = fieldManager.getGenotypeFieldWriter(field);
|
||||
|
||||
writer.start(encoder, vc);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
// todo -- can we optimize this get (string -> genotype) which can be expensive
|
||||
final Genotype g = vc.getGenotype(name);
|
||||
writer.addGenotype(encoder, vc, g);
|
||||
}
|
||||
writer.done(encoder, vc);
|
||||
}
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
}
|
||||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContextUtils.getMetaDataForField(header, field);
|
||||
assert metaData != null; // field is supposed to be in header
|
||||
|
||||
int nFields = metaData.getCount(vc.getNAlleles() - 1);
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc, ige);
|
||||
} else {
|
||||
return nFields;
|
||||
}
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc, final IntGenotypeFieldAccessors.Accessor ige) {
|
||||
int size = -1;
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
|
||||
if ( ige == null ) {
|
||||
for ( final Genotype g : gc ) {
|
||||
final Object o = g.getAttribute(field);
|
||||
if ( o == null ) continue;
|
||||
if ( o instanceof List ) {
|
||||
// only do compute if first value is of type list
|
||||
size = Math.max(size, ((List)o).size());
|
||||
} else if ( size == -1 )
|
||||
size = 1;
|
||||
}
|
||||
} else {
|
||||
for ( final Genotype g : gc ) {
|
||||
size = Math.max(size, ige.getSize(g));
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
|
||||
final int numInFormatField = getNGenotypeFieldValues(field, vc, null);
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(field, null);
|
||||
|
||||
startGenotypeField(field, numInFormatField, encoding.BCF2Type);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
try {
|
||||
final Object fieldValue = g.getAttribute(field);
|
||||
|
||||
if ( numInFormatField == 1 ) {
|
||||
// we encode the actual allele, encodeRawValue handles the missing case where fieldValue == null
|
||||
encoder.encodeRawValue(fieldValue, encoding.BCF2Type);
|
||||
} else if ( encoding.vcfType == VCFHeaderLineType.String ) {
|
||||
// we have multiple strings, so we need to collapse them
|
||||
throw new ReviewedStingException("LIMITATION: Cannot encode arrays of string values in genotypes");
|
||||
} else {
|
||||
// multiple values, need to handle general case
|
||||
final List<Object> asList = toList(fieldValue);
|
||||
final int nSampleValues = asList.size();
|
||||
for ( int i = 0; i < numInFormatField; i++ ) {
|
||||
encoder.encodeRawValue(i < nSampleValues ? asList.get(i) : null, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new ReviewedStingException("Value stored in VariantContext incompatible with VCF header type for field " + field, e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO THIS ROUTINE NEEDS TO BE OPTIMIZED. IT ACCOUNTS FOR A SIGNIFICANT AMOUNT OF THE
|
||||
// TODO RUNTIME FOR WRITING OUT BCF FILES WITH MANY GENOTYPES
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
private final void addIntGenotypeField(final String field,
|
||||
final IntGenotypeFieldAccessors.Accessor ige,
|
||||
final VariantContext vc) throws IOException {
|
||||
final int numPLs = getNGenotypeFieldValues(field, vc, ige);
|
||||
final int[] allPLs = new int[numPLs * vc.getNSamples()];
|
||||
|
||||
// collect all of the PLs into a single vector of values
|
||||
int i = 0;
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
final int[] pls = ige.getValues(g);
|
||||
|
||||
if ( pls == null )
|
||||
for ( int j = 0; j < numPLs; j++) allPLs[i++] = -1;
|
||||
else {
|
||||
assert pls.length == numPLs;
|
||||
for ( int pl : pls ) allPLs[i++] = pl;
|
||||
}
|
||||
}
|
||||
|
||||
// determine the best size
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(allPLs);
|
||||
startGenotypeField(field, numPLs, type);
|
||||
for ( int pl : allPLs )
|
||||
encoder.encodePrimitive(pl == -1 ? type.getMissingBytes() : pl, type);
|
||||
}
|
||||
|
||||
// TODO TODO TODO TODO TODO
|
||||
// TODO
|
||||
// TODO we should really have a fast path for encoding diploid genotypes where
|
||||
// TODO we don't pay the overhead of creating the allele maps
|
||||
// TODO
|
||||
// TODO TODO TODO TODO TODO
|
||||
private final void addGenotypes(final VariantContext vc) throws IOException {
|
||||
if ( vc.getNAlleles() > BCF2Utils.MAX_ALLELES_IN_GENOTYPES )
|
||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||
"with > " + BCF2Utils.MAX_ALLELES_IN_GENOTYPES + " alleles, but you have "
|
||||
+ vc.getNAlleles() + " at " + vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
final Map<Allele, Integer> alleleMap = buildAlleleMap(vc);
|
||||
final int maxPloidy = vc.getMaxPloidy();
|
||||
startGenotypeField(VCFConstants.GENOTYPE_KEY, maxPloidy, BCF2Type.INT8);
|
||||
for ( final String name : header.getGenotypeSamples() ) {
|
||||
final Genotype g = vc.getGenotype(name); // todo -- can we optimize this?
|
||||
final List<Allele> alleles = g.getAlleles();
|
||||
final int samplePloidy = alleles.size();
|
||||
for ( int i = 0; i < maxPloidy; i++ ) {
|
||||
if ( i < samplePloidy ) {
|
||||
// we encode the actual allele
|
||||
final Allele a = alleles.get(i);
|
||||
final int offset = alleleMap.get(a);
|
||||
final int encoded = ((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00);
|
||||
encoder.encodePrimitive(encoded, BCF2Type.INT8);
|
||||
} else {
|
||||
// we need to pad with missing as we have ploidy < max for this sample
|
||||
encoder.encodePrimitive(BCF2Type.INT8.getMissingBytes(), BCF2Type.INT8);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final static Map<Allele, Integer> buildAlleleMap(final VariantContext vc) {
|
||||
final Map<Allele, Integer> alleleMap = new HashMap<Allele, Integer>(vc.getAlleles().size()+1);
|
||||
alleleMap.put(Allele.NO_CALL, -1); // convenience for lookup
|
||||
|
||||
final List<Allele> alleles = vc.getAlleles();
|
||||
for ( int i = 0; i < alleles.size(); i++ ) {
|
||||
alleleMap.put(alleles.get(i), i);
|
||||
}
|
||||
|
||||
return alleleMap;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Low-level block encoding
|
||||
|
|
@ -499,16 +279,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
outputStream.write(genotypesBlock);
|
||||
}
|
||||
|
||||
@Requires("string != null")
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
private final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||
final Integer offset = stringDictionaryMap.get(string);
|
||||
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(offset);
|
||||
encoder.encodeTyped(offset, type);
|
||||
return type;
|
||||
}
|
||||
|
||||
@Requires("! strings.isEmpty()")
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
|
|
@ -540,29 +310,6 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
return maxType;
|
||||
}
|
||||
|
||||
@Requires({"key != null", "! key.equals(\"\")", "size >= 0"})
|
||||
private final void startGenotypeField(final String key, final int size, final BCF2Type valueType) throws IOException {
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeType(size, valueType);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
private final static List<Object> toList(final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<Object>)o;
|
||||
else return Collections.singletonList(o);
|
||||
}
|
||||
|
||||
/**
|
||||
* Create the contigDictionary from the contigLines extracted from the VCF header
|
||||
*
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSamplePilot1() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10,022,000-10,025,000", 1,
|
||||
Arrays.asList("bf5c76bec6e00199d441b6175b6b7c39"));
|
||||
Arrays.asList("40dae29480e26efb117344e0e82e997c"));
|
||||
executeTest("test MultiSample Pilot1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -37,7 +37,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("9f56f8d62c047213c894c3f250706aea"));
|
||||
Arrays.asList("96b9f6005d9437bf527344cf98319739"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testWithAllelesPassedIn2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "allelesForUG.vcf -I " + validationDataLocation + "pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,025,000", 1,
|
||||
Arrays.asList("0a5048062cd9022b761ae87efed5957e"));
|
||||
Arrays.asList("02c5c466ad26104b64166beb788a618c"));
|
||||
executeTest("test MultiSample Pilot2 with alleles passed in and emitting all sites", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -53,7 +53,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSingleSamplePilot2() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("f50a30bf9bbd4e5dcd5d7d9282b6dadf"));
|
||||
Arrays.asList("7cc7ec249243a0dcb9e3a3033d4704e4"));
|
||||
executeTest("test SingleSample Pilot2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -61,7 +61,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultipleSNPAlleles() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH --dbsnp " + b37dbSNP129 + " -I " + testDir + "multiallelic.snps.bam -o %s -L " + testDir + "multiallelic.snps.intervals", 1,
|
||||
Arrays.asList("6fb6ea5f2b9da02a0fea7cb2994fb5db"));
|
||||
Arrays.asList("c0416efb6ff481889542e7807baa495c"));
|
||||
executeTest("test Multiple SNP alleles", spec);
|
||||
}
|
||||
|
||||
|
|
@ -69,7 +69,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testBadRead() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm BOTH -I " + testDir + "badRead.test.bam -o %s -L 1:22753424-22753464", 1,
|
||||
Arrays.asList("95158fb50db5d41a678cd331a3ffe5e1"));
|
||||
Arrays.asList("e2cf97bca4a720ca64ca7f682da6c9f0"));
|
||||
executeTest("test bad read", spec);
|
||||
}
|
||||
|
||||
|
|
@ -77,7 +77,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testReverseTrim() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b37KGReference + " -nosl --no_cmdline_in_header -glm INDEL -I " + validationDataLocation + "CEUTrio.HiSeq.b37.chr20.10_11mb.bam -o %s -L 20:10289124 -L 20:10090289", 1,
|
||||
Arrays.asList("c86e05f315a86bc190d72cde911e6fe2"));
|
||||
Arrays.asList("976694d7aedd0272a3e2ba46e00581f5"));
|
||||
executeTest("test reverse trim", spec);
|
||||
}
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
//
|
||||
// --------------------------------------------------------------------------------------------------------------
|
||||
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "f6d655714706b6e8390037db3fad60ef";
|
||||
private final static String COMPRESSED_OUTPUT_MD5 = "c0f3d88386367fcd66d196b49a22bfaa";
|
||||
|
||||
@Test
|
||||
public void testCompressedOutput() {
|
||||
|
|
@ -108,7 +108,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
|
||||
// Note that we need to turn off any randomization for this to work, so no downsampling and no annotations
|
||||
|
||||
String md5 = "7bc812cc553b4ab77c08049f0e32d0f6";
|
||||
String md5 = "34cb7146c037925e8f324cffd986834d";
|
||||
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -dt NONE -G none -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,075,000", 1,
|
||||
|
|
@ -140,7 +140,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinBaseQualityScore() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 --min_base_quality_score 26", 1,
|
||||
Arrays.asList("dfeaccb68165fdaffafde9150914432d"));
|
||||
Arrays.asList("ac4c8e0fdaf94a7949ceb157d3c836a9"));
|
||||
executeTest("test min_base_quality_score 26", spec);
|
||||
}
|
||||
|
||||
|
|
@ -148,7 +148,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testSLOD() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH --dbsnp " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("35ef19b4f248969c74da8bd7489385d6"));
|
||||
Arrays.asList("751cfe684b0ed19b0d509bb7a315d65d"));
|
||||
executeTest("test SLOD", spec);
|
||||
}
|
||||
|
||||
|
|
@ -156,7 +156,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testNDA() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " --annotateNDA -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("aa49989fde8c6378f5c751f8b267c471"));
|
||||
Arrays.asList("cbf1b8b261f726abe43045806e17e841"));
|
||||
executeTest("test NDA", spec);
|
||||
}
|
||||
|
||||
|
|
@ -164,23 +164,23 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testCompTrack() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
"-T UnifiedGenotyper -R " + b36KGReference + " --no_cmdline_in_header -glm BOTH -comp:FOO " + b36dbSNP129 + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000", 1,
|
||||
Arrays.asList("ffaeb60a5776d85b41c64786ddc4d14d"));
|
||||
Arrays.asList("70fd3b551f9def332393b7f3976440ac"));
|
||||
executeTest("test using comp track", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterSitesOnly() {
|
||||
testOutputParameters("-sites_only", "f9a4005c53291170800e6023503d5635");
|
||||
testOutputParameters("-sites_only", "9700208609431548b2651dc582f1ea10");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllConfident() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "e6c63baff51aaeb318c8bebaf2989828");
|
||||
testOutputParameters("--output_mode EMIT_ALL_CONFIDENT_SITES", "b7bfc2a5298f86e3aef1bb70bebfb38e");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testOutputParameterAllSites() {
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "43ffa34646d781a368ea81342c21ae2e");
|
||||
testOutputParameters("--output_mode EMIT_ALL_SITES", "90dcce8cd6de92258f332b261e64a5e9");
|
||||
}
|
||||
|
||||
private void testOutputParameters(final String args, final String md5) {
|
||||
|
|
@ -194,7 +194,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_call_conf 10 ", 1,
|
||||
Arrays.asList("c7cb29121eb30e752ab6652a6d2a62a6"));
|
||||
Arrays.asList("c5311739d1bba16cd59fb33e5f92ff49"));
|
||||
executeTest("test confidence 1", spec1);
|
||||
}
|
||||
|
||||
|
|
@ -202,7 +202,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testConfidence2() {
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommand + " -I " + validationDataLocation + "NA12878.1kg.p2.chr1_10mb_11_mb.SLX.bam -o %s -L 1:10,000,000-10,010,000 -stand_emit_conf 10 ", 1,
|
||||
Arrays.asList("e7bdb76be82420a03ff28038d283822d"));
|
||||
Arrays.asList("189f5b7026595a0814e1eb1466834760"));
|
||||
executeTest("test confidence 2", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -213,12 +213,12 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
// --------------------------------------------------------------------------------------------------------------
|
||||
@Test
|
||||
public void testHeterozyosity1() {
|
||||
testHeterozosity( 0.01, "ca65e199e9ff0bc986df3dee74e11eb1" );
|
||||
testHeterozosity( 0.01, "a818570e33316086ad5cb95992faca69" );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testHeterozyosity2() {
|
||||
testHeterozosity( 1.0 / 1850, "ddcdfe4a5252da59278a6f1ba6f8a175" );
|
||||
testHeterozosity( 1.0 / 1850, "3bfdba9bcc9c84d326750ad4a879897f" );
|
||||
}
|
||||
|
||||
private void testHeterozosity(final double arg, final String md5) {
|
||||
|
|
@ -242,7 +242,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("c4b3876d76e3d0fb78a1d3ebd674f1a1"));
|
||||
Arrays.asList("1bd18653e8d9bf72c180142c2287eb5e"));
|
||||
|
||||
executeTest(String.format("test multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -261,7 +261,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -L 1:10,000,000-10,100,000" +
|
||||
" -baq CALCULATE_AS_NECESSARY",
|
||||
1,
|
||||
Arrays.asList("41445b1cd1a82af71126ff1692f7a5fe"));
|
||||
Arrays.asList("1defb30f6b870fbe5bcb27aed22ffb04"));
|
||||
|
||||
executeTest(String.format("test calling with BAQ"), spec);
|
||||
}
|
||||
|
|
@ -280,7 +280,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("c9e79470a4ce6eacde366e9fcf4d5b14"));
|
||||
Arrays.asList("2d52f5243da578c4070b03881852aae3"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX"), spec);
|
||||
}
|
||||
|
|
@ -295,7 +295,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -minIndelCnt 1" +
|
||||
" -L 1:10,000,000-10,100,000",
|
||||
1,
|
||||
Arrays.asList("70f8a17ba68131520db5c764ac5acdd2"));
|
||||
Arrays.asList("9271105e630ab39cf1c88b338da54594"));
|
||||
|
||||
executeTest(String.format("test indel caller in SLX with low min allele count"), spec);
|
||||
}
|
||||
|
|
@ -308,7 +308,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
" -o %s" +
|
||||
" -L 1:10,000,000-10,500,000",
|
||||
1,
|
||||
Arrays.asList("e4316d80fd833886820c8b4e122fbfc4"));
|
||||
Arrays.asList("80f0221c47c30a03cb902b1c54c1eb12"));
|
||||
|
||||
executeTest(String.format("test indel calling, multiple technologies"), spec);
|
||||
}
|
||||
|
|
@ -318,7 +318,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("c92aba3635f3331ddf8ae7a0382ca594"));
|
||||
Arrays.asList("ce40ff952025e806638156c3cd002927"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in", spec);
|
||||
}
|
||||
|
||||
|
|
@ -328,7 +328,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
baseCommandIndels + " --output_mode EMIT_ALL_SITES --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles "
|
||||
+ testDir + "indelAllelesForUG.vcf -I " + validationDataLocation +
|
||||
"pilot2_daughters.chr20.10k-11k.bam -o %s -L 20:10,000,000-10,100,000", 1,
|
||||
Arrays.asList("b87034f349887160ec1124e12863d543"));
|
||||
Arrays.asList("4738fa9e68a010e05c64ae965ed94de3"));
|
||||
executeTest("test MultiSample Pilot2 indels with alleles passed in and emitting all sites", spec);
|
||||
}
|
||||
|
||||
|
|
@ -336,13 +336,13 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMultiSampleIndels1() {
|
||||
WalkerTest.WalkerTestSpec spec1 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " -I " + validationDataLocation + "low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("51e6a7868d2ea2daefa411ed82f18be2"));
|
||||
Arrays.asList("0b0114364680a23e125f469208b96c6f"));
|
||||
List<File> result = executeTest("test MultiSample Pilot1 CEU indels", spec1).getFirst();
|
||||
|
||||
WalkerTest.WalkerTestSpec spec2 = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndels + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -alleles " + result.get(0).getAbsolutePath() + " -I " + validationDataLocation +
|
||||
"low_coverage_CEU.chr1.10k-11k.bam -o %s -L 1:10450700-10551000", 1,
|
||||
Arrays.asList("954c52be0c6ca9ed5a213a53f4efbc10"));
|
||||
Arrays.asList("adcf53b8dcfde7f2c657745751549bfe"));
|
||||
executeTest("test MultiSample Pilot1 CEU indels using GENOTYPE_GIVEN_ALLELES", spec2);
|
||||
}
|
||||
|
||||
|
|
@ -352,7 +352,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
baseCommandIndelsb37 + " --genotyping_mode GENOTYPE_GIVEN_ALLELES -out_mode EMIT_ALL_SITES -alleles " + testDir + vcf + " -I " + validationDataLocation +
|
||||
"NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam -o %s -L " + validationDataLocation + vcf, 1,
|
||||
Arrays.asList("ae44230ed54fd8ce63711cae908470cb"));
|
||||
Arrays.asList("3e3ac23846801c34acbf10a1a527264a"));
|
||||
executeTest("test GENOTYPE_GIVEN_ALLELES with no evidence in reads", spec);
|
||||
}
|
||||
|
||||
|
|
@ -385,7 +385,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction0() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.0", 1,
|
||||
Arrays.asList("471012c1d3dbec4633710264de5daa24"));
|
||||
Arrays.asList("eb9d8d1b19617190762f62bad139263a"));
|
||||
executeTest("test minIndelFraction 0.0", spec);
|
||||
}
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction25() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 0.25", 1,
|
||||
Arrays.asList("9165507fb202d515512a947a8a9db6bb"));
|
||||
Arrays.asList("952f46506d3ef9c11cafcc072a00750a"));
|
||||
executeTest("test minIndelFraction 0.25", spec);
|
||||
}
|
||||
|
||||
|
|
@ -401,7 +401,7 @@ public class UnifiedGenotyperIntegrationTest extends WalkerTest {
|
|||
public void testMinIndelFraction100() {
|
||||
WalkerTest.WalkerTestSpec spec = new WalkerTest.WalkerTestSpec(
|
||||
assessMinIndelFraction + " -minIndelFrac 1", 1,
|
||||
Arrays.asList("c1bbd4998b7c6dffee1682d3e5c929cc"));
|
||||
Arrays.asList("50a6774b7d8f71fe0e125c204d50ba84"));
|
||||
executeTest("test minIndelFraction 1.0", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -156,20 +156,32 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
}
|
||||
|
||||
private final static void addHeaderLine(final Set<VCFHeaderLine> metaData, final String id, final int count, final VCFHeaderLineType type) {
|
||||
metaData.add(new VCFInfoHeaderLine(id, count, type, "x"));
|
||||
if ( type != VCFHeaderLineType.Flag )
|
||||
metaData.add(new VCFFormatHeaderLine(id, count, type, "x"));
|
||||
}
|
||||
|
||||
private final static void addHeaderLine(final Set<VCFHeaderLine> metaData, final String id, final VCFHeaderLineCount count, final VCFHeaderLineType type) {
|
||||
metaData.add(new VCFInfoHeaderLine(id, count, type, "x"));
|
||||
if ( type != VCFHeaderLineType.Flag )
|
||||
metaData.add(new VCFFormatHeaderLine(id, count, type, "x"));
|
||||
}
|
||||
|
||||
private static void createSyntheticHeader() {
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
|
||||
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "x"));
|
||||
addHeaderLine(metaData, "STRING1", 1, VCFHeaderLineType.String);
|
||||
addHeaderLine(metaData, "STRING3", 3, VCFHeaderLineType.String);
|
||||
addHeaderLine(metaData, "STRING20", 20, VCFHeaderLineType.String);
|
||||
addHeaderLine(metaData, "VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
||||
|
||||
metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
|
||||
metaData.add(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||
metaData.add(new VCFFormatHeaderLine("PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer, "Normalized, Phred-scaled likelihoods for genotypes as defined in the VCF specification"));
|
||||
metaData.add(new VCFFormatHeaderLine("GS", 2, VCFHeaderLineType.String, "A doubleton list of strings"));
|
||||
metaData.add(new VCFFormatHeaderLine("GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "A variable list of strings"));
|
||||
metaData.add(new VCFFormatHeaderLine("FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "Genotype filters"));
|
||||
addHeaderLine(metaData, "GT", 1, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "GQ", 1, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "PL", VCFHeaderLineCount.G, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "GS", 2, VCFHeaderLineType.String);
|
||||
addHeaderLine(metaData, "GV", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
||||
addHeaderLine(metaData, "FT", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String);
|
||||
|
||||
// prep the header
|
||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||
|
|
@ -177,13 +189,13 @@ public class VariantContextTestProvider {
|
|||
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
||||
|
||||
metaData.add(new VCFInfoHeaderLine("INT1", 1, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLAG", 0, VCFHeaderLineType.Flag, "x"));
|
||||
addHeaderLine(metaData, "INT1", 1, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "INT3", 3, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "INT20", 20, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer);
|
||||
addHeaderLine(metaData, "FLOAT1", 1, VCFHeaderLineType.Float);
|
||||
addHeaderLine(metaData, "FLOAT3", 3, VCFHeaderLineType.Float);
|
||||
addHeaderLine(metaData, "FLAG", 0, VCFHeaderLineType.Flag);
|
||||
|
||||
syntheticHeader = new VCFHeader(metaData);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue