Initial version of clean, fast formatting routines built dynamically from a VCF header
-- BCF2FieldEncoder and writers divide up the task of formatting values (atomic or vector; ints, strings, floats, etc.) from the task of writing these out at the sites or genotypes level.
-- Allows us to create efficient encoders for specific combinations of header fields, such as int[]-encoded values with exactly 3 values.
-- Currently only used for INFO fields, but a subsequent commit will include an optimized genotype field encoder.
-- Allows us to naturally support encoding of lists of strings.
-- Bugfixes in VariantContextUtils introduced in the Genotype -> GenotypeBuilder conversion.
-- Fixes for integration test failures.
-- Enabling contig updates.
-- WalkerTest now prints out relative paths where possible to make cut/paste/run easier.
This commit is contained in:
parent
51a3b6e25e
commit
2a86b81a3f
|
|
@ -51,7 +51,7 @@ import java.util.List;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class VariantContextWriterStub implements Stub<VariantContextWriter>, VariantContextWriter {
|
||||
public final static boolean UPDATE_CONTIG_HEADERS = false;
|
||||
public final static boolean UPDATE_CONTIG_HEADERS = true;
|
||||
|
||||
/**
|
||||
* The engine, central to the GATK's processing.
|
||||
|
|
|
|||
|
|
@ -326,7 +326,7 @@ public class BeagleOutputToVCFWalker extends RodWalker<Integer, Integer> {
|
|||
else {
|
||||
originalAttributes.put("OG",".");
|
||||
}
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g.getSampleName(), alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
Genotype imputedGenotype = new GenotypeBuilder(g).alleles(alleles).log10PError(genotypeQuality).attributes(originalAttributes).phased(genotypeIsPhased).make();
|
||||
if ( imputedGenotype.isHet() || imputedGenotype.isHomVar() ) {
|
||||
beagleVarCounts++;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -730,7 +730,7 @@ public class SelectVariants extends RodWalker<Integer, Integer> implements TreeR
|
|||
//Set genotype to no call if it falls in the fraction.
|
||||
if(fractionGenotypes>0 && randomGenotypes.nextDouble()<fractionGenotypes){
|
||||
List<Allele> alleles = Arrays.asList(Allele.NO_CALL, Allele.NO_CALL);
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).GQ(-1).make());
|
||||
genotypes.add(new GenotypeBuilder(genotype).alleles(alleles).noGQ().make());
|
||||
}
|
||||
else{
|
||||
genotypes.add(genotype);
|
||||
|
|
|
|||
|
|
@ -346,6 +346,16 @@ public final class GenotypeBuilder {
|
|||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells this builder to remove all extended attributes
|
||||
*
|
||||
* @return this builder, so that calls can be chained
|
||||
*/
|
||||
public GenotypeBuilder noAttributes() {
|
||||
this.extendedAttributes = null;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* This genotype has this attribute key / value pair.
|
||||
*
|
||||
|
|
|
|||
|
|
@ -462,9 +462,10 @@ public class VariantContextUtils {
|
|||
// Genotypes
|
||||
final GenotypesContext genotypes = GenotypesContext.create(vc.getNSamples());
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
// TODO -- fixme
|
||||
//Map<String, Object> genotypeAttributes = subsetAttributes(g.commonInfo, keysToPreserve);
|
||||
//genotypes.add(new GenotypeBuilder(g).attributes(genotypeAttributes).make());
|
||||
final GenotypeBuilder gb = new GenotypeBuilder(g);
|
||||
// remove AD, DP, PL, and all extended attributes, keeping just GT and GQ
|
||||
gb.noAD().noDP().noPL().noAttributes();
|
||||
genotypes.add(gb.make());
|
||||
}
|
||||
|
||||
return builder.genotypes(genotypes).attributes(attributes);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,233 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public abstract class BCF2FieldEncoder {
|
||||
final VCFCompoundHeaderLine headerLine;
|
||||
final BCF2Type fixedType;
|
||||
final int dictionaryOffset;
|
||||
final BCF2Type dictionaryOffsetType;
|
||||
|
||||
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
||||
this.headerLine = headerLine;
|
||||
this.fixedType = fixedType;
|
||||
|
||||
final Integer offset = dict.get(getField());
|
||||
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF");
|
||||
this.dictionaryOffset = offset;
|
||||
dictionaryOffsetType = BCF2Utils.determineIntegerType(offset);
|
||||
}
|
||||
|
||||
public VCFHeaderLineCount getCountType() {
|
||||
return headerLine.getCountType();
|
||||
}
|
||||
|
||||
public VCFCompoundHeaderLine getHeaderLine() {
|
||||
return headerLine;
|
||||
}
|
||||
|
||||
public boolean hasFixedCount() { return getCountType() == VCFHeaderLineCount.INTEGER; }
|
||||
public boolean hasUnboundedCount() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; }
|
||||
public boolean hasContextDeterminedCount() { return ! hasFixedCount() && ! hasUnboundedCount(); }
|
||||
|
||||
@Requires("hasFixedCount()")
|
||||
public int getFixedCount() { return headerLine.getCount(); }
|
||||
public int getContextDeterminedCount(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
||||
if ( hasFixedCount() )
|
||||
return getFixedCount();
|
||||
else if ( hasUnboundedCount() )
|
||||
return value instanceof List ? ((List) value).size() : 1;
|
||||
else
|
||||
return getContextDeterminedCount(vc);
|
||||
}
|
||||
|
||||
public String getField() { return headerLine.getID(); }
|
||||
|
||||
public int getDictionaryOffset() { return dictionaryOffset; }
|
||||
public BCF2Type getDictionaryOffsetType() { return dictionaryOffsetType; }
|
||||
|
||||
public boolean isFixedTyped() { return ! isDynamicallyTyped(); }
|
||||
public boolean isDynamicallyTyped() { return fixedType == null; }
|
||||
public BCF2Type getType(final Object value) { return isDynamicallyTyped() ? getDynamicType(value) : getFixedType(); }
|
||||
public BCF2Type getFixedType() {
|
||||
if ( fixedType != null )
|
||||
return fixedType;
|
||||
else
|
||||
throw new ReviewedStingException("Not a fixed type encoder: " + getField());
|
||||
}
|
||||
public BCF2Type getDynamicType(final Object value) { throw new ReviewedStingException("Function getDynamicType() not implemented"); }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldEncoder for " + getField() + " with count " + getCountType() + " encoded with " + getClass().getSimpleName();
|
||||
}
|
||||
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Helper function that takes an object and returns a list representation
|
||||
* of it:
|
||||
*
|
||||
* o == null => []
|
||||
* o is a list => o
|
||||
* else => [o]
|
||||
*
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
private final static <T> List<T> toList(final Class<T> c, final Object o) {
|
||||
if ( o == null ) return Collections.emptyList();
|
||||
else if ( o instanceof List ) return (List<T>)o;
|
||||
else return Collections.singletonList((T)o);
|
||||
}
|
||||
|
||||
public static class StringOrCharacter extends BCF2FieldEncoder {
|
||||
public StringOrCharacter(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.CHAR);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
if ( value != null ) {
|
||||
final String s = encodeString(value);
|
||||
encoder.encodeString(s, s.length());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBCFFieldCount(final VariantContext vc, final Object value) {
|
||||
return value == null ? 0 : encodeString(value).length();
|
||||
}
|
||||
|
||||
private String encodeString(final Object value) {
|
||||
return value instanceof List ? BCF2Utils.collapseStringList((List<String>)value) : (String)value;
|
||||
}
|
||||
}
|
||||
|
||||
public static class Flag extends BCF2FieldEncoder {
|
||||
public Flag(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.INT8);
|
||||
if ( getHeaderLine().getCount() != 0 )
|
||||
throw new ReviewedStingException("Flag encoder only suppports atomic flags!");
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getFixedCount() {
|
||||
return 1; // the header says 0 but we will write 1 value
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encoder.encodePrimitive(1, getFixedType());
|
||||
}
|
||||
}
|
||||
|
||||
public static class Float extends BCF2FieldEncoder {
|
||||
public Float(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, BCF2Type.FLOAT);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
final List<Double> doubles = toList(Double.class, value);
|
||||
for ( final double d : doubles )
|
||||
encoder.encodeRawFloat(d);
|
||||
}
|
||||
}
|
||||
|
||||
public static class IntArray extends BCF2FieldEncoder {
|
||||
public IntArray(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((int[])value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
for ( final int i : (int[])value )
|
||||
encoder.encodeRawInt(i, type);
|
||||
}
|
||||
}
|
||||
|
||||
public static class IntList extends BCF2FieldEncoder {
|
||||
public IntList(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType(toList(Integer.class, value));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
for ( final int i : toList(Integer.class, value) )
|
||||
encoder.encodeRawInt(i, type);
|
||||
}
|
||||
}
|
||||
|
||||
public static class AtomicInt extends BCF2FieldEncoder {
|
||||
public AtomicInt(final VCFCompoundHeaderLine headerLine, final BCF2Encoder encoder, final Map<String, Integer> dict ) {
|
||||
super(headerLine, encoder, dict, null);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BCF2Type getDynamicType(final Object value) {
|
||||
return value == null ? BCF2Type.INT8 : BCF2Utils.determineIntegerType((Integer)value);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encoder.encodeRawInt((Integer)value, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,110 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Writes a single BCF2 field: emits the field's typed dictionary offset, then delegates encoding of the value to a BCF2FieldEncoder.
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public abstract class BCF2FieldWriter {
|
||||
private final BCF2FieldEncoder fieldEncoder;
|
||||
|
||||
protected BCF2FieldWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
this.fieldEncoder = fieldEncoder;
|
||||
}
|
||||
|
||||
protected BCF2FieldEncoder getFieldEncoder() {
|
||||
return fieldEncoder;
|
||||
}
|
||||
|
||||
public void start(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
encoder.encodeTyped(fieldEncoder.getDictionaryOffset(), fieldEncoder.getDictionaryOffsetType());
|
||||
}
|
||||
|
||||
public void done(final BCF2Encoder encoder, final VariantContext vc) throws IOException { }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BCF2FieldWriter " + getClass().getSimpleName() + " with encoder " + getFieldEncoder();
|
||||
}
|
||||
|
||||
public static abstract class SiteWriter extends BCF2FieldWriter {
|
||||
protected SiteWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
super(fieldEncoder);
|
||||
}
|
||||
|
||||
public abstract void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException;
|
||||
}
|
||||
|
||||
public static class GenericSiteWriter extends SiteWriter {
|
||||
public GenericSiteWriter(final BCF2FieldEncoder fieldEncoder) {
|
||||
super(fieldEncoder);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void site(final BCF2Encoder encoder, final VariantContext vc) throws IOException {
|
||||
final Object rawValue = vc.getAttribute(getFieldEncoder().getField(), null);
|
||||
final BCF2Type type = getFieldEncoder().getType(rawValue);
|
||||
if ( rawValue == null ) {
|
||||
// the value is missing, just write in null
|
||||
encoder.encodeType(0, type);
|
||||
} else {
|
||||
final int valueCount = getFieldEncoder().getBCFFieldCount(vc, rawValue);
|
||||
encoder.encodeType(valueCount, type);
|
||||
getFieldEncoder().encodeValue(encoder, rawValue, type);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,113 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineCount;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Creates and holds the site-level (INFO) BCF2 field writers for a VCF header, keyed by field ID.
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
*/
|
||||
public class BCF2FieldWriterManager {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
|
||||
final Map<String, BCF2FieldWriter.SiteWriter> siteWriters = new HashMap<String, BCF2FieldWriter.SiteWriter>();
|
||||
|
||||
public BCF2FieldWriterManager() { }
|
||||
|
||||
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> dictionary) {
|
||||
for (final VCFHeaderLine line : header.getMetaData()) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
final String field = ((VCFInfoHeaderLine) line).getID();
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter((VCFInfoHeaderLine)line, encoder, dictionary);
|
||||
logger.info("Installing for field " + field + " field writer " + writer);
|
||||
siteWriters.put(field, writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private BCF2FieldWriter.SiteWriter createInfoWriter(final VCFInfoHeaderLine line, final BCF2Encoder encoder, final Map<String, Integer> dict) {
|
||||
BCF2FieldEncoder fieldEncoder = null;
|
||||
switch ( line.getType() ) {
|
||||
case Character:
|
||||
case String:
|
||||
fieldEncoder = new BCF2FieldEncoder.StringOrCharacter(line, encoder, dict);
|
||||
break;
|
||||
case Flag:
|
||||
fieldEncoder = new BCF2FieldEncoder.Flag(line, encoder, dict);
|
||||
break;
|
||||
case Float:
|
||||
fieldEncoder = new BCF2FieldEncoder.Float(line, encoder, dict);
|
||||
break;
|
||||
case Integer:
|
||||
if ( line.getCountType() == VCFHeaderLineCount.INTEGER && line.getCount() == 1 )
|
||||
fieldEncoder = new BCF2FieldEncoder.AtomicInt(line, encoder, dict);
|
||||
else
|
||||
fieldEncoder = new BCF2FieldEncoder.IntList(line, encoder, dict);
|
||||
break;
|
||||
default:
|
||||
throw new ReviewedStingException("Unexpected type for field " + line.getID());
|
||||
}
|
||||
|
||||
return new BCF2FieldWriter.GenericSiteWriter(fieldEncoder);
|
||||
}
|
||||
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) {
|
||||
final BCF2FieldWriter.SiteWriter writer = siteWriters.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
return writer;
|
||||
}
|
||||
}
|
||||
|
|
@ -51,6 +51,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
|
||||
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
|
||||
IntGenotypeFieldAccessors intGenotypeFieldAccessors = new IntGenotypeFieldAccessors();
|
||||
final BCF2FieldWriterManager fieldManager = new BCF2FieldWriterManager();
|
||||
|
||||
public BCF2Writer(final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing, final boolean doNotWriteGenotypes) {
|
||||
super(writerName(location, output), location, output, refDict, enableOnTheFlyIndexing);
|
||||
|
|
@ -80,6 +81,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
stringDictionaryMap.put(dict.get(i), i);
|
||||
}
|
||||
|
||||
// setup the field encodings
|
||||
fieldManager.setup(header, encoder, stringDictionaryMap);
|
||||
|
||||
try {
|
||||
// write out the header into a byte stream, get its length, and write everything to the file
|
||||
final ByteArrayOutputStream capture = new ByteArrayOutputStream();
|
||||
|
|
@ -225,10 +229,15 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private void buildInfo( VariantContext vc ) throws IOException {
|
||||
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
|
||||
final String key = infoFieldEntry.getKey();
|
||||
final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
|
||||
final BCF2FieldWriter.SiteWriter writer = fieldManager.getSiteFieldWriter(key);
|
||||
writer.start(encoder, vc);
|
||||
writer.site(encoder, vc);
|
||||
writer.done(encoder, vc);
|
||||
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
|
||||
// the old way of doing things
|
||||
// final VCFToBCFEncoding encoding = prepFieldValueForEncoding(key, infoFieldEntry.getValue());
|
||||
// encodeStringByRef(key);
|
||||
// encoder.encodeTyped(encoding.valuesToEncode, encoding.BCF2Type);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -278,9 +287,9 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
BCF2Type intType;
|
||||
if ( isList ) {
|
||||
l = (List<Integer>)value;
|
||||
intType = encoder.determineIntegerType(l);
|
||||
intType = BCF2Utils.determineIntegerType(l);
|
||||
} else if ( value != null ) {
|
||||
intType = encoder.determineIntegerType((Integer)value);
|
||||
intType = BCF2Utils.determineIntegerType((Integer) value);
|
||||
l = Collections.singletonList((Integer)value);
|
||||
} else {
|
||||
intType = BCF2Type.INT8;
|
||||
|
|
@ -417,7 +426,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
// determine the best size
|
||||
final BCF2Type type = encoder.determineIntegerType(allPLs);
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(allPLs);
|
||||
startGenotypeField(field, numPLs, type);
|
||||
for ( int pl : allPLs )
|
||||
encoder.encodePrimitive(pl == -1 ? type.getMissingBytes() : pl, type);
|
||||
|
|
@ -495,7 +504,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
private final BCF2Type encodeStringByRef(final String string) throws IOException {
|
||||
final Integer offset = stringDictionaryMap.get(string);
|
||||
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + string + " in header as required by BCF");
|
||||
final BCF2Type type = encoder.determineIntegerType(offset);
|
||||
final BCF2Type type = BCF2Utils.determineIntegerType(offset);
|
||||
encoder.encodeTyped(offset, type);
|
||||
return type;
|
||||
}
|
||||
|
|
@ -516,7 +525,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
offsets.add(offset);
|
||||
|
||||
if ( maxType != BCF2Type.INT32) { // don't bother looking if we already are at 32 bit ints
|
||||
final BCF2Type type1 = encoder.determineIntegerType(offset);
|
||||
final BCF2Type type1 = BCF2Utils.determineIntegerType(offset);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: if ( maxType == BCF2Type.INT8 ) maxType = BCF2Type.INT16; break;
|
||||
|
|
|
|||
|
|
@ -87,8 +87,10 @@ public abstract class BaseTest {
|
|||
private static final String networkTempDir;
|
||||
private static final File networkTempDirFile;
|
||||
|
||||
public static final File testDirFile = new File("public/testdata/");
|
||||
protected static final String testDirRelative = "public/testdata/";
|
||||
public static final File testDirFile = new File(testDirRelative);
|
||||
public static final String testDir = testDirFile.getAbsolutePath() + "/";
|
||||
protected static final String testDirRoot = testDirFile.getPath().replace(testDirRelative, "");
|
||||
|
||||
public static final String keysDataLocation = validationDataLocation + "keys/";
|
||||
public static final String gatkKeyFile = CryptUtils.GATK_USER_KEY_DIRECTORY + "gsamembers_broadinstitute.org.key";
|
||||
|
|
|
|||
|
|
@ -354,7 +354,9 @@ public class WalkerTest extends BaseTest {
|
|||
final String now = new SimpleDateFormat("HH:mm:ss").format(new Date());
|
||||
final String cmdline = Utils.join(" ",command);
|
||||
System.out.println(String.format("[%s] Executing test %s with GATK arguments: %s", now, name, cmdline));
|
||||
BaseTest.log(cmdline); // also write the command line to the HTML log for convenient follow-up
|
||||
// also write the command line to the HTML log for convenient follow-up
|
||||
// do the replaceAll to strip testDirRoot so paths become relative to the current directory
|
||||
BaseTest.log(cmdline.replaceAll(testDirRoot, ""));
|
||||
CommandLineExecutable.start(instance, command);
|
||||
} catch (Exception e) {
|
||||
gotAnException = true;
|
||||
|
|
|
|||
|
|
@ -55,6 +55,8 @@ public class VariantContextTestProvider {
|
|||
final private static boolean ENABLE_PLOIDY_TESTS = true;
|
||||
final private static boolean ENABLE_PL_TESTS = true;
|
||||
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
|
||||
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = false;
|
||||
|
||||
private static VCFHeader syntheticHeader;
|
||||
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
||||
private static VariantContext ROOT;
|
||||
|
|
@ -160,6 +162,7 @@ public class VariantContextTestProvider {
|
|||
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("VAR.INFO.STRING", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.String, "x"));
|
||||
|
||||
metaData.add(new VCFFormatHeaderLine("GT", 1, VCFHeaderLineType.String, "Genotype"));
|
||||
metaData.add(new VCFFormatHeaderLine("GQ", 1, VCFHeaderLineType.Integer, "Genotype Quality"));
|
||||
|
|
@ -180,7 +183,7 @@ public class VariantContextTestProvider {
|
|||
metaData.add(new VCFInfoHeaderLine("INT.VAR", VCFHeaderLineCount.UNBOUNDED, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLAG", 1, VCFHeaderLineType.Flag, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLAG", 0, VCFHeaderLineType.Flag, "x"));
|
||||
|
||||
syntheticHeader = new VCFHeader(metaData);
|
||||
}
|
||||
|
|
@ -246,6 +249,11 @@ public class VariantContextTestProvider {
|
|||
add(builder().attribute("STRING3", null));
|
||||
add(builder().attribute("STRING20", Arrays.asList("s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));
|
||||
|
||||
add(builder().attribute("VAR.INFO.STRING", "s1"));
|
||||
add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2")));
|
||||
add(builder().attribute("VAR.INFO.STRING", Arrays.asList("s1", "s2", "s3")));
|
||||
add(builder().attribute("VAR.INFO.STRING", null));
|
||||
|
||||
addGenotypesToTestData();
|
||||
|
||||
addComplexGenotypesTest();
|
||||
|
|
@ -390,51 +398,53 @@ public class VariantContextTestProvider {
|
|||
attr("g1", ref, "FLOAT3", 1.0, 2.0, 3.0),
|
||||
attr("g2", ref, "FLOAT3"));
|
||||
|
||||
//
|
||||
//
|
||||
// TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
|
||||
//
|
||||
//
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
|
||||
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
|
||||
if (ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS) {
|
||||
//
|
||||
//
|
||||
// TESTING MULTIPLE SIZED LISTS IN THE GENOTYPE FIELD
|
||||
//
|
||||
//
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GS", Arrays.asList("S1", "S2")),
|
||||
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
|
||||
|
||||
addGenotypeTests(site, // g1 is missing the string, and g2 is missing FLOAT1
|
||||
attr("g1", ref, "FLOAT1", 1.0),
|
||||
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
|
||||
addGenotypeTests(site, // g1 is missing the string, and g2 is missing FLOAT1
|
||||
attr("g1", ref, "FLOAT1", 1.0),
|
||||
attr("g2", ref, "GS", Arrays.asList("S3", "S4")));
|
||||
|
||||
// variable sized lists
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GV", Arrays.asList("S1")),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
|
||||
// variable sized lists
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GV", Arrays.asList("S1")),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
|
||||
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GV", Arrays.asList("S1", "S2")),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
||||
|
||||
addGenotypeTests(site, // missing value in varlist of string
|
||||
attr("g1", ref, "FLOAT1", 1.0),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
||||
addGenotypeTests(site, // missing value in varlist of string
|
||||
attr("g1", ref, "FLOAT1", 1.0),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4", "S5")));
|
||||
|
||||
|
||||
//
|
||||
//
|
||||
// TESTING GENOTYPE FILTERS
|
||||
//
|
||||
//
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
//
|
||||
//
|
||||
// TESTING GENOTYPE FILTERS
|
||||
//
|
||||
//
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
}
|
||||
|
||||
// TODO -- test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue