Code cleanup and more documentation for BCFFieldWriters
-- Update integration tests where appropriate
This commit is contained in:
parent
dc07067265
commit
bd9d40fb84
|
|
@ -22,9 +22,11 @@
|
|||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
|
|
@ -33,10 +35,10 @@ import java.io.OutputStream;
|
|||
import java.util.*;
|
||||
|
||||
/**
|
||||
* BCF2 encoder
|
||||
* See #BCF2Writer for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author depristo
|
||||
* @since 5/12
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public final class BCF2Encoder {
|
||||
// TODO -- increase default size?
|
||||
|
|
@ -62,7 +64,7 @@ public final class BCF2Encoder {
|
|||
/**
|
||||
* Method for writing raw bytes to the encoder stream
|
||||
*
|
||||
* The purpuse this method exists is to allow lazy decoding of genotype data. In that
|
||||
* The purpose this method exists is to allow lazy decoding of genotype data. In that
|
||||
* situation the reader has loaded a block of bytes, and never decoded it, so we
|
||||
* are just writing it back out immediately as a raw stream of blocks. Any
|
||||
* bad low-level formatting or changes to that byte[] will result in a malformed
|
||||
|
|
@ -93,7 +95,7 @@ public final class BCF2Encoder {
|
|||
|
||||
public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
|
||||
if ( type == BCF2Type.CHAR && v.size() != 0 ) {
|
||||
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>)v) : (String)v.get(0);
|
||||
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>) v) : (String)v.get(0);
|
||||
v = stringToBytes(s);
|
||||
}
|
||||
|
||||
|
|
@ -200,7 +202,7 @@ public final class BCF2Encoder {
|
|||
* @param o
|
||||
* @return
|
||||
*/
|
||||
protected final BCF2Type encode(final Object o) throws IOException {
|
||||
public final BCF2Type encode(final Object o) throws IOException {
|
||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||
|
||||
if ( o instanceof List ) {
|
||||
|
|
@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Invariant;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine;
|
||||
|
|
@ -41,10 +40,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* See #BCF2Writer for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
@Invariant({
|
||||
"headerLine != null",
|
||||
|
|
@ -52,9 +51,26 @@ import java.util.Map;
|
|||
"dictionaryOffset >= 0"
|
||||
})
|
||||
public abstract class BCF2FieldEncoder {
|
||||
/**
|
||||
* The header line describing the field we will encode values of
|
||||
*/
|
||||
final VCFCompoundHeaderLine headerLine;
|
||||
final BCF2Type fixedType;
|
||||
|
||||
/**
|
||||
* The BCF2 type we'll use to encode this field, if it can be determined statically.
|
||||
* If not, this variable must be null
|
||||
*/
|
||||
final BCF2Type staticType;
|
||||
|
||||
/**
|
||||
* The integer offset into the strings map of the BCF2 file corresponding to this
|
||||
* field.
|
||||
*/
|
||||
final int dictionaryOffset;
|
||||
|
||||
/**
|
||||
* The integer type we use to encode our dictionary offset in the BCF2 file
|
||||
*/
|
||||
final BCF2Type dictionaryOffsetType;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
|
@ -63,9 +79,10 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type fixedType) {
|
||||
@Requires({"headerLine != null", "dict != null"})
|
||||
private BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type staticType) {
|
||||
this.headerLine = headerLine;
|
||||
this.fixedType = fixedType;
|
||||
this.staticType = staticType;
|
||||
|
||||
final Integer offset = dict.get(getField());
|
||||
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF");
|
||||
|
|
@ -79,6 +96,7 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Ensures("result != null")
|
||||
public final String getField() { return headerLine.getID(); }
|
||||
|
||||
/**
|
||||
|
|
@ -87,6 +105,7 @@ public abstract class BCF2FieldEncoder {
|
|||
* @param encoder where we write our dictionary offset
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires("encoder != null")
|
||||
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
|
||||
encoder.encodeTyped(dictionaryOffset, dictionaryOffsetType);
|
||||
}
|
||||
|
|
@ -102,44 +121,81 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Ensures("result != null")
|
||||
protected final VCFHeaderLineCount getCountType() {
|
||||
return headerLine.getCountType();
|
||||
}
|
||||
|
||||
/**
|
||||
* True if this field has a constant, fixed number of elements (such as 1 for an atomic integer)
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasConstantNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.INTEGER;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if the only way to determine how many elements this field contains is by
|
||||
* inspecting the actual value directly, such as when the number of elements
|
||||
* is a variable length list per site or per genotype.
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())")
|
||||
public boolean hasValueDeterminedNumElements() {
|
||||
return getCountType() == VCFHeaderLineCount.UNBOUNDED;
|
||||
}
|
||||
|
||||
/**
|
||||
* True if this field has a non-fixed number of elements that depends only on the properties
|
||||
* of the current VariantContext, such as one value per Allele or per genotype configuration.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())")
|
||||
public boolean hasContextDeterminedNumElements() {
|
||||
return ! hasConstantNumElements() && ! hasValueDeterminedNumElements();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements, assuming this field has a constant number of elements.
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasConstantNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements() {
|
||||
return headerLine.getCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements by looking at the actual value provided
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasValueDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final Object value) {
|
||||
return numElementsFromValue(value);
|
||||
//return value instanceof List ? ((List) value).size() : 1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of elements, assuming this field has context-determined number of elements.
|
||||
* @return
|
||||
*/
|
||||
@Requires("hasContextDeterminedNumElements()")
|
||||
@Ensures("result >= 0")
|
||||
public int numElements(final VariantContext vc) {
|
||||
return headerLine.getCount(vc.getNAlleles() - 1);
|
||||
}
|
||||
|
||||
/**
|
||||
* A convenience access for the number of elements, returning
|
||||
* the number of encoded elements, either from the fixed number
|
||||
* it has, from the VC, or from the value itself.
|
||||
* @param vc
|
||||
* @param value
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result >= 0")
|
||||
public final int numElements(final VariantContext vc, final Object value) {
|
||||
if ( hasConstantNumElements() ) return numElements();
|
||||
|
|
@ -169,12 +225,28 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Is the BCF2 type of this field static, or does it have to be determined from
|
||||
* the actual field value itself?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result || isDynamicallyTyped()")
|
||||
public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); }
|
||||
|
||||
/**
|
||||
* Is the BCF2 type of this field static, or does it have to be determined from
|
||||
* the actual field value itself?
|
||||
* @return
|
||||
*/
|
||||
@Ensures("result || isStaticallyTyped()")
|
||||
public final boolean isDynamicallyTyped() { return fixedType == null; }
|
||||
public final boolean isDynamicallyTyped() { return staticType == null; }
|
||||
|
||||
/**
|
||||
* Get the BCF2 type for this field, either from the static type of the
|
||||
* field itself or by inspecting the value itself.
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public final BCF2Type getType(final Object value) {
|
||||
return isDynamicallyTyped() ? getDynamicType(value) : getStaticType();
|
||||
}
|
||||
|
|
@ -182,7 +254,7 @@ public abstract class BCF2FieldEncoder {
|
|||
@Requires("isStaticallyTyped()")
|
||||
@Ensures("result != null")
|
||||
public final BCF2Type getStaticType() {
|
||||
return fixedType;
|
||||
return staticType;
|
||||
}
|
||||
|
||||
@Requires("isDynamicallyTyped()")
|
||||
|
|
@ -197,11 +269,41 @@ public abstract class BCF2FieldEncoder {
|
|||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Convenience method that just calls encodeValue with no minimum for the number of values.
|
||||
*
|
||||
* Primarily useful for encoding site values
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
|
||||
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
|
||||
encodeValue(encoder, value, type, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Key abstract method that should encode a value of the given type into the encoder.
|
||||
*
|
||||
* Value will be of a type appropriate to the underlying encoder. If the genotype field is represented as
|
||||
* an int[], this will be value, and the encoder needs to handle encoding all of the values in the int[].
|
||||
*
|
||||
* The argument should be used, not the getType() method in the superclass as an outer loop might have
|
||||
* decided a more general type (int16) to use, even though this encoder could have been done with int8.
|
||||
*
|
||||
* If minValues > 0, then encodeValue must write in at least minValues items from value. If value is atomic,
|
||||
* this means that minValues - 1 MISSING values should be added to the encoder. If value is a collection
|
||||
* type (int[]) then minValues - values.length MISSING values should be added. This argument is intended to handle padding
|
||||
* of values in genotype fields.
|
||||
*
|
||||
* @param encoder
|
||||
* @param value
|
||||
* @param type
|
||||
* @param minValues
|
||||
* @throws IOException
|
||||
*/
|
||||
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"})
|
||||
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
|
||||
|
||||
|
|
@ -243,11 +345,15 @@ public abstract class BCF2FieldEncoder {
|
|||
*/
|
||||
@Ensures("result != null")
|
||||
private String javaStringToBCF2String(final Object value) {
|
||||
return value == null
|
||||
? ""
|
||||
: (value instanceof List
|
||||
? BCF2Utils.collapseStringList((List<String>)value)
|
||||
: (String)value);
|
||||
if ( value == null )
|
||||
return "";
|
||||
else if (value instanceof List) {
|
||||
if ( ((List) value).size() == 1 )
|
||||
return (String)((List) value).get(0);
|
||||
else
|
||||
return BCF2Utils.collapseStringList((List<String>)value);
|
||||
} else
|
||||
return (String)value;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,6 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Requires;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
|
|
@ -41,10 +40,10 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
*
|
||||
* See #BCF2Writer for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 6/12
|
||||
* @since 06/12
|
||||
*/
|
||||
public abstract class BCF2FieldWriter {
|
||||
private final VCFHeader header;
|
||||
|
|
|
|||
|
|
@ -24,8 +24,9 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
import com.google.java.contract.Ensures;
|
||||
import com.google.java.contract.Requires;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
|
|
@ -33,31 +34,10 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* [Short one sentence description of this walker]
|
||||
* <p/>
|
||||
* <p>
|
||||
* [Functionality of this walker]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Input</h2>
|
||||
* <p>
|
||||
* [Input description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Output</h2>
|
||||
* <p>
|
||||
* [Output description]
|
||||
* </p>
|
||||
* <p/>
|
||||
* <h2>Examples</h2>
|
||||
* <pre>
|
||||
* java
|
||||
* -jar GenomeAnalysisTK.jar
|
||||
* -T $WalkerName
|
||||
* </pre>
|
||||
* See #BCF2Writer for documentation on this class's role in encoding BCF2 files
|
||||
*
|
||||
* @author Your Name
|
||||
* @since Date created
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
public class BCF2FieldWriterManager {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
|
||||
|
|
@ -67,23 +47,35 @@ public class BCF2FieldWriterManager {
|
|||
|
||||
public BCF2FieldWriterManager() { }
|
||||
|
||||
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> dictionary) {
|
||||
/**
|
||||
* Setup the FieldWriters appropriate to each INFO and FORMAT in the VCF header
|
||||
*
|
||||
* Must be called before any of the getter methods will work
|
||||
*
|
||||
* @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
|
||||
* @param encoder the encoder we are going to use to write out the BCF2 data
|
||||
* @param stringDictionary a map from VCFHeader strings to their offsets for encoding
|
||||
*/
|
||||
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
|
||||
for (final VCFHeaderLine line : header.getMetaData()) {
|
||||
if ( line instanceof VCFInfoHeaderLine ) {
|
||||
final String field = ((VCFInfoHeaderLine) line).getID();
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, dictionary);
|
||||
log(field, writer);
|
||||
siteWriters.put(field, writer);
|
||||
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, stringDictionary);
|
||||
add(siteWriters, field, writer);
|
||||
} else if ( line instanceof VCFFormatHeaderLine ) {
|
||||
final String field = ((VCFFormatHeaderLine) line).getID();
|
||||
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, dictionary);
|
||||
log(field, writer);
|
||||
genotypesWriters.put(field, writer);
|
||||
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, stringDictionary);
|
||||
add(genotypesWriters, field, writer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final void log(final String field, final BCF2FieldWriter writer) {
|
||||
@Requires({"field != null", "writer != null"})
|
||||
@Ensures("map.containsKey(field)")
|
||||
private final <T> void add(final Map<String, T> map, final String field, final T writer) {
|
||||
if ( map.containsKey(field) )
|
||||
throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
|
||||
map.put(field, writer);
|
||||
logger.info(writer);
|
||||
}
|
||||
|
||||
|
|
@ -160,14 +152,26 @@ public class BCF2FieldWriterManager {
|
|||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) {
|
||||
return getWriter(key, siteWriters);
|
||||
/**
|
||||
* Get a site writer specialized to encode values for site info field
|
||||
* @param field key found in the VCF header INFO records
|
||||
* @return
|
||||
*/
|
||||
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
|
||||
return getWriter(field, siteWriters);
|
||||
}
|
||||
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String key) {
|
||||
return getWriter(key, genotypesWriters);
|
||||
/**
|
||||
* Get a genotypes writer specialized to encode values for genotypes field
|
||||
* @param field key found in the VCF header FORMAT records
|
||||
* @return
|
||||
*/
|
||||
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
|
||||
return getWriter(field, genotypesWriters);
|
||||
}
|
||||
|
||||
@Requires({"map != null", "key != null"})
|
||||
@Ensures("result != null")
|
||||
public <T> T getWriter(final String key, final Map<String, T> map) {
|
||||
final T writer = map.get(key);
|
||||
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);
|
||||
|
|
|
|||
|
|
@ -29,7 +29,6 @@ import com.google.java.contract.Requires;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
|
|
@ -40,6 +39,49 @@ import org.broadinstitute.sting.utils.variantcontext.*;
|
|||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* VariantContextWriter that emits BCF2 binary encoding
|
||||
*
|
||||
* Overall structure of this writer is complex for efficiency reasons
|
||||
*
|
||||
* -- The BCF2Writer manages the low-level BCF2 encoder, the mappings
|
||||
* from contigs and strings to offsets, the VCF header, and holds the
|
||||
* lower-level encoders that map from VC and Genotype fields to their
|
||||
* specific encoders. This class also writes out the standard BCF2 fields
|
||||
* like POS, contig, the size of info and genotype data, QUAL, etc. It
|
||||
* has loops over the INFO and GENOTYPES to encode each individual datum
|
||||
* with the generic field encoders, but the actual encoding work is
|
||||
* done by the FieldWriter classes themselves
|
||||
*
|
||||
* -- BCF2FieldWriter are specialized classes for writing out SITE and
|
||||
* genotype information for specific SITE/GENOTYPE fields (like AC for
|
||||
* sites and GQ for genotypes). These are objects in themselves because
|
||||
* they manage all of the complexity of relating the types in the VCF header
|
||||
* with the proper encoding in BCF as well as the type representing this
|
||||
* in java. Relating all three of these pieces of information together
|
||||
* is the main complexity challenge in the encoder. The piece of code
|
||||
* that determines which FieldWriters to associate with each SITE and
|
||||
* GENOTYPE field is the BCF2FieldWriterManager. These FieldWriters
|
||||
* are specialized for specific combinations of encoders (see below)
|
||||
* and contexts (genotypes) for efficiency, so they smartly manage
|
||||
* the writing of PLs (encoded as int[]) directly into the lowest
|
||||
* level BCF2Encoder.
|
||||
*
|
||||
* -- At the third level is the BCF2FieldEncoder, relatively simple
|
||||
* pieces of code that handle the task of determining the right
|
||||
* BCF2 type for specific field values, as well as reporting back
|
||||
* information such as the number of elements used to encode it
|
||||
* (simple for atomic values like Integer but complex for PLs
|
||||
* or lists of strings)
|
||||
*
|
||||
* -- At the lowest level is the BCF2Encoder itself. This provides
|
||||
* just the limited encoding methods specified by the BCF2 specification. This encoder
|
||||
* doesn't do anything but make it possible to conveniently write out valid low-level
|
||||
* BCF2 constructs.
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 06/12
|
||||
*/
|
||||
class BCF2Writer extends IndexingVariantContextWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
|
|||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.*;
|
||||
|
|
@ -329,8 +330,13 @@ class VCFWriter extends IndexingVariantContextWriter {
|
|||
*/
|
||||
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
|
||||
throws IOException {
|
||||
if ( mHeader.getGenotypeSamples().size() != vc.getNSamples() )
|
||||
throw new ReviewedStingException("BUG: number of VariantContext samples " + vc.getNSamples() + " != to the number of sample found in the VCF header" + mHeader.getGenotypeSamples().size());
|
||||
if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
|
||||
final List<String> badSampleNames = new ArrayList<String>();
|
||||
for ( final Genotype g : vc.getGenotypes() )
|
||||
if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
|
||||
badSampleNames.add(g.getSampleName());
|
||||
throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
|
||||
}
|
||||
|
||||
for ( String sample : mHeader.getGenotypeSamples() ) {
|
||||
mWriter.write(VCFConstants.FIELD_SEPARATOR);
|
||||
|
|
|
|||
|
|
@ -304,7 +304,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
|
||||
" --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
|
||||
1, Arrays.asList("3cf734416452d953d433da6a3f418c3c"));
|
||||
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
|
||||
executeTestParallel("testSelect1", spec);
|
||||
}
|
||||
|
||||
|
|
@ -332,7 +332,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testCompVsEvalAC() {
|
||||
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("722ef452dede5d23038d10eca89d4f31"));
|
||||
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d"));
|
||||
executeTestParallel("testCompVsEvalAC",spec);
|
||||
}
|
||||
|
||||
|
|
@ -535,7 +535,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("9236930cb26b01a9b9d770b0f048b182")
|
||||
Arrays.asList("f8460af997436a5ce4407fefb0e2724d")
|
||||
);
|
||||
executeTest("testModernVCFWithLargeIndels", spec);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("1024d7d1e563c56c2c667f98b1b81028")
|
||||
Arrays.asList("6c1a9e64a00a5b312531729bc73b5183")
|
||||
);
|
||||
|
||||
executeTest("testRepeatedLineSelection--" + testfile, spec);
|
||||
|
|
@ -59,7 +59,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
|
||||
1,
|
||||
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
|
||||
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testComplexSelection--" + testfile, spec);
|
||||
|
|
@ -73,7 +73,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
|
||||
1,
|
||||
Arrays.asList("34e714c7469b3cf5bf910222baff4cd0")
|
||||
Arrays.asList("ed0f40334a82aa8e4698d5bfd8ed4d52")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
@ -169,7 +169,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
|
||||
1,
|
||||
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
|
||||
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
executeTest("testParallelization (2 threads)--" + testfile, spec);
|
||||
|
|
@ -183,7 +183,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
|
|||
spec = new WalkerTestSpec(
|
||||
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
|
||||
1,
|
||||
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
|
||||
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
|
||||
);
|
||||
spec.disableShadowBCF();
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
|
|||
|
||||
import org.apache.commons.lang.ArrayUtils;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Encoder;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
|
|
|||
|
|
@ -55,7 +55,7 @@ public class VariantContextTestProvider {
|
|||
final private static boolean ENABLE_PLOIDY_TESTS = true;
|
||||
final private static boolean ENABLE_PL_TESTS = true;
|
||||
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
|
||||
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = false;
|
||||
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true;
|
||||
|
||||
private static VCFHeader syntheticHeader;
|
||||
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
||||
|
|
@ -448,7 +448,7 @@ public class VariantContextTestProvider {
|
|||
|
||||
// variable sized lists
|
||||
addGenotypeTests(site,
|
||||
attr("g1", ref, "GV", Arrays.asList("S1")),
|
||||
attr("g1", ref, "GV", "S1"),
|
||||
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
|
||||
|
||||
addGenotypeTests(site,
|
||||
|
|
@ -466,18 +466,18 @@ public class VariantContextTestProvider {
|
|||
//
|
||||
//
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
|
||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
addGenotypeTests(site,
|
||||
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
|
||||
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
|
||||
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
|
||||
}
|
||||
|
||||
// TODO -- test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
||||
|
|
|
|||
Loading…
Reference in New Issue