Code cleanup and more documentation for BCFFieldWriters

-- Update integration tests where appropriate
This commit is contained in:
Mark DePristo 2012-06-13 17:07:19 -04:00
parent dc07067265
commit bd9d40fb84
10 changed files with 244 additions and 84 deletions

View File

@ -22,9 +22,11 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.bcf2;
package org.broadinstitute.sting.utils.variantcontext.writer;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import java.io.ByteArrayOutputStream;
@ -33,10 +35,10 @@ import java.io.OutputStream;
import java.util.*;
/**
* BCF2 encoder
* See BCF2Writer for documentation on this class's role in encoding BCF2 files
*
* @author depristo
* @since 5/12
* @author Mark DePristo
* @since 06/12
*/
public final class BCF2Encoder {
// TODO -- increase default size?
@ -62,7 +64,7 @@ public final class BCF2Encoder {
/**
* Method for writing raw bytes to the encoder stream
*
* The purpuse this method exists is to allow lazy decoding of genotype data. In that
* The purpose this method exists is to allow lazy decoding of genotype data. In that
* situation the reader has loaded a block of bytes, and never decoded it, so we
* are just writing it back out immediately as a raw stream of blocks. Any
* bad low-level formatting or changes to that byte[] will result in a malformed
@ -93,7 +95,7 @@ public final class BCF2Encoder {
public final void encodeTyped(List<? extends Object> v, final BCF2Type type) throws IOException {
if ( type == BCF2Type.CHAR && v.size() != 0 ) {
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>)v) : (String)v.get(0);
final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List<String>) v) : (String)v.get(0);
v = stringToBytes(s);
}
@ -200,7 +202,7 @@ public final class BCF2Encoder {
* @param o
* @return
*/
protected final BCF2Type encode(final Object o) throws IOException {
public final BCF2Type encode(final Object o) throws IOException {
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
if ( o instanceof List ) {

View File

@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import com.google.java.contract.Ensures;
import com.google.java.contract.Invariant;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine;
@ -41,10 +40,10 @@ import java.util.List;
import java.util.Map;
/**
* See BCF2Writer for documentation on this class's role in encoding BCF2 files
*
*
* @author Your Name
* @since Date created
* @author Mark DePristo
* @since 06/12
*/
@Invariant({
"headerLine != null",
@ -52,9 +51,26 @@ import java.util.Map;
"dictionaryOffset >= 0"
})
public abstract class BCF2FieldEncoder {
/**
* The header line describing the field we will encode values of
*/
final VCFCompoundHeaderLine headerLine;
final BCF2Type fixedType;
/**
* The BCF2 type we'll use to encode this field, if it can be determined statically.
* If not, this variable must be null
*/
final BCF2Type staticType;
/**
* The integer offset into the strings map of the BCF2 file corresponding to this
* field.
*/
final int dictionaryOffset;
/**
* The integer type we use to encode our dictionary offset in the BCF2 file
*/
final BCF2Type dictionaryOffsetType;
// ----------------------------------------------------------------------
@ -63,9 +79,10 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type fixedType) {
@Requires({"headerLine != null", "dict != null"})
private BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map<String, Integer> dict, final BCF2Type staticType) {
this.headerLine = headerLine;
this.fixedType = fixedType;
this.staticType = staticType;
final Integer offset = dict.get(getField());
if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF");
@ -79,6 +96,7 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
 * Get the name of the field handled by this encoder, taken from the ID of its header line.
 *
 * @return the ID of the underlying header line; non-null by contract
 */
@Ensures("result != null")
public final String getField() {
    final String fieldId = headerLine.getID();
    return fieldId;
}
/**
@ -87,6 +105,7 @@ public abstract class BCF2FieldEncoder {
* @param encoder where we write our dictionary offset
* @throws IOException
*/
@Requires("encoder != null")
public final void writeFieldKey(final BCF2Encoder encoder) throws IOException {
encoder.encodeTyped(dictionaryOffset, dictionaryOffsetType);
}
@ -102,44 +121,81 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
 * Get the count type declared by the header line of this field.
 *
 * @return the header line's count type; non-null by contract
 */
@Ensures("result != null")
protected final VCFHeaderLineCount getCountType() {
    final VCFHeaderLineCount countType = headerLine.getCountType();
    return countType;
}
/**
 * Does this field always hold the same, fixed number of elements (such as 1 for an
 * atomic integer)?
 *
 * @return true if the count type of this field is INTEGER
 */
@Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())")
public boolean hasConstantNumElements() {
    final VCFHeaderLineCount countType = getCountType();
    return VCFHeaderLineCount.INTEGER == countType;
}
/**
 * Can the number of elements of this field only be determined by inspecting the actual
 * value directly, as is the case when the field is a variable length list per site or
 * per genotype?
 *
 * @return true if the count type of this field is UNBOUNDED
 */
@Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())")
public boolean hasValueDeterminedNumElements() {
    final VCFHeaderLineCount countType = getCountType();
    return VCFHeaderLineCount.UNBOUNDED == countType;
}
/**
 * Does this field have a non-fixed number of elements that depends only on properties
 * of the current VariantContext, such as one value per Allele or per genotype
 * configuration?
 *
 * @return true if the field has neither a constant nor a value-determined number of elements
 */
@Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())")
public boolean hasContextDeterminedNumElements() {
    // constant is checked first; value-determined is only consulted when that fails
    if ( hasConstantNumElements() )
        return false;
    return ! hasValueDeterminedNumElements();
}
/**
 * Get the number of elements of this field, which must have a constant number of elements.
 *
 * @return the count recorded in the header line; non-negative by contract
 */
@Requires("hasConstantNumElements()")
@Ensures("result >= 0")
public int numElements() {
    final int constantCount = headerLine.getCount();
    return constantCount;
}
/**
 * Get the number of elements of this field by looking at the actual value provided.
 * Only valid for fields whose number of elements is value-determined.
 *
 * @param value the value to inspect; handed unchanged to numElementsFromValue
 * @return the element count reported by numElementsFromValue; non-negative by contract
 */
@Requires("hasValueDeterminedNumElements()")
@Ensures("result >= 0")
public int numElements(final Object value) {
    final int countFromValue = numElementsFromValue(value);
    return countFromValue;
}
/**
 * Get the number of elements of this field, which must have a context-determined
 * number of elements.
 *
 * @param vc the VariantContext whose allele count drives the result
 * @return the header line's count for (number of alleles in vc - 1), presumably the
 *         number of alternate alleles; non-negative by contract
 */
@Requires("hasContextDeterminedNumElements()")
@Ensures("result >= 0")
public int numElements(final VariantContext vc) {
    final int nAllelesMinusOne = vc.getNAlleles() - 1;
    return headerLine.getCount(nAllelesMinusOne);
}
/**
* A convenience access for the number of elements, returning
* the number of encoded elements, either from the fixed number
* it has, from the VC, or from the value itself.
* @param vc
* @param value
* @return
*/
@Ensures("result >= 0")
public final int numElements(final VariantContext vc, final Object value) {
if ( hasConstantNumElements() ) return numElements();
@ -169,12 +225,28 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
 * Is the BCF2 type of this field known statically, as opposed to having to be
 * determined from the actual field value itself?
 *
 * @return true exactly when this field is not dynamically typed
 */
@Ensures("result || isDynamicallyTyped()")
public final boolean isStaticallyTyped() {
    final boolean dynamic = isDynamicallyTyped();
    return ! dynamic;
}
/**
* Is the BCF2 type of this field dynamic, meaning it has to be determined from
* the actual field value itself because no static type is available?
* @return
*/
@Ensures("result || isStaticallyTyped()")
public final boolean isDynamicallyTyped() { return fixedType == null; }
public final boolean isDynamicallyTyped() { return staticType == null; }
/**
 * Determine the BCF2 type to use for a value of this field.
 *
 * @param value the field value; only consulted when this field is dynamically typed
 * @return the dynamic type computed from value when this field is dynamically typed,
 *         otherwise the static type of the field
 */
public final BCF2Type getType(final Object value) {
    if ( isDynamicallyTyped() )
        return getDynamicType(value);
    return getStaticType();
}
@ -182,7 +254,7 @@ public abstract class BCF2FieldEncoder {
@Requires("isStaticallyTyped()")
@Ensures("result != null")
public final BCF2Type getStaticType() {
return fixedType;
return staticType;
}
@Requires("isDynamicallyTyped()")
@ -197,11 +269,41 @@ public abstract class BCF2FieldEncoder {
//
// ----------------------------------------------------------------------
/**
 * Convenience method that simply calls encodeValue with no minimum for the number of values.
 *
 * Primarily useful for encoding site values
 *
 * @param encoder where the value is written
 * @param value the value to encode
 * @param type the BCF2 type to encode the value with
 * @throws IOException propagated from encodeValue
 */
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"})
public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException {
    // a minimum of zero means no padding is requested
    final int noMinimumValues = 0;
    encodeValue(encoder, value, type, noMinimumValues);
}
/**
 * Key abstract method that should encode a value of the given type into the encoder.
 *
 * Value will be of a type appropriate to the underlying encoder. If the genotype field is represented as
 * an int[], this will be value, and the encoder needs to handle encoding all of the values in the int[].
 *
 * The type argument should be used, not the getType() method in the superclass, as an outer loop might have
 * decided to use a more general type (int16), even though this encoder could have been done with int8.
 *
 * If minValues > 0, then encodeValue must write at least minValues items from value. If value is atomic,
 * this means that minValues - 1 MISSING values should be added to the encoder. If value is a collection
 * type (int[]) then minValues - values.length MISSING values should be added. This argument is intended
 * to handle padding of values in genotype fields.
 *
 * @param encoder where the encoded value is written
 * @param value the value to encode, of a type appropriate to the underlying encoder
 * @param type the BCF2 type to encode with; use this rather than getType()
 * @param minValues the minimum number of items to write, padding with MISSING as described above
 * @throws IOException declared by the signature; presumably raised when writing to the encoder fails
 */
@Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"})
public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException;
@ -243,11 +345,15 @@ public abstract class BCF2FieldEncoder {
*/
@Ensures("result != null")
private String javaStringToBCF2String(final Object value) {
return value == null
? ""
: (value instanceof List
? BCF2Utils.collapseStringList((List<String>)value)
: (String)value);
if ( value == null )
return "";
else if (value instanceof List) {
if ( ((List) value).size() == 1 )
return (String)((List) value).get(0);
else
return BCF2Utils.collapseStringList((List<String>)value);
} else
return (String)value;
}
}

View File

@ -25,7 +25,6 @@
package org.broadinstitute.sting.utils.variantcontext.writer;
import com.google.java.contract.Requires;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
@ -41,10 +40,10 @@ import java.util.List;
import java.util.Map;
/**
*
* See BCF2Writer for documentation on this class's role in encoding BCF2 files
*
* @author Mark DePristo
* @since 6/12
* @since 06/12
*/
public abstract class BCF2FieldWriter {
private final VCFHeader header;

View File

@ -24,8 +24,9 @@
package org.broadinstitute.sting.utils.variantcontext.writer;
import com.google.java.contract.Ensures;
import com.google.java.contract.Requires;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
@ -33,31 +34,10 @@ import java.util.HashMap;
import java.util.Map;
/**
* [Short one sentence description of this walker]
* <p/>
* <p>
* [Functionality of this walker]
* </p>
* <p/>
* <h2>Input</h2>
* <p>
* [Input description]
* </p>
* <p/>
* <h2>Output</h2>
* <p>
* [Output description]
* </p>
* <p/>
* <h2>Examples</h2>
* <pre>
* java
* -jar GenomeAnalysisTK.jar
* -T $WalkerName
* </pre>
* See BCF2Writer for documentation on this class's role in encoding BCF2 files
*
* @author Your Name
* @since Date created
* @author Mark DePristo
* @since 06/12
*/
public class BCF2FieldWriterManager {
final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class);
@ -67,23 +47,35 @@ public class BCF2FieldWriterManager {
public BCF2FieldWriterManager() { }
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> dictionary) {
/**
* Setup the FieldWriters appropriate to each INFO and FORMAT in the VCF header
*
* Must be called before any of the getter methods will work
*
* @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF
* @param encoder the encoder we are going to use to write out the BCF2 data
* @param stringDictionary a map from VCFHeader strings to their offsets for encoding
*/
public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map<String, Integer> stringDictionary) {
for (final VCFHeaderLine line : header.getMetaData()) {
if ( line instanceof VCFInfoHeaderLine ) {
final String field = ((VCFInfoHeaderLine) line).getID();
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, dictionary);
log(field, writer);
siteWriters.put(field, writer);
final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, stringDictionary);
add(siteWriters, field, writer);
} else if ( line instanceof VCFFormatHeaderLine ) {
final String field = ((VCFFormatHeaderLine) line).getID();
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, dictionary);
log(field, writer);
genotypesWriters.put(field, writer);
final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, stringDictionary);
add(genotypesWriters, field, writer);
}
}
}
private final void log(final String field, final BCF2FieldWriter writer) {
/**
 * Register writer under field in map, refusing to overwrite an existing entry, and
 * log the newly added writer.
 *
 * The precondition now also requires a non-null map: the postcondition (and the body)
 * dereference it, and the sibling lookup method getWriter already declares the same
 * requirement.
 *
 * @param map the map from field name to writer that is being built
 * @param field the field name to register the writer under
 * @param writer the writer to associate with field
 * @throws ReviewedStingException if map already contains an entry for field
 */
@Requires({"map != null", "field != null", "writer != null"})
@Ensures("map.containsKey(field)")
private final <T> void add(final Map<String, T> map, final String field, final T writer) {
    if ( map.containsKey(field) )
        throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders");
    map.put(field, writer);
    logger.info(writer);
}
@ -160,14 +152,26 @@ public class BCF2FieldWriterManager {
//
// -----------------------------------------------------------------
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) {
return getWriter(key, siteWriters);
/**
 * Get the site writer specialized to encode values for a site INFO field.
 *
 * @param field key found in the VCF header INFO records
 * @return the site writer that getWriter finds for field among the site writers
 */
public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) {
    final BCF2FieldWriter.SiteWriter siteWriter = getWriter(field, siteWriters);
    return siteWriter;
}
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String key) {
return getWriter(key, genotypesWriters);
/**
 * Get the genotypes writer specialized to encode values for a genotype FORMAT field.
 *
 * @param field key found in the VCF header FORMAT records
 * @return the genotypes writer that getWriter finds for field among the genotypes writers
 */
public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) {
    final BCF2FieldWriter.GenotypesWriter genotypesWriter = getWriter(field, genotypesWriters);
    return genotypesWriter;
}
@Requires({"map != null", "key != null"})
@Ensures("result != null")
public <T> T getWriter(final String key, final Map<String, T> map) {
final T writer = map.get(key);
if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key);

View File

@ -29,7 +29,6 @@ import com.google.java.contract.Requires;
import net.sf.samtools.SAMSequenceDictionary;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
@ -40,6 +39,49 @@ import org.broadinstitute.sting.utils.variantcontext.*;
import java.io.*;
import java.util.*;
/**
* VariantContextWriter that emits BCF2 binary encoding
*
* Overall structure of this writer is complex for efficiency reasons
*
* -- The BCF2Writer manages the low-level BCF2 encoder, the mappings
* from contigs and strings to offsets, the VCF header, and holds the
* lower-level encoders that map from VC and Genotype fields to their
* specific encoders. This class also writes out the standard BCF2 fields
* like POS, contig, the size of info and genotype data, QUAL, etc. It
* has loops over the INFO and GENOTYPES to encode each individual datum
* with the generic field encoders, but the actual encoding work is
* done by the FieldWriters classes themselves
*
* -- BCF2FieldWriter are specialized classes for writing out SITE and
* genotype information for specific SITE/GENOTYPE fields (like AC for
* sites and GQ for genotypes). These are objects in themselves because
* they manage all of the complexity of relating the types in the VCF header
* with the proper encoding in BCF as well as the type representing this
* in java. Relating all three of these pieces of information together
* is the main complexity challenge in the encoder. The piece of code
* that determines which FieldWriters to associate with each SITE and
* GENOTYPE field is the BCF2FieldWriterManager. These FieldWriters
* are specialized for specific combinations of encoders (see below)
* and contexts (genotypes) for efficiency, so they smartly manage
* the writing of PLs (encoded as int[]) directly into the lowest
* level BCF2Encoder.
*
* -- At the third level is the BCF2FieldEncoder, relatively simple
* pieces of code that handle the task of determining the right
* BCF2 type for specific field values, as well as reporting back
* information such as the number of elements used to encode it
* (simple for atomic values like Integer but complex for PLs
* or lists of strings)
*
* -- At the lowest level is the BCF2Encoder itself. This provides
* just the limited encoding methods specified by the BCF2 specification. This encoder
* doesn't do anything but make it possible to conveniently write out valid low-level
* BCF2 constructs.
*
* @author Mark DePristo
* @since 06/12
*/
class BCF2Writer extends IndexingVariantContextWriter {
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);

View File

@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer;
import net.sf.samtools.SAMSequenceDictionary;
import org.broad.tribble.TribbleException;
import org.broad.tribble.util.ParsingUtils;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
import org.broadinstitute.sting.utils.variantcontext.*;
@ -329,8 +330,13 @@ class VCFWriter extends IndexingVariantContextWriter {
*/
private void addGenotypeData(VariantContext vc, Map<Allele, String> alleleMap, List<String> genotypeFormatKeys)
throws IOException {
if ( mHeader.getGenotypeSamples().size() != vc.getNSamples() )
throw new ReviewedStingException("BUG: number of VariantContext samples " + vc.getNSamples() + " != to the number of sample found in the VCF header" + mHeader.getGenotypeSamples().size());
if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) {
final List<String> badSampleNames = new ArrayList<String>();
for ( final Genotype g : vc.getGenotypes() )
if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) )
badSampleNames.add(g.getSampleName());
throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames));
}
for ( String sample : mHeader.getGenotypeSamples() ) {
mWriter.write(VCFConstants.FIELD_SEPARATOR);

View File

@ -304,7 +304,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
" --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" +
" --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf";
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s",
1, Arrays.asList("3cf734416452d953d433da6a3f418c3c"));
1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c"));
executeTestParallel("testSelect1", spec);
}
@ -332,7 +332,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
@Test
public void testCompVsEvalAC() {
String extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf";
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("722ef452dede5d23038d10eca89d4f31"));
WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d"));
executeTestParallel("testCompVsEvalAC",spec);
}
@ -535,7 +535,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-o %s"
),
1,
Arrays.asList("9236930cb26b01a9b9d770b0f048b182")
Arrays.asList("f8460af997436a5ce4407fefb0e2724d")
);
executeTest("testModernVCFWithLargeIndels", spec);
}

View File

@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -sn B -sn C --variant " + testfile),
1,
Arrays.asList("1024d7d1e563c56c2c667f98b1b81028")
Arrays.asList("6c1a9e64a00a5b312531729bc73b5183")
);
executeTest("testRepeatedLineSelection--" + testfile, spec);
@ -59,7 +59,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile),
1,
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
);
spec.disableShadowBCF();
executeTest("testComplexSelection--" + testfile, spec);
@ -73,7 +73,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
WalkerTestSpec spec = new WalkerTestSpec(
"-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile,
1,
Arrays.asList("34e714c7469b3cf5bf910222baff4cd0")
Arrays.asList("ed0f40334a82aa8e4698d5bfd8ed4d52")
);
spec.disableShadowBCF();
@ -169,7 +169,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"),
1,
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
);
spec.disableShadowBCF();
executeTest("testParallelization (2 threads)--" + testfile, spec);
@ -183,7 +183,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest {
spec = new WalkerTestSpec(
baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"),
1,
Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42")
Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8")
);
spec.disableShadowBCF();

View File

@ -31,6 +31,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2;
import org.apache.commons.lang.ArrayUtils;
import org.broadinstitute.sting.BaseTest;
import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Encoder;
import org.testng.Assert;
import org.testng.annotations.BeforeSuite;
import org.testng.annotations.DataProvider;

View File

@ -55,7 +55,7 @@ public class VariantContextTestProvider {
final private static boolean ENABLE_PLOIDY_TESTS = true;
final private static boolean ENABLE_PL_TESTS = true;
final private static boolean ENABLE_SOURCE_VCF_TESTS = true;
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = false;
final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true;
private static VCFHeader syntheticHeader;
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
@ -448,7 +448,7 @@ public class VariantContextTestProvider {
// variable sized lists
addGenotypeTests(site,
attr("g1", ref, "GV", Arrays.asList("S1")),
attr("g1", ref, "GV", "S1"),
attr("g2", ref, "GV", Arrays.asList("S3", "S4")));
addGenotypeTests(site,
@ -466,18 +466,18 @@ public class VariantContextTestProvider {
//
//
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make());
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make());
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
addGenotypeTests(site,
new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make());
new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(),
new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(),
new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make());
}
// TODO -- test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample