diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java similarity index 95% rename from public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java rename to public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java index 255e3c0ce..4d2eae4ab 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Encoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Encoder.java @@ -22,9 +22,11 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -package org.broadinstitute.sting.utils.codecs.bcf2; +package org.broadinstitute.sting.utils.variantcontext.writer; import com.google.java.contract.Requires; +import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; +import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import java.io.ByteArrayOutputStream; @@ -33,10 +35,10 @@ import java.io.OutputStream; import java.util.*; /** - * BCF2 encoder + * See #BCFWriter for documentation on this classes role in encoding BCF2 files * - * @author depristo - * @since 5/12 + * @author Mark DePristo + * @since 06/12 */ public final class BCF2Encoder { // TODO -- increase default size? @@ -62,7 +64,7 @@ public final class BCF2Encoder { /** * Method for writing raw bytes to the encoder stream * - * The purpuse this method exists is to allow lazy decoding of genotype data. In that + * The purpose this method exists is to allow lazy decoding of genotype data. In that * situation the reader has loaded a block of bytes, and never decoded it, so we * are just writing it back out immediately as a raw stream of blocks. 
Any * bad low-level formatting or changes to that byte[] will result in a malformed @@ -93,7 +95,7 @@ public final class BCF2Encoder { public final void encodeTyped(List v, final BCF2Type type) throws IOException { if ( type == BCF2Type.CHAR && v.size() != 0 ) { - final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List)v) : (String)v.get(0); + final String s = v.size() > 1 ? BCF2Utils.collapseStringList((List) v) : (String)v.get(0); v = stringToBytes(s); } @@ -200,7 +202,7 @@ public final class BCF2Encoder { * @param o * @return */ - protected final BCF2Type encode(final Object o) throws IOException { + public final BCF2Type encode(final Object o) throws IOException { if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values"); if ( o instanceof List ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index 6a9136f67..8e5d6fdef 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -27,7 +27,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer; import com.google.java.contract.Ensures; import com.google.java.contract.Invariant; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.codecs.vcf.VCFCompoundHeaderLine; @@ -41,10 +40,10 @@ import java.util.List; import java.util.Map; /** + * See #BCFWriter for documentation on this classes role in encoding BCF2 files * - * - * @author Your Name - * @since Date created + * @author Mark DePristo + * @since 06/12 */ @Invariant({ "headerLine != null", @@ -52,9 +51,26 
@@ import java.util.Map; "dictionaryOffset >= 0" }) public abstract class BCF2FieldEncoder { + /** + * The header line describing the field we will encode values of + */ final VCFCompoundHeaderLine headerLine; - final BCF2Type fixedType; + + /** + * The BCF2 type we'll use to encoder this field, if it can be determined statically. + * If not, this variable must be null + */ + final BCF2Type staticType; + + /** + * The integer offset into the strings map of the BCF2 file corresponding to this + * field. + */ final int dictionaryOffset; + + /** + * The integer type we use to encode our dictionary offset in the BCF2 file + */ final BCF2Type dictionaryOffsetType; // ---------------------------------------------------------------------- @@ -63,9 +79,10 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- - public BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map dict, final BCF2Type fixedType) { + @Requires({"headerLine != null", "dict != null"}) + private BCF2FieldEncoder(final VCFCompoundHeaderLine headerLine, final Map dict, final BCF2Type staticType) { this.headerLine = headerLine; - this.fixedType = fixedType; + this.staticType = staticType; final Integer offset = dict.get(getField()); if ( offset == null ) throw new ReviewedStingException("Format error: could not find string " + getField() + " in header as required by BCF"); @@ -79,6 +96,7 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- + @Ensures("result != null") public final String getField() { return headerLine.getID(); } /** @@ -87,6 +105,7 @@ public abstract class BCF2FieldEncoder { * @param encoder where we write our dictionary offset * @throws IOException */ + @Requires("encoder != null") public final void writeFieldKey(final BCF2Encoder encoder) throws IOException { encoder.encodeTyped(dictionaryOffset, dictionaryOffsetType); } @@ -102,44 +121,81 @@ 
public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- + @Ensures("result != null") protected final VCFHeaderLineCount getCountType() { return headerLine.getCountType(); } + /** + * True if this field has a constant, fixed number of elements (such as 1 for an atomic integer) + * + * @return + */ @Ensures("result != (hasValueDeterminedNumElements() || hasContextDeterminedNumElements())") public boolean hasConstantNumElements() { return getCountType() == VCFHeaderLineCount.INTEGER; } + /** + * True if the only way to determine how many elements this field contains is by + * inspecting the actual value directly, such as when the number of elements + * is a variable length list per site or per genotype. + * @return + */ @Ensures("result != (hasConstantNumElements() || hasContextDeterminedNumElements())") public boolean hasValueDeterminedNumElements() { return getCountType() == VCFHeaderLineCount.UNBOUNDED; } + /** + * True if this field has a non-fixed number of elements that depends only on the properties + * of the current VariantContext, such as one value per Allele or per genotype configuration. + * + * @return + */ @Ensures("result != (hasValueDeterminedNumElements() || hasConstantNumElements())") public boolean hasContextDeterminedNumElements() { return ! hasConstantNumElements() && ! hasValueDeterminedNumElements(); } + /** + * Get the number of elements, assuming this field has a constant number of elements. + * @return + */ @Requires("hasConstantNumElements()") @Ensures("result >= 0") public int numElements() { return headerLine.getCount(); } + /** + * Get the number of elements by looking at the actual value provided + * @return + */ @Requires("hasValueDeterminedNumElements()") @Ensures("result >= 0") public int numElements(final Object value) { return numElementsFromValue(value); - //return value instanceof List ? 
((List) value).size() : 1; } + /** + * Get the number of elements, assuming this field has context-determined number of elements. + * @return + */ @Requires("hasContextDeterminedNumElements()") @Ensures("result >= 0") public int numElements(final VariantContext vc) { return headerLine.getCount(vc.getNAlleles() - 1); } + /** + * A convenience accessor for the number of elements, returning + * the number of encoded elements, either from the fixed number + * it has, from the VC, or from the value itself. + * @param vc + * @param value + * @return + */ @Ensures("result >= 0") public final int numElements(final VariantContext vc, final Object value) { if ( hasConstantNumElements() ) return numElements(); @@ -169,12 +225,28 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- + /** + * Is the BCF2 type of this field static, or does it have to be determined from + * the actual field value itself? + * @return + */ @Ensures("result || isDynamicallyTyped()") public final boolean isStaticallyTyped() { return ! isDynamicallyTyped(); } + /** + * Is the BCF2 type of this field dynamic, i.e. does it have to be determined from + * the actual field value itself because no static type is set? + * @return + */ @Ensures("result || isStaticallyTyped()") - public final boolean isDynamicallyTyped() { return fixedType == null; } + public final boolean isDynamicallyTyped() { return staticType == null; } + /** + * Get the BCF2 type for this field, either from the static type of the + * field itself or by inspecting the value itself. + * + * @return + */ public final BCF2Type getType(final Object value) { return isDynamicallyTyped() ? 
getDynamicType(value) : getStaticType(); } @@ -182,7 +254,7 @@ public abstract class BCF2FieldEncoder { @Requires("isStaticallyTyped()") @Ensures("result != null") public final BCF2Type getStaticType() { - return fixedType; + return staticType; } @Requires("isDynamicallyTyped()") @@ -197,11 +269,41 @@ public abstract class BCF2FieldEncoder { // // ---------------------------------------------------------------------- + /** + * Convenience method that just calls encodeValue with no minimum for the number of values. + * + * Primarily useful for encoding site values + * + * @param encoder + * @param value + * @param type + * @throws IOException + */ @Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()"}) public void encodeOneValue(final BCF2Encoder encoder, final Object value, final BCF2Type type) throws IOException { encodeValue(encoder, value, type, 0); } + /** + * Key abstract method that should encode a value of the given type into the encoder. + * + * Value will be of a type appropriate to the underlying encoder. If the genotype field is represented as + * an int[], this will be value, and the encoder needs to handle encoding all of the values in the int[]. + * + * The type argument should be used, not the getType() method in the superclass as an outer loop might have + * decided a more general type (int16) to use, even though this value could have been encoded with int8. + * + * If minValues > 0, then encodeValue must write in at least minValues items from value. If value is atomic, + * this means that minValues - 1 MISSING values should be added to the encoder. If value is a collection + * type (int[]) then minValues - value.length MISSING values should be added. This argument is intended to handle padding + * of values in genotype fields. 
+ * + * @param encoder + * @param value + * @param type + * @param minValues + * @throws IOException + */ @Requires({"encoder != null", "isDynamicallyTyped() || type == getStaticType()", "minValues >= 0"}) public abstract void encodeValue(final BCF2Encoder encoder, final Object value, final BCF2Type type, final int minValues) throws IOException; @@ -243,11 +345,15 @@ public abstract class BCF2FieldEncoder { */ @Ensures("result != null") private String javaStringToBCF2String(final Object value) { - return value == null - ? "" - : (value instanceof List - ? BCF2Utils.collapseStringList((List)value) - : (String)value); + if ( value == null ) + return ""; + else if (value instanceof List) { + if ( ((List) value).size() == 1 ) + return (String)((List) value).get(0); + else + return BCF2Utils.collapseStringList((List)value); + } else + return (String)value; } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java index 4d915ea2e..1c62590a8 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriter.java @@ -25,7 +25,6 @@ package org.broadinstitute.sting.utils.variantcontext.writer; import com.google.java.contract.Requires; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader; @@ -41,10 +40,10 @@ import java.util.List; import java.util.Map; /** - * + * See #BCFWriter for documentation on this classes role in encoding BCF2 files * * @author Mark DePristo - * @since 6/12 + * @since 06/12 */ public abstract class BCF2FieldWriter { private final VCFHeader header; diff --git 
a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java index 7ce4f64e4..665c2cc21 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldWriterManager.java @@ -24,8 +24,9 @@ package org.broadinstitute.sting.utils.variantcontext.writer; +import com.google.java.contract.Ensures; +import com.google.java.contract.Requires; import org.apache.log4j.Logger; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; @@ -33,31 +34,10 @@ import java.util.HashMap; import java.util.Map; /** - * [Short one sentence description of this walker] - *

- *

- * [Functionality of this walker] - *

- *

- *

Input

- *

- * [Input description] - *

- *

- *

Output

- *

- * [Output description] - *

- *

- *

Examples

- *
- *    java
- *      -jar GenomeAnalysisTK.jar
- *      -T $WalkerName
- *  
+ * See #BCFWriter for documentation on this classes role in encoding BCF2 files * - * @author Your Name - * @since Date created + * @author Mark DePristo + * @since 06/12 */ public class BCF2FieldWriterManager { final protected static Logger logger = Logger.getLogger(BCF2FieldWriterManager.class); @@ -67,23 +47,35 @@ public class BCF2FieldWriterManager { public BCF2FieldWriterManager() { } - public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map dictionary) { + /** + * Setup the FieldWriters appropriate to each INFO and FORMAT in the VCF header + * + * Must be called before any of the getter methods will work + * + * @param header a VCFHeader containing description for every INFO and FORMAT field we'll attempt to write out to BCF + * @param encoder the encoder we are going to use to write out the BCF2 data + * @param stringDictionary a map from VCFHeader strings to their offsets for encoding + */ + public void setup(final VCFHeader header, final BCF2Encoder encoder, final Map stringDictionary) { for (final VCFHeaderLine line : header.getMetaData()) { if ( line instanceof VCFInfoHeaderLine ) { final String field = ((VCFInfoHeaderLine) line).getID(); - final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, dictionary); - log(field, writer); - siteWriters.put(field, writer); + final BCF2FieldWriter.SiteWriter writer = createInfoWriter(header, (VCFInfoHeaderLine)line, encoder, stringDictionary); + add(siteWriters, field, writer); } else if ( line instanceof VCFFormatHeaderLine ) { final String field = ((VCFFormatHeaderLine) line).getID(); - final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, dictionary); - log(field, writer); - genotypesWriters.put(field, writer); + final BCF2FieldWriter.GenotypesWriter writer = createGenotypesWriter(header, (VCFFormatHeaderLine)line, encoder, stringDictionary); + add(genotypesWriters, field, writer); } } 
} - private final void log(final String field, final BCF2FieldWriter writer) { + @Requires({"field != null", "writer != null"}) + @Ensures("map.containsKey(field)") + private final void add(final Map map, final String field, final T writer) { + if ( map.containsKey(field) ) + throw new ReviewedStingException("BUG: field " + field + " already seen in VCFHeader while building BCF2 field encoders"); + map.put(field, writer); logger.info(writer); } @@ -160,14 +152,26 @@ public class BCF2FieldWriterManager { // // ----------------------------------------------------------------- - public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String key) { - return getWriter(key, siteWriters); + /** + * Get a site writer specialized to encode values for site info field + * @param field key found in the VCF header INFO records + * @return + */ + public BCF2FieldWriter.SiteWriter getSiteFieldWriter(final String field) { + return getWriter(field, siteWriters); } - public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String key) { - return getWriter(key, genotypesWriters); + /** + * Get a genotypes writer specialized to encode values for genotypes field + * @param field key found in the VCF header FORMAT records + * @return + */ + public BCF2FieldWriter.GenotypesWriter getGenotypeFieldWriter(final String field) { + return getWriter(field, genotypesWriters); } + @Requires({"map != null", "key != null"}) + @Ensures("result != null") public T getWriter(final String key, final Map map) { final T writer = map.get(key); if ( writer == null ) throw new ReviewedStingException("BUG: no writer found for " + key); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 2c44c0fc8..3beb101fa 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ 
b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -29,7 +29,6 @@ import com.google.java.contract.Requires; import net.sf.samtools.SAMSequenceDictionary; import org.apache.log4j.Logger; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec; -import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Encoder; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type; import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; @@ -40,6 +39,49 @@ import org.broadinstitute.sting.utils.variantcontext.*; import java.io.*; import java.util.*; +/** + * VariantContextWriter that emits BCF2 binary encoding + * + * Overall structure of this writer is complex for efficiency reasons + * + * -- The BCF2Writer manages the low-level BCF2 encoder, the mappings + * from contigs and strings to offsets, the VCF header, and holds the + * lower-level encoders that map from VC and Genotype fields to their + * specific encoders. This class also writes out the standard BCF2 fields + * like POS, contig, the size of info and genotype data, QUAL, etc. It + * has loops over the INFO and GENOTYPES to encode each individual datum + * with the generic field encoders, but the actual encoding work is + * done by the FieldWriter classes themselves + * + * -- BCF2FieldWriter are specialized classes for writing out SITE and + * genotype information for specific SITE/GENOTYPE fields (like AC for + * sites and GQ for genotypes). These are objects in themselves because + * they manage all of the complexity of relating the types in the VCF header + * with the proper encoding in BCF as well as the type representing this + * in java. Relating all three of these pieces of information together + * is the main complexity challenge in the encoder. The piece of code + * that determines which FieldWriters to associate with each SITE and + * GENOTYPE field is the BCF2FieldWriterManager. 
These FieldWriters + * are specialized for specific combinations of encoders (see below) + * and contexts (genotypes) for efficiency, so they smartly manage + * the writing of PLs (encoded as int[]) directly into the lowest + * level BCFEncoder. + * + * -- At the third level is the BCF2FieldEncoder, relatively simple + * pieces of code that handle the task of determining the right + * BCF2 type for specific field values, as well as reporting back + * information such as the number of elements used to encode it + * (simple for atomic values like Integer but complex for PLs + * or lists of strings) + * + * -- At the lowest level is the BCF2Encoder itself. This provides + * just the limited encoding methods specified by the BCF2 specification. This encoder + * doesn't do anything but make it possible to conveniently write out valid low-level + * BCF2 constructs. + * + * @author Mark DePristo + * @since 06/12 + */ class BCF2Writer extends IndexingVariantContextWriter { final protected static Logger logger = Logger.getLogger(BCF2Writer.class); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java index 1e87349df..16795061c 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/VCFWriter.java @@ -27,6 +27,7 @@ package org.broadinstitute.sting.utils.variantcontext.writer; import net.sf.samtools.SAMSequenceDictionary; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.vcf.*; import org.broadinstitute.sting.utils.exceptions.ReviewedStingException; import org.broadinstitute.sting.utils.variantcontext.*; @@ -329,8 +330,13 @@ class VCFWriter extends IndexingVariantContextWriter { */ private void 
addGenotypeData(VariantContext vc, Map alleleMap, List genotypeFormatKeys) throws IOException { - if ( mHeader.getGenotypeSamples().size() != vc.getNSamples() ) - throw new ReviewedStingException("BUG: number of VariantContext samples " + vc.getNSamples() + " != to the number of sample found in the VCF header" + mHeader.getGenotypeSamples().size()); + if ( ! mHeader.getGenotypeSamples().containsAll(vc.getSampleNames()) ) { + final List badSampleNames = new ArrayList(); + for ( final Genotype g : vc.getGenotypes() ) + if ( ! mHeader.getGenotypeSamples().contains(g.getSampleName()) ) + badSampleNames.add(g.getSampleName()); + throw new ReviewedStingException("BUG: VariantContext contains some samples not in the VCF header: bad samples are " + Utils.join(",",badSampleNames)); + } for ( String sample : mHeader.getGenotypeSamples() ) { mWriter.write(VCFConstants.FIELD_SEPARATOR); diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index acd67694f..c1dd8b18b 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -304,7 +304,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { " --eval " + validationDataLocation + "yri.trio.gatk_glftrio.intersection.annotated.filtered.chr1.vcf" + " --comp:comp_genotypes " + testDir + "yri.trio.gatk.ug.head.vcf"; WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -ST CpG -o %s", - 1, Arrays.asList("3cf734416452d953d433da6a3f418c3c")); + 1, Arrays.asList("4b9dcbce0717285e3c0c736c1bed744c")); executeTestParallel("testSelect1", spec); } @@ -332,7 +332,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { @Test public void testCompVsEvalAC() { String 
extraArgs = "-T VariantEval -R "+b36KGReference+" -o %s -ST CpG -EV GenotypeConcordance --eval:evalYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.ug.very.few.lines.vcf --comp:compYRI,VCF3 " + validationDataLocation + "yri.trio.gatk.fake.genotypes.ac.test.vcf"; - WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("722ef452dede5d23038d10eca89d4f31")); + WalkerTestSpec spec = new WalkerTestSpec(extraArgs,1,Arrays.asList("659a15cc842f0310106fa595a26da71d")); executeTestParallel("testCompVsEvalAC",spec); } @@ -535,7 +535,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("9236930cb26b01a9b9d770b0f048b182") + Arrays.asList("f8460af997436a5ce4407fefb0e2724d") ); executeTest("testModernVCFWithLargeIndels", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java index 2e0a1e89d..8eecfa5be 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/variantutils/SelectVariantsIntegrationTest.java @@ -31,7 +31,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -sn B -sn C --variant " + testfile), 1, - Arrays.asList("1024d7d1e563c56c2c667f98b1b81028") + Arrays.asList("6c1a9e64a00a5b312531729bc73b5183") ); executeTest("testRepeatedLineSelection--" + testfile, spec); @@ -59,7 +59,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile), 1, - Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42") + Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8") ); spec.disableShadowBCF(); 
executeTest("testComplexSelection--" + testfile, spec); @@ -73,7 +73,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T SelectVariants -R " + b36KGReference + " -L 1:1-1000000 -o %s --no_cmdline_in_header -xl_sn A -xl_sf " + samplesFile + " --variant " + testfile, 1, - Arrays.asList("34e714c7469b3cf5bf910222baff4cd0") + Arrays.asList("ed0f40334a82aa8e4698d5bfd8ed4d52") ); spec.disableShadowBCF(); @@ -169,7 +169,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 2"), 1, - Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42") + Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8") ); spec.disableShadowBCF(); executeTest("testParallelization (2 threads)--" + testfile, spec); @@ -183,7 +183,7 @@ public class SelectVariantsIntegrationTest extends WalkerTest { spec = new WalkerTestSpec( baseTestString(" -sn A -se '[CDH]' -sf " + samplesFile + " -env -ef -select 'DP < 250' --variant " + testfile + " -nt 4"), 1, - Arrays.asList("357c26f0a57f9d59a3bfca168af4fe42") + Arrays.asList("eb1d0ff1db27413c14ea1af52b2f74c8") ); spec.disableShadowBCF(); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index 52cbd63e7..0e42c3173 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -31,6 +31,7 @@ package org.broadinstitute.sting.utils.codecs.bcf2; import org.apache.commons.lang.ArrayUtils; import org.broadinstitute.sting.BaseTest; +import org.broadinstitute.sting.utils.variantcontext.writer.BCF2Encoder; import org.testng.Assert; import org.testng.annotations.BeforeSuite; 
import org.testng.annotations.DataProvider; diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index 64f9cbd6a..1e9808680 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -55,7 +55,7 @@ public class VariantContextTestProvider { final private static boolean ENABLE_PLOIDY_TESTS = true; final private static boolean ENABLE_PL_TESTS = true; final private static boolean ENABLE_SOURCE_VCF_TESTS = true; - final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = false; + final private static boolean ENABLE_VARIABLE_LENGTH_GENOTYPE_STRING_TESTS = true; private static VCFHeader syntheticHeader; final static List TEST_DATAs = new ArrayList(); @@ -448,7 +448,7 @@ public class VariantContextTestProvider { // variable sized lists addGenotypeTests(site, - attr("g1", ref, "GV", Arrays.asList("S1")), + attr("g1", ref, "GV", "S1"), attr("g2", ref, "GV", Arrays.asList("S3", "S4"))); addGenotypeTests(site, @@ -466,18 +466,18 @@ public class VariantContextTestProvider { // // addGenotypeTests(site, - new GenotypeBuilder("g1", Arrays.asList(ref, ref)).filters("X").make(), - new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make()); + new GenotypeBuilder("g1-x", Arrays.asList(ref, ref)).filters("X").make(), + new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make()); addGenotypeTests(site, - new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(), - new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make()); + new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(), + new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make()); addGenotypeTests(site, - new 
GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(), - new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X", "Y").make()); + new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(), + new GenotypeBuilder("g2-xy", Arrays.asList(ref, ref)).filters("X", "Y").make()); addGenotypeTests(site, - new GenotypeBuilder("g1", Arrays.asList(ref, ref)).unfiltered().make(), - new GenotypeBuilder("g2", Arrays.asList(ref, ref)).filters("X").make(), - new GenotypeBuilder("g3", Arrays.asList(ref, ref)).filters("X", "Y").make()); + new GenotypeBuilder("g1-unft", Arrays.asList(ref, ref)).unfiltered().make(), + new GenotypeBuilder("g2-x", Arrays.asList(ref, ref)).filters("X").make(), + new GenotypeBuilder("g3-xy", Arrays.asList(ref, ref)).filters("X", "Y").make()); } // TODO -- test test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample