diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 7a6d96131..a13be21c5 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -139,25 +139,26 @@ public final class BCF2Decoder { return decodeTypedValue(typeDescriptor, size); } + @Requires("size >= 0") public final Object decodeTypedValue(final byte typeDescriptor, final int size) { - final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); - - assert size >= 0; - if ( size == 0 ) { + // missing value => null in java return null; - } else if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency - return decodeLiteralString(size); - } else if ( size == 1 ) { - return decodeSingleValue(type); } else { - final ArrayList ints = new ArrayList(size); - for ( int i = 0; i < size; i++ ) { - final Object val = decodeSingleValue(type); - if ( val == null ) continue; // auto-pruning. We remove trailing nulls - ints.add(val); + final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); + if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency + return decodeLiteralString(size); + } else if ( size == 1 ) { + return decodeSingleValue(type); + } else { + final ArrayList ints = new ArrayList(size); + for ( int i = 0; i < size; i++ ) { + final Object val = decodeSingleValue(type); + if ( val == null ) continue; // auto-pruning. We remove trailing nulls + ints.add(val); + } + return ints.isEmpty() ? null : ints; // return null when all of the values are null } - return ints.isEmpty() ? null : ints; // return null when all of the values are null } } @@ -256,7 +257,7 @@ public final class BCF2Decoder { * int elements are still forced to do a fresh allocation as well. * @return see description */ - @Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"}) + @Requires({"type != null", "type.isIntegerType()", "size >= 0"}) public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) { if ( size == 0 ) { return null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java index f874b14cd..49f375b25 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Type.java @@ -35,6 +35,7 @@ import java.util.EnumSet; * @since 05/12 */ public enum BCF2Type { + MISSING(0, 0, 0x00), INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range INT16(2, 2, 0xFFFF8000, -32767, 32767), INT32(3, 4, 0x80000000, -2147483647, 2147483647), @@ -86,7 +87,7 @@ public enum BCF2Type { * @param v * @return */ - @Requires("INTEGERS.contains(this)") + @Requires("this.isIntegerType()") public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; } /** @@ -108,7 +109,14 @@ public enum BCF2Type { /** * An enum set of the types that might represent Integer values */ - public final static EnumSet INTEGERS = EnumSet.of(INT8, INT16, INT32); + private final static EnumSet INTEGERS = EnumSet.of(INT8, INT16, INT32); + + /** + * @return true if this BCF2Type corresponds to the magic "MISSING" type (0x00) + */ + public boolean isMissingType() { + return this == MISSING; + } public boolean isIntegerType() { return INTEGERS.contains(this); diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java index 143ab52df..43e933948 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Utils.java @@ -225,7 +225,7 @@ public final class BCF2Utils { } } - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Ensures("result.isIntegerType()") public final static BCF2Type determineIntegerType(final int value) { for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) { if ( potentialType.withinRange(value) ) @@ -235,7 +235,7 @@ public final class BCF2Utils { throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value); } - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Ensures("result.isIntegerType()") public final static BCF2Type determineIntegerType(final int[] values) { // literally a copy of the code below, but there's no general way to unify lists and arrays in java BCF2Type maxType = BCF2Type.INT8; @@ -260,8 +260,8 @@ public final class BCF2Utils { * @param t2 * @return */ - @Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"}) - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Requires({"t1.isIntegerType()","t2.isIntegerType()"}) + @Ensures("result.isIntegerType()") public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) { switch ( t1 ) { case INT8: return t2; @@ -271,7 +271,7 @@ public final class BCF2Utils { } } - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Ensures("result.isIntegerType()") public final static BCF2Type determineIntegerType(final List values) { BCF2Type maxType = BCF2Type.INT8; for ( final int value : values ) { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index 1f0b2b054..dcdd95d00 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.utils.variantcontext; +import org.apache.log4j.Logger; import org.broad.tribble.Feature; import org.broad.tribble.TribbleException; import org.broad.tribble.util.ParsingUtils; @@ -176,6 +177,10 @@ import java.util.*; * @author depristo */ public class VariantContext implements Feature { // to enable tribble integration + private final static boolean WARN_ABOUT_BAD_END = true; + final protected static Logger logger = Logger.getLogger(VariantContext.class); + + private boolean fullyDecoded = false; protected CommonInfo commonInfo = null; public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR; @@ -1146,10 +1151,16 @@ public class VariantContext implements Feature { // to enable tribble integratio if ( hasAttribute(VCFConstants.END_KEY) ) { final int end = getAttributeAsInt(VCFConstants.END_KEY, -1); assert end != -1; - if ( end != getEnd() ) - throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":" + if ( end != getEnd() && end != getEnd() + 1 ) { + // the end is allowed to 1 bigger because of the padding + final String message = "Badly formed variant context at location " + getChr() + ":" + getStart() + "; getEnd() was " + getEnd() - + " but this VariantContext contains an END key with value " + end); + + " but this VariantContext contains an END key with value " + end; + if ( WARN_ABOUT_BAD_END ) + logger.warn(message); + else + throw new ReviewedStingException(message); + } } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java index 812e6dd07..ddeb4d284 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2FieldEncoder.java @@ -47,7 +47,7 @@ import java.util.Map; */ @Invariant({ "headerLine != null", - "BCF2Type.INTEGERS.contains(dictionaryOffsetType)", + "dictionaryOffsetType.isIntegerType()", "dictionaryOffset >= 0" }) public abstract class BCF2FieldEncoder { diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java index 45610bbf9..c77fe7172 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/writer/BCF2Writer.java @@ -338,7 +338,7 @@ class BCF2Writer extends IndexingVariantContextWriter { } @Requires("! strings.isEmpty()") - @Ensures("BCF2Type.INTEGERS.contains(result)") + @Ensures("result.isIntegerType()") private final BCF2Type encodeStringsByRef(final Collection strings) throws IOException { final List offsets = new ArrayList(strings.size()); diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java index b271d8c84..e9b845d59 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/vcf/VCFIntegrationTest.java @@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test reading and writing breakpoint VCF", spec1); } + @Test(enabled = true) + public void testReadingAndWriting1000GSVs() { + String testVCF = privateTestDir + "1000G_SVs.chr1.vcf"; + + String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s "; + + String test1 = baseCommand + "-T SelectVariants -V " + testVCF; + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("")); + executeTest("Test reading and writing 1000G Phase I SVs", spec1); + } + @Test public void testReadingAndWritingSamtools() { String testVCF = privateTestDir + "samtools.vcf"; @@ -59,12 +70,12 @@ public class VCFIntegrationTest extends WalkerTest { executeTest("Test writing samtools WEx BCF example", spec1); } - @Test(enabled = false) + @Test(enabled = true) public void testReadingSamtoolsWExBCFExample() { String testVCF = privateTestDir + "ex2.bcf"; String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s "; String test1 = baseCommand + "-T SelectVariants -V " + testVCF; - WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94")); + WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0439e2b4ccc63bb4ba7c283cd9ab1b25")); executeTest("Test reading samtools WEx BCF example", spec1); } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java index ca4cdf306..c436a7b45 100644 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/VariantContextTestProvider.java @@ -597,23 +597,41 @@ public class VariantContextTestProvider { } public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException { + testReaderWriter(tester, data.header, data.vcs, data.vcs, true); + } + + public static void testReaderWriter(final VariantContextIOTest tester, + final VCFHeader header, + final List expected, + final Iterable vcs, + final boolean recurse) throws IOException { final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension()); tmpFile.deleteOnExit(); - // todo -- test all options - - // write + // write expected to disk final EnumSet options = EnumSet.of(Options.INDEX_ON_THE_FLY); final VariantContextWriter writer = tester.makeWriter(tmpFile, options); - writer.writeHeader(data.header); - final List expected = data.vcs; - for ( VariantContext vc : expected ) - writer.add(vc); - writer.close(); + writeVCsToFile(writer, header, vcs); - final Iterable actual = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); + // ensure writing of expected == actual + final Pair> p = readAllVCs(tmpFile, tester.makeCodec()); + final Iterable actual = p.getSecond(); assertEquals(actual, expected); + if ( recurse ) { + // if we are doing a recursive test, grab a fresh iterator over the written values + final Iterable read = readAllVCs(tmpFile, tester.makeCodec()).getSecond(); + testReaderWriter(tester, p.getFirst(), expected, read, false); + } + } + + private static void writeVCsToFile(final VariantContextWriter writer, final VCFHeader header, final Iterable vcs) { + // write + writer.writeHeader(header); + for ( VariantContext vc : vcs ) + if (vc != null) + writer.add(vc); + writer.close(); } /**