Support for MISSING BCF2 type
-- Heng wants to use 0x0? to represent any missing type value, which our implementation previously treated as invalid. Updated our codebase to support this construct. Heng said he'll update the BCF2 quick reference. -- Enabled the integration test that reads Heng's ex2.bcf file -- GATK now only warns (instead of throwing an exception) when the END info field is neither equal to the value returned by getEnd(), as computed by the GATK, nor exactly one greater than it (a difference of +1 is allowed because of padding). It turns out there's a single record in the 1000G SV call set that doesn't have the right length -- VariantContextTestProvider now tests round-trip stability: for a variety of variant context inputs X, it checks that X = Y, where Y is the result of X -> writing -> reading -> writing -> reading -- Added an integration test reading the 1000G SV chr1 calls (from Chris)
This commit is contained in:
parent
50365d01c4
commit
2ca5fc62a2
|
|
@ -139,25 +139,26 @@ public final class BCF2Decoder {
|
||||||
return decodeTypedValue(typeDescriptor, size);
|
return decodeTypedValue(typeDescriptor, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Requires("size >= 0")
|
||||||
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
|
||||||
|
|
||||||
assert size >= 0;
|
|
||||||
|
|
||||||
if ( size == 0 ) {
|
if ( size == 0 ) {
|
||||||
|
// missing value => null in java
|
||||||
return null;
|
return null;
|
||||||
} else if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency
|
|
||||||
return decodeLiteralString(size);
|
|
||||||
} else if ( size == 1 ) {
|
|
||||||
return decodeSingleValue(type);
|
|
||||||
} else {
|
} else {
|
||||||
final ArrayList<Object> ints = new ArrayList<Object>(size);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
for ( int i = 0; i < size; i++ ) {
|
if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency
|
||||||
final Object val = decodeSingleValue(type);
|
return decodeLiteralString(size);
|
||||||
if ( val == null ) continue; // auto-pruning. We remove trailing nulls
|
} else if ( size == 1 ) {
|
||||||
ints.add(val);
|
return decodeSingleValue(type);
|
||||||
|
} else {
|
||||||
|
final ArrayList<Object> ints = new ArrayList<Object>(size);
|
||||||
|
for ( int i = 0; i < size; i++ ) {
|
||||||
|
final Object val = decodeSingleValue(type);
|
||||||
|
if ( val == null ) continue; // auto-pruning. We remove trailing nulls
|
||||||
|
ints.add(val);
|
||||||
|
}
|
||||||
|
return ints.isEmpty() ? null : ints; // return null when all of the values are null
|
||||||
}
|
}
|
||||||
return ints.isEmpty() ? null : ints; // return null when all of the values are null
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -256,7 +257,7 @@ public final class BCF2Decoder {
|
||||||
* int elements are still forced to do a fresh allocation as well.
|
* int elements are still forced to do a fresh allocation as well.
|
||||||
* @return see description
|
* @return see description
|
||||||
*/
|
*/
|
||||||
@Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"})
|
@Requires({"type != null", "type.isIntegerType()", "size >= 0"})
|
||||||
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
||||||
if ( size == 0 ) {
|
if ( size == 0 ) {
|
||||||
return null;
|
return null;
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ import java.util.EnumSet;
|
||||||
* @since 05/12
|
* @since 05/12
|
||||||
*/
|
*/
|
||||||
public enum BCF2Type {
|
public enum BCF2Type {
|
||||||
|
MISSING(0, 0, 0x00),
|
||||||
INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range
|
INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range
|
||||||
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
||||||
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
||||||
|
|
@ -86,7 +87,7 @@ public enum BCF2Type {
|
||||||
* @param v
|
* @param v
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Requires("INTEGERS.contains(this)")
|
@Requires("this.isIntegerType()")
|
||||||
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -108,7 +109,14 @@ public enum BCF2Type {
|
||||||
/**
|
/**
|
||||||
* An enum set of the types that might represent Integer values
|
* An enum set of the types that might represent Integer values
|
||||||
*/
|
*/
|
||||||
public final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
private final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @return true if this BCF2Type corresponds to the magic "MISSING" type (0x00)
|
||||||
|
*/
|
||||||
|
public boolean isMissingType() {
|
||||||
|
return this == MISSING;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isIntegerType() {
|
public boolean isIntegerType() {
|
||||||
return INTEGERS.contains(this);
|
return INTEGERS.contains(this);
|
||||||
|
|
|
||||||
|
|
@ -225,7 +225,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final int value) {
|
public final static BCF2Type determineIntegerType(final int value) {
|
||||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||||
if ( potentialType.withinRange(value) )
|
if ( potentialType.withinRange(value) )
|
||||||
|
|
@ -235,7 +235,7 @@ public final class BCF2Utils {
|
||||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||||
BCF2Type maxType = BCF2Type.INT8;
|
BCF2Type maxType = BCF2Type.INT8;
|
||||||
|
|
@ -260,8 +260,8 @@ public final class BCF2Utils {
|
||||||
* @param t2
|
* @param t2
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
@Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"})
|
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
||||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||||
switch ( t1 ) {
|
switch ( t1 ) {
|
||||||
case INT8: return t2;
|
case INT8: return t2;
|
||||||
|
|
@ -271,7 +271,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||||
BCF2Type maxType = BCF2Type.INT8;
|
BCF2Type maxType = BCF2Type.INT8;
|
||||||
for ( final int value : values ) {
|
for ( final int value : values ) {
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.utils.variantcontext;
|
package org.broadinstitute.sting.utils.variantcontext;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.util.ParsingUtils;
|
import org.broad.tribble.util.ParsingUtils;
|
||||||
|
|
@ -176,6 +177,10 @@ import java.util.*;
|
||||||
* @author depristo
|
* @author depristo
|
||||||
*/
|
*/
|
||||||
public class VariantContext implements Feature { // to enable tribble integration
|
public class VariantContext implements Feature { // to enable tribble integration
|
||||||
|
private final static boolean WARN_ABOUT_BAD_END = true;
|
||||||
|
final protected static Logger logger = Logger.getLogger(VariantContext.class);
|
||||||
|
|
||||||
|
|
||||||
private boolean fullyDecoded = false;
|
private boolean fullyDecoded = false;
|
||||||
protected CommonInfo commonInfo = null;
|
protected CommonInfo commonInfo = null;
|
||||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||||
|
|
@ -1146,10 +1151,16 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
||||||
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
||||||
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
||||||
assert end != -1;
|
assert end != -1;
|
||||||
if ( end != getEnd() )
|
if ( end != getEnd() && end != getEnd() + 1 ) {
|
||||||
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":"
|
// the end is allowed to be 1 bigger because of the padding
|
||||||
|
final String message = "Badly formed variant context at location " + getChr() + ":"
|
||||||
+ getStart() + "; getEnd() was " + getEnd()
|
+ getStart() + "; getEnd() was " + getEnd()
|
||||||
+ " but this VariantContext contains an END key with value " + end);
|
+ " but this VariantContext contains an END key with value " + end;
|
||||||
|
if ( WARN_ABOUT_BAD_END )
|
||||||
|
logger.warn(message);
|
||||||
|
else
|
||||||
|
throw new ReviewedStingException(message);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,7 @@ import java.util.Map;
|
||||||
*/
|
*/
|
||||||
@Invariant({
|
@Invariant({
|
||||||
"headerLine != null",
|
"headerLine != null",
|
||||||
"BCF2Type.INTEGERS.contains(dictionaryOffsetType)",
|
"dictionaryOffsetType.isIntegerType()",
|
||||||
"dictionaryOffset >= 0"
|
"dictionaryOffset >= 0"
|
||||||
})
|
})
|
||||||
public abstract class BCF2FieldEncoder {
|
public abstract class BCF2FieldEncoder {
|
||||||
|
|
|
||||||
|
|
@ -338,7 +338,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("! strings.isEmpty()")
|
@Requires("! strings.isEmpty()")
|
||||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
@Ensures("result.isIntegerType()")
|
||||||
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test(enabled = true)
|
||||||
|
public void testReadingAndWriting1000GSVs() {
|
||||||
|
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
|
||||||
|
|
||||||
|
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
|
|
||||||
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
|
||||||
|
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testReadingAndWritingSamtools() {
|
public void testReadingAndWritingSamtools() {
|
||||||
String testVCF = privateTestDir + "samtools.vcf";
|
String testVCF = privateTestDir + "samtools.vcf";
|
||||||
|
|
@ -59,12 +70,12 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
executeTest("Test writing samtools WEx BCF example", spec1);
|
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = false)
|
@Test(enabled = true)
|
||||||
public void testReadingSamtoolsWExBCFExample() {
|
public void testReadingSamtoolsWExBCFExample() {
|
||||||
String testVCF = privateTestDir + "ex2.bcf";
|
String testVCF = privateTestDir + "ex2.bcf";
|
||||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94"));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0439e2b4ccc63bb4ba7c283cd9ab1b25"));
|
||||||
executeTest("Test reading samtools WEx BCF example", spec1);
|
executeTest("Test reading samtools WEx BCF example", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -597,23 +597,41 @@ public class VariantContextTestProvider {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
||||||
|
testReaderWriter(tester, data.header, data.vcs, data.vcs, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void testReaderWriter(final VariantContextIOTest tester,
|
||||||
|
final VCFHeader header,
|
||||||
|
final List<VariantContext> expected,
|
||||||
|
final Iterable<VariantContext> vcs,
|
||||||
|
final boolean recurse) throws IOException {
|
||||||
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
|
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
|
||||||
tmpFile.deleteOnExit();
|
tmpFile.deleteOnExit();
|
||||||
|
|
||||||
// todo -- test all options
|
// write expected to disk
|
||||||
|
|
||||||
// write
|
|
||||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||||
writer.writeHeader(data.header);
|
writeVCsToFile(writer, header, vcs);
|
||||||
final List<VariantContext> expected = data.vcs;
|
|
||||||
for ( VariantContext vc : expected )
|
|
||||||
writer.add(vc);
|
|
||||||
writer.close();
|
|
||||||
|
|
||||||
final Iterable<VariantContext> actual = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
// ensure writing of expected == actual
|
||||||
|
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
|
||||||
|
final Iterable<VariantContext> actual = p.getSecond();
|
||||||
assertEquals(actual, expected);
|
assertEquals(actual, expected);
|
||||||
|
|
||||||
|
if ( recurse ) {
|
||||||
|
// if we are doing a recursive test, grab a fresh iterator over the written values
|
||||||
|
final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
||||||
|
testReaderWriter(tester, p.getFirst(), expected, read, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void writeVCsToFile(final VariantContextWriter writer, final VCFHeader header, final Iterable<VariantContext> vcs) {
|
||||||
|
// write
|
||||||
|
writer.writeHeader(header);
|
||||||
|
for ( VariantContext vc : vcs )
|
||||||
|
if (vc != null)
|
||||||
|
writer.add(vc);
|
||||||
|
writer.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue