Support for MISSING BCF2 type
-- Heng wants to use 0x0? to represent any missing type value, which was invalid in our implementation. Updated our codebase to support this construct. Heng said he'll update the BCF2 quick reference.
-- Enabled the integration test that reads Heng's ex2.bcf file.
-- GATK now only warns when the END info field differs from the value of getEnd() as determined by the GATK (END may equal getEnd(), or getEnd() + 1 due to padding). It turns out there's a single record in the 1000G SV call set that doesn't have the right length.
-- VariantContextTestProvider now tests that X = Y, where Y is the result of X -> writing -> reading -> writing -> reading, for a variety of variant context inputs X.
-- Added an integration test reading the 1000G SV chr1 calls (from Chris).
This commit is contained in:
parent
50365d01c4
commit
2ca5fc62a2
|
|
@ -139,25 +139,26 @@ public final class BCF2Decoder {
|
|||
return decodeTypedValue(typeDescriptor, size);
|
||||
}
|
||||
|
||||
@Requires("size >= 0")
|
||||
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size >= 0;
|
||||
|
||||
if ( size == 0 ) {
|
||||
// missing value => null in java
|
||||
return null;
|
||||
} else if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency
|
||||
return decodeLiteralString(size);
|
||||
} else if ( size == 1 ) {
|
||||
return decodeSingleValue(type);
|
||||
} else {
|
||||
final ArrayList<Object> ints = new ArrayList<Object>(size);
|
||||
for ( int i = 0; i < size; i++ ) {
|
||||
final Object val = decodeSingleValue(type);
|
||||
if ( val == null ) continue; // auto-pruning. We remove trailing nulls
|
||||
ints.add(val);
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
if ( type == BCF2Type.CHAR ) { // special case string decoding for efficiency
|
||||
return decodeLiteralString(size);
|
||||
} else if ( size == 1 ) {
|
||||
return decodeSingleValue(type);
|
||||
} else {
|
||||
final ArrayList<Object> ints = new ArrayList<Object>(size);
|
||||
for ( int i = 0; i < size; i++ ) {
|
||||
final Object val = decodeSingleValue(type);
|
||||
if ( val == null ) continue; // auto-pruning. We remove trailing nulls
|
||||
ints.add(val);
|
||||
}
|
||||
return ints.isEmpty() ? null : ints; // return null when all of the values are null
|
||||
}
|
||||
return ints.isEmpty() ? null : ints; // return null when all of the values are null
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -256,7 +257,7 @@ public final class BCF2Decoder {
|
|||
* int elements are still forced to do a fresh allocation as well.
|
||||
* @return see description
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(type)", "size >= 0", "type != null"})
|
||||
@Requires({"type != null", "type.isIntegerType()", "size >= 0"})
|
||||
public final int[] decodeIntArray(final int size, final BCF2Type type, int[] maybeDest) {
|
||||
if ( size == 0 ) {
|
||||
return null;
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ import java.util.EnumSet;
|
|||
* @since 05/12
|
||||
*/
|
||||
public enum BCF2Type {
|
||||
MISSING(0, 0, 0x00),
|
||||
INT8 (1, 1, 0xFFFFFF80, -127, 127), // todo -- confirm range
|
||||
INT16(2, 2, 0xFFFF8000, -32767, 32767),
|
||||
INT32(3, 4, 0x80000000, -2147483647, 2147483647),
|
||||
|
|
@ -86,7 +87,7 @@ public enum BCF2Type {
|
|||
* @param v
|
||||
* @return
|
||||
*/
|
||||
@Requires("INTEGERS.contains(this)")
|
||||
@Requires("this.isIntegerType()")
|
||||
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
||||
|
||||
/**
|
||||
|
|
@ -108,7 +109,14 @@ public enum BCF2Type {
|
|||
/**
|
||||
* An enum set of the types that might represent Integer values
|
||||
*/
|
||||
public final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
||||
private final static EnumSet<BCF2Type> INTEGERS = EnumSet.of(INT8, INT16, INT32);
|
||||
|
||||
/**
|
||||
* @return true if this BCF2Type corresponds to the magic "MISSING" type (0x00)
|
||||
*/
|
||||
public boolean isMissingType() {
|
||||
return this == MISSING;
|
||||
}
|
||||
|
||||
public boolean isIntegerType() {
|
||||
return INTEGERS.contains(this);
|
||||
|
|
|
|||
|
|
@ -225,7 +225,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int value) {
|
||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
|
|
@ -235,7 +235,7 @@ public final class BCF2Utils {
|
|||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
|
|
@ -260,8 +260,8 @@ public final class BCF2Utils {
|
|||
* @param t2
|
||||
* @return
|
||||
*/
|
||||
@Requires({"BCF2Type.INTEGERS.contains(t1)","BCF2Type.INTEGERS.contains(t2)"})
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||
switch ( t1 ) {
|
||||
case INT8: return t2;
|
||||
|
|
@ -271,7 +271,7 @@ public final class BCF2Utils {
|
|||
}
|
||||
}
|
||||
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
@Ensures("result.isIntegerType()")
|
||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||
BCF2Type maxType = BCF2Type.INT8;
|
||||
for ( final int value : values ) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.util.ParsingUtils;
|
||||
|
|
@ -176,6 +177,10 @@ import java.util.*;
|
|||
* @author depristo
|
||||
*/
|
||||
public class VariantContext implements Feature { // to enable tribble integration
|
||||
private final static boolean WARN_ABOUT_BAD_END = true;
|
||||
final protected static Logger logger = Logger.getLogger(VariantContext.class);
|
||||
|
||||
|
||||
private boolean fullyDecoded = false;
|
||||
protected CommonInfo commonInfo = null;
|
||||
public final static double NO_LOG10_PERROR = CommonInfo.NO_LOG10_PERROR;
|
||||
|
|
@ -1146,10 +1151,16 @@ public class VariantContext implements Feature { // to enable tribble integratio
|
|||
if ( hasAttribute(VCFConstants.END_KEY) ) {
|
||||
final int end = getAttributeAsInt(VCFConstants.END_KEY, -1);
|
||||
assert end != -1;
|
||||
if ( end != getEnd() )
|
||||
throw new ReviewedStingException("Badly formed variant context at location " + getChr() + ":"
|
||||
if ( end != getEnd() && end != getEnd() + 1 ) {
|
||||
// the end is allowed to be 1 bigger because of the padding
|
||||
final String message = "Badly formed variant context at location " + getChr() + ":"
|
||||
+ getStart() + "; getEnd() was " + getEnd()
|
||||
+ " but this VariantContext contains an END key with value " + end);
|
||||
+ " but this VariantContext contains an END key with value " + end;
|
||||
if ( WARN_ABOUT_BAD_END )
|
||||
logger.warn(message);
|
||||
else
|
||||
throw new ReviewedStingException(message);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,7 @@ import java.util.Map;
|
|||
*/
|
||||
@Invariant({
|
||||
"headerLine != null",
|
||||
"BCF2Type.INTEGERS.contains(dictionaryOffsetType)",
|
||||
"dictionaryOffsetType.isIntegerType()",
|
||||
"dictionaryOffset >= 0"
|
||||
})
|
||||
public abstract class BCF2FieldEncoder {
|
||||
|
|
|
|||
|
|
@ -338,7 +338,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
}
|
||||
|
||||
@Requires("! strings.isEmpty()")
|
||||
@Ensures("BCF2Type.INTEGERS.contains(result)")
|
||||
@Ensures("result.isIntegerType()")
|
||||
private final BCF2Type encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||
|
||||
|
|
|
|||
|
|
@ -39,6 +39,17 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test reading and writing breakpoint VCF", spec1);
|
||||
}
|
||||
|
||||
@Test(enabled = true)
|
||||
public void testReadingAndWriting1000GSVs() {
|
||||
String testVCF = privateTestDir + "1000G_SVs.chr1.vcf";
|
||||
|
||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
|
||||
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReadingAndWritingSamtools() {
|
||||
String testVCF = privateTestDir + "samtools.vcf";
|
||||
|
|
@ -59,12 +70,12 @@ public class VCFIntegrationTest extends WalkerTest {
|
|||
executeTest("Test writing samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
@Test(enabled = false)
|
||||
@Test(enabled = true)
|
||||
public void testReadingSamtoolsWExBCFExample() {
|
||||
String testVCF = privateTestDir + "ex2.bcf";
|
||||
String baseCommand = "-R " + b36KGReference + " --no_cmdline_in_header -o %s ";
|
||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("63a2e0484ae37b0680514f53e0bf0c94"));
|
||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("0439e2b4ccc63bb4ba7c283cd9ab1b25"));
|
||||
executeTest("Test reading samtools WEx BCF example", spec1);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -597,23 +597,41 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
|
||||
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
||||
testReaderWriter(tester, data.header, data.vcs, data.vcs, true);
|
||||
}
|
||||
|
||||
public static void testReaderWriter(final VariantContextIOTest tester,
|
||||
final VCFHeader header,
|
||||
final List<VariantContext> expected,
|
||||
final Iterable<VariantContext> vcs,
|
||||
final boolean recurse) throws IOException {
|
||||
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
|
||||
tmpFile.deleteOnExit();
|
||||
|
||||
// todo -- test all options
|
||||
|
||||
// write
|
||||
// write expected to disk
|
||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||
writer.writeHeader(data.header);
|
||||
final List<VariantContext> expected = data.vcs;
|
||||
for ( VariantContext vc : expected )
|
||||
writer.add(vc);
|
||||
writer.close();
|
||||
writeVCsToFile(writer, header, vcs);
|
||||
|
||||
final Iterable<VariantContext> actual = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
||||
// ensure writing of expected == actual
|
||||
final Pair<VCFHeader, Iterable<VariantContext>> p = readAllVCs(tmpFile, tester.makeCodec());
|
||||
final Iterable<VariantContext> actual = p.getSecond();
|
||||
assertEquals(actual, expected);
|
||||
|
||||
if ( recurse ) {
|
||||
// if we are doing a recursive test, grab a fresh iterator over the written values
|
||||
final Iterable<VariantContext> read = readAllVCs(tmpFile, tester.makeCodec()).getSecond();
|
||||
testReaderWriter(tester, p.getFirst(), expected, read, false);
|
||||
}
|
||||
}
|
||||
|
||||
private static void writeVCsToFile(final VariantContextWriter writer, final VCFHeader header, final Iterable<VariantContext> vcs) {
|
||||
// write
|
||||
writer.writeHeader(header);
|
||||
for ( VariantContext vc : vcs )
|
||||
if (vc != null)
|
||||
writer.add(vc);
|
||||
writer.close();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
Loading…
Reference in New Issue