Update BCF2 to include a minor version number so we can rev (and report errors) with BCF2
-- We are now likely to fail with an error when reading old BCF files, rather than just giving bad results -- Added new class BCFVersion that consolidates all of the version management of BCF
This commit is contained in:
parent
b4f4d86c77
commit
fb5dabce18
|
|
@ -53,6 +53,11 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||||
private final static boolean FORBID_SYMBOLICS = false;
|
private final static boolean FORBID_SYMBOLICS = false;
|
||||||
|
|
||||||
|
private final static int ALLOWED_MAJOR_VERSION = 2;
|
||||||
|
private final static int MIN_MINOR_VERSION = 1;
|
||||||
|
|
||||||
|
private BCFVersion bcfVersion = null;
|
||||||
|
|
||||||
private VCFHeader header = null;
|
private VCFHeader header = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -131,8 +136,16 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
|
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
|
||||||
try {
|
try {
|
||||||
// note that this reads the magic as well, and so does double duty
|
// note that this reads the magic as well, and so does double duty
|
||||||
if ( ! BCF2Utils.startsWithBCF2Magic(inputStream) )
|
bcfVersion = BCFVersion.readBCFVersion(inputStream);
|
||||||
error("Input stream does not begin with BCF2 magic");
|
if ( bcfVersion == null )
|
||||||
|
error("Input stream does not contain a BCF encoded file; BCF magic header info not found");
|
||||||
|
|
||||||
|
if ( bcfVersion.getMajorVersion() != ALLOWED_MAJOR_VERSION )
|
||||||
|
error("BCF2Codec can only process BCF2 files, this file has major version " + bcfVersion.getMajorVersion());
|
||||||
|
if ( bcfVersion.getMinorVersion() < MIN_MINOR_VERSION )
|
||||||
|
error("BCF2Codec can only process BCF2 files with minor version >= " + MIN_MINOR_VERSION + " but this file has minor version " + bcfVersion.getMinorVersion());
|
||||||
|
|
||||||
|
logger.info("BCF version " + bcfVersion);
|
||||||
|
|
||||||
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
final int headerSizeInBytes = BCF2Utils.readInt(BCF2Type.INT32.getSizeInBytes(), inputStream);
|
||||||
|
|
||||||
|
|
@ -187,7 +200,8 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
FileInputStream fis = null;
|
FileInputStream fis = null;
|
||||||
try {
|
try {
|
||||||
fis = new FileInputStream(path);
|
fis = new FileInputStream(path);
|
||||||
return BCF2Utils.startsWithBCF2Magic(fis);
|
final BCFVersion version = BCFVersion.readBCFVersion(fis);
|
||||||
|
return version != null && version.getMajorVersion() == ALLOWED_MAJOR_VERSION;
|
||||||
} catch ( FileNotFoundException e ) {
|
} catch ( FileNotFoundException e ) {
|
||||||
return false;
|
return false;
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
|
|
@ -196,7 +210,7 @@ public final class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||||
try {
|
try {
|
||||||
if ( fis != null ) fis.close();
|
if ( fis != null ) fis.close();
|
||||||
} catch ( IOException e ) {
|
} catch ( IOException e ) {
|
||||||
; // do nothing
|
// do nothing
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,8 +41,6 @@ import java.util.*;
|
||||||
* @since 5/12
|
* @since 5/12
|
||||||
*/
|
*/
|
||||||
public final class BCF2Utils {
|
public final class BCF2Utils {
|
||||||
public static final byte[] MAGIC_HEADER_LINE = "BCF\2".getBytes();
|
|
||||||
|
|
||||||
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
public static final int MAX_ALLELES_IN_GENOTYPES = 127;
|
||||||
|
|
||||||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||||
|
|
@ -75,7 +73,7 @@ public final class BCF2Utils {
|
||||||
*/
|
*/
|
||||||
@Requires("header != null")
|
@Requires("header != null")
|
||||||
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
@Ensures({"result != null", "new HashSet(result).size() == result.size()"})
|
||||||
public final static ArrayList<String> makeDictionary(final VCFHeader header) {
|
public static ArrayList<String> makeDictionary(final VCFHeader header) {
|
||||||
final Set<String> seen = new HashSet<String>();
|
final Set<String> seen = new HashSet<String>();
|
||||||
final ArrayList<String> dict = new ArrayList<String>();
|
final ArrayList<String> dict = new ArrayList<String>();
|
||||||
|
|
||||||
|
|
@ -96,43 +94,37 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires({"nElements >= 0", "type != null"})
|
@Requires({"nElements >= 0", "type != null"})
|
||||||
public final static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
public static byte encodeTypeDescriptor(final int nElements, final BCF2Type type ) {
|
||||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||||
return typeByte;
|
return typeByte;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public final static int decodeSize(final byte typeDescriptor) {
|
public static int decodeSize(final byte typeDescriptor) {
|
||||||
return (0xF0 & typeDescriptor) >> 4;
|
return (0xF0 & typeDescriptor) >> 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
public static int decodeTypeID(final byte typeDescriptor) {
|
||||||
return typeDescriptor & 0x0F;
|
return typeDescriptor & 0x0F;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public final static BCF2Type decodeType(final byte typeDescriptor) {
|
public static BCF2Type decodeType(final byte typeDescriptor) {
|
||||||
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
return ID_TO_ENUM[decodeTypeID(typeDescriptor)];
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
|
public static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("nElements >= 0")
|
@Requires("nElements >= 0")
|
||||||
public final static boolean willOverflow(final long nElements) {
|
public static boolean willOverflow(final long nElements) {
|
||||||
return nElements > MAX_INLINE_ELEMENTS;
|
return nElements > MAX_INLINE_ELEMENTS;
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static boolean startsWithBCF2Magic(final InputStream stream) throws IOException {
|
public static byte readByte(final InputStream stream) {
|
||||||
final byte[] magicBytes = new byte[BCF2Utils.MAGIC_HEADER_LINE.length];
|
|
||||||
stream.read(magicBytes);
|
|
||||||
return Arrays.equals(magicBytes, BCF2Utils.MAGIC_HEADER_LINE);
|
|
||||||
}
|
|
||||||
|
|
||||||
public final static byte readByte(final InputStream stream) {
|
|
||||||
// TODO -- shouldn't be capturing error here
|
// TODO -- shouldn't be capturing error here
|
||||||
try {
|
try {
|
||||||
return (byte)(stream.read() & 0xFF);
|
return (byte)(stream.read() & 0xFF);
|
||||||
|
|
@ -151,7 +143,7 @@ public final class BCF2Utils {
|
||||||
*/
|
*/
|
||||||
@Requires({"strings != null", "strings.size() > 1"})
|
@Requires({"strings != null", "strings.size() > 1"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public static final String collapseStringList(final List<String> strings) {
|
public static String collapseStringList(final List<String> strings) {
|
||||||
final StringBuilder b = new StringBuilder();
|
final StringBuilder b = new StringBuilder();
|
||||||
for ( final String s : strings ) {
|
for ( final String s : strings ) {
|
||||||
if ( s != null ) {
|
if ( s != null ) {
|
||||||
|
|
@ -173,14 +165,14 @@ public final class BCF2Utils {
|
||||||
*/
|
*/
|
||||||
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
@Requires({"collapsed != null", "isCollapsedString(collapsed)"})
|
||||||
@Ensures("result != null")
|
@Ensures("result != null")
|
||||||
public static final List<String> explodeStringList(final String collapsed) {
|
public static List<String> explodeStringList(final String collapsed) {
|
||||||
assert isCollapsedString(collapsed);
|
assert isCollapsedString(collapsed);
|
||||||
final String[] exploded = collapsed.substring(1).split(",");
|
final String[] exploded = collapsed.substring(1).split(",");
|
||||||
return Arrays.asList(exploded);
|
return Arrays.asList(exploded);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Requires("s != null")
|
@Requires("s != null")
|
||||||
public static final boolean isCollapsedString(final String s) {
|
public static boolean isCollapsedString(final String s) {
|
||||||
return s.charAt(0) == ',';
|
return s.charAt(0) == ',';
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -222,7 +214,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result.isIntegerType()")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final int value) {
|
public static BCF2Type determineIntegerType(final int value) {
|
||||||
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
for ( final BCF2Type potentialType : INTEGER_TYPES_BY_SIZE) {
|
||||||
if ( potentialType.withinRange(value) )
|
if ( potentialType.withinRange(value) )
|
||||||
return potentialType;
|
return potentialType;
|
||||||
|
|
@ -232,7 +224,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result.isIntegerType()")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final int[] values) {
|
public static BCF2Type determineIntegerType(final int[] values) {
|
||||||
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
// literally a copy of the code below, but there's no general way to unify lists and arrays in java
|
||||||
BCF2Type maxType = BCF2Type.INT8;
|
BCF2Type maxType = BCF2Type.INT8;
|
||||||
for ( final int value : values ) {
|
for ( final int value : values ) {
|
||||||
|
|
@ -258,7 +250,7 @@ public final class BCF2Utils {
|
||||||
*/
|
*/
|
||||||
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
@Requires({"t1.isIntegerType()","t2.isIntegerType()"})
|
||||||
@Ensures("result.isIntegerType()")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
public static BCF2Type maxIntegerType(final BCF2Type t1, final BCF2Type t2) {
|
||||||
switch ( t1 ) {
|
switch ( t1 ) {
|
||||||
case INT8: return t2;
|
case INT8: return t2;
|
||||||
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
case INT16: return t2 == BCF2Type.INT32 ? t2 : t1;
|
||||||
|
|
@ -268,7 +260,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Ensures("result.isIntegerType()")
|
@Ensures("result.isIntegerType()")
|
||||||
public final static BCF2Type determineIntegerType(final List<Integer> values) {
|
public static BCF2Type determineIntegerType(final List<Integer> values) {
|
||||||
BCF2Type maxType = BCF2Type.INT8;
|
BCF2Type maxType = BCF2Type.INT8;
|
||||||
for ( final int value : values ) {
|
for ( final int value : values ) {
|
||||||
final BCF2Type type1 = determineIntegerType(value);
|
final BCF2Type type1 = determineIntegerType(value);
|
||||||
|
|
@ -293,7 +285,7 @@ public final class BCF2Utils {
|
||||||
* @param o
|
* @param o
|
||||||
* @return
|
* @return
|
||||||
*/
|
*/
|
||||||
public final static List<Object> toList(final Object o) {
|
public static List<Object> toList(final Object o) {
|
||||||
if ( o == null ) return Collections.emptyList();
|
if ( o == null ) return Collections.emptyList();
|
||||||
else if ( o instanceof List ) return (List<Object>)o;
|
else if ( o instanceof List ) return (List<Object>)o;
|
||||||
else return Collections.singletonList(o);
|
else return Collections.singletonList(o);
|
||||||
|
|
@ -301,7 +293,7 @@ public final class BCF2Utils {
|
||||||
|
|
||||||
|
|
||||||
@Requires({"stream != null", "bytesForEachInt > 0"})
|
@Requires({"stream != null", "bytesForEachInt > 0"})
|
||||||
public final static int readInt(int bytesForEachInt, final InputStream stream) {
|
public static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||||
switch ( bytesForEachInt ) {
|
switch ( bytesForEachInt ) {
|
||||||
case 1: {
|
case 1: {
|
||||||
return (byte)(readByte(stream));
|
return (byte)(readByte(stream));
|
||||||
|
|
@ -319,7 +311,7 @@ public final class BCF2Utils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
public static void encodeRawBytes(final int value, final BCF2Type type, final OutputStream encodeStream) throws IOException {
|
||||||
switch ( type.getSizeInBytes() ) {
|
switch ( type.getSizeInBytes() ) {
|
||||||
case 1:
|
case 1:
|
||||||
encodeStream.write(0xFF & value);
|
encodeStream.write(0xFF & value);
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,80 @@
|
||||||
|
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
 * Simple holder for BCF version information
 *
 * A BCF stream starts with the three magic bytes "BCF", followed by one byte holding
 * the major version and one byte holding the minor version.  This class reads, holds
 * and writes that five byte prefix.
 *
 * User: depristo
 * Date: 8/2/12
 * Time: 2:16 PM
 */
public class BCFVersion {
    /**
     * BCF2 begins with the MAGIC info BCF_M_m where M is the major version (currently 2)
     * and m is the minor version, currently 1
     *
     * Spelled out as byte literals so the value does not depend on the platform default charset.
     */
    public static final byte[] MAGIC_HEADER_START = { 'B', 'C', 'F' };

    final int majorVersion;
    final int minorVersion;

    /**
     * Create a version holder for the given major / minor pair
     *
     * @param majorVersion the major version number (written as a single byte by {@link #write})
     * @param minorVersion the minor version number (written as a single byte by {@link #write})
     */
    public BCFVersion(int majorVersion, int minorVersion) {
        this.majorVersion = majorVersion;
        this.minorVersion = minorVersion;
    }

    /**
     * @return the major version number of this BCF file
     */
    public int getMajorVersion() {
        return majorVersion;
    }

    /**
     * @return the minor version number of this BCF file
     */
    public int getMinorVersion() {
        return minorVersion;
    }

    /**
     * Return a new BCFVersion object describing the major and minor version of the BCF file in stream
     *
     * Note that stream must be at the very start of the file.
     *
     * The magic bytes are read with a loop because a single InputStream.read(byte[]) call is
     * allowed to return fewer bytes than requested.  A stream that ends before both version
     * bytes have been read is treated as not being a BCF file.
     *
     * @param stream the stream to read the magic bytes and version bytes from
     * @return a BCFVersion object, or null if stream doesn't contain a BCF file
     * @throws IOException if the underlying stream fails while being read
     */
    public static BCFVersion readBCFVersion(final InputStream stream) throws IOException {
        final byte[] magicBytes = new byte[MAGIC_HEADER_START.length];
        if ( ! readFully(stream, magicBytes) || ! Arrays.equals(magicBytes, MAGIC_HEADER_START) ) {
            return null;
        }

        // we're a BCF file
        final int majorByte = stream.read();
        final int minorByte = stream.read();
        if ( majorByte < 0 || minorByte < 0 ) {
            // read() returns -1 at end of stream: the header is truncated
            return null;
        }

        return new BCFVersion( majorByte, minorByte );
    }

    /**
     * Fill buffer completely from stream, tolerating short reads
     *
     * @param stream the stream to read from
     * @param buffer the buffer to fill
     * @return true if buffer was completely filled, false if the stream ended first
     * @throws IOException if the underlying stream fails while being read
     */
    private static boolean readFully(final InputStream stream, final byte[] buffer) throws IOException {
        int filled = 0;
        while ( filled < buffer.length ) {
            final int nRead = stream.read(buffer, filled, buffer.length - filled);
            if ( nRead < 0 ) {
                return false;
            }
            filled += nRead;
        }
        return true;
    }

    /**
     * Write out the BCF magic information indicating this is a BCF file with corresponding major and minor versions
     *
     * @param out the stream to write the magic bytes and the two version bytes to
     * @throws IOException if the underlying stream fails while being written
     */
    public void write(final OutputStream out) throws IOException {
        out.write(MAGIC_HEADER_START);
        out.write(getMajorVersion() & 0xFF);
        out.write(getMinorVersion() & 0xFF);
    }

    @Override
    public String toString() {
        return String.format("BCF%d.%d", getMajorVersion(), getMinorVersion());
    }
}
|
||||||
|
|
@ -31,6 +31,7 @@ import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Codec;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Type;
|
||||||
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCF2Utils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.bcf2.BCFVersion;
|
||||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
@ -83,6 +84,9 @@ import java.util.*;
|
||||||
* @since 06/12
|
* @since 06/12
|
||||||
*/
|
*/
|
||||||
class BCF2Writer extends IndexingVariantContextWriter {
|
class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
|
public static final int MAJOR_VERSION = 2;
|
||||||
|
public static final int MINOR_VERSION = 1;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
* If true, we will write out the undecoded raw bytes for a genotypes block, if it
|
||||||
* is found in the input VC. This can be very dangerous as the genotype encoding
|
* is found in the input VC. This can be very dangerous as the genotype encoding
|
||||||
|
|
@ -153,7 +157,7 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
||||||
writer.close();
|
writer.close();
|
||||||
|
|
||||||
final byte[] headerBytes = capture.toByteArray();
|
final byte[] headerBytes = capture.toByteArray();
|
||||||
outputStream.write(BCF2Utils.MAGIC_HEADER_LINE);
|
new BCFVersion(MAJOR_VERSION, MINOR_VERSION).write(outputStream);
|
||||||
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
|
BCF2Utils.encodeRawBytes(headerBytes.length, BCF2Type.INT32, outputStream);
|
||||||
outputStream.write(headerBytes);
|
outputStream.write(headerBytes);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,7 @@ public class VCFIntegrationTest extends WalkerTest {
|
||||||
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
String baseCommand = "-R " + b37KGReference + " --no_cmdline_in_header -o %s ";
|
||||||
|
|
||||||
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
String test1 = baseCommand + "-T SelectVariants -V " + testVCF;
|
||||||
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList(""));
|
WalkerTestSpec spec1 = new WalkerTestSpec(test1, 1, Arrays.asList("bdab26dd7648a806dbab01f64db2bdab"));
|
||||||
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
executeTest("Test reading and writing 1000G Phase I SVs", spec1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue