Move BCF2 from private utils to public codecs
This commit is contained in:
parent
d13cda6b6f
commit
679ffdd333
|
|
@ -0,0 +1,414 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.util.*;
|
||||
|
||||
public class BCF2Codec implements FeatureCodec<VariantContext> {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Codec.class);
|
||||
private VCFHeader header = null;
|
||||
private final ArrayList<String> contigNames = new ArrayList<String>();
|
||||
private final ArrayList<String> dictionary = new ArrayList<String>();
|
||||
private final BCF2Decoder decoder = new BCF2Decoder();
|
||||
private boolean skipGenotypes = false;
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Feature codec interface functions
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public Feature decodeLoc( final PositionalBufferedStream inputStream ) {
|
||||
return decode(inputStream);
|
||||
// TODO: a less expensive version of decodeLoc() that doesn't use VariantContext
|
||||
// TODO: very easy -- just decodeSitesBlock, and then skip to end of end of sites block
|
||||
// TODO: and then skip genotypes block
|
||||
}
|
||||
|
||||
@Override
|
||||
public VariantContext decode( final PositionalBufferedStream inputStream ) {
|
||||
final VariantContextBuilder builder = new VariantContextBuilder();
|
||||
|
||||
final int sitesBlockSize = decoder.readBlockSize(inputStream);
|
||||
final int genotypeBlockSize = decoder.readBlockSize(inputStream);
|
||||
decoder.readNextBlock(sitesBlockSize, inputStream);
|
||||
final SitesInfoForDecoding info = decodeSitesBlock(builder);
|
||||
|
||||
if ( isSkippingGenotypes() ) {
|
||||
decoder.skipNextBlock(genotypeBlockSize, inputStream);
|
||||
} else {
|
||||
decoder.readNextBlock(genotypeBlockSize, inputStream);
|
||||
decodeGenotypes(info, builder);
|
||||
}
|
||||
|
||||
return builder.make();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Class<VariantContext> getFeatureType() {
|
||||
return VariantContext.class;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FeatureCodecHeader readHeader( final PositionalBufferedStream inputStream ) {
|
||||
AsciiLineReader headerReader = new AsciiLineReader(inputStream);
|
||||
String headerLine;
|
||||
List<String> headerLines = new ArrayList<String>();
|
||||
boolean foundHeaderEnd = false;
|
||||
|
||||
try {
|
||||
while ( ! foundHeaderEnd && (headerLine = headerReader.readLine()) != null) {
|
||||
if ( headerLine.startsWith(VCFHeader.METADATA_INDICATOR) ) {
|
||||
headerLines.add(headerLine);
|
||||
}
|
||||
else if ( headerLine.startsWith(VCFHeader.HEADER_INDICATOR) ) {
|
||||
headerLines.add(headerLine);
|
||||
foundHeaderEnd = true;
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBCF2("Reached end of header without encountering a field layout line");
|
||||
}
|
||||
}
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 header");
|
||||
}
|
||||
|
||||
if ( ! foundHeaderEnd ) {
|
||||
throw new UserException.MalformedBCF2("Reached end of header without encountering a field layout line");
|
||||
}
|
||||
|
||||
// read the header
|
||||
this.header = AbstractVCFCodec.parseHeader(headerLines, VCFHeaderVersion.VCF4_1);
|
||||
|
||||
// create the config offsets
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines())
|
||||
contigNames.add(contig.getID());
|
||||
|
||||
// create the string dictionary
|
||||
parseDictionary(header);
|
||||
|
||||
// position right before next line (would be right before first real record byte at end of header)
|
||||
return new FeatureCodecHeader(header, inputStream.getPosition());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean canDecode( final String path ) {
|
||||
try {
|
||||
FileInputStream fis = new FileInputStream(path);
|
||||
AsciiLineReader reader = new AsciiLineReader(new PositionalBufferedStream(fis));
|
||||
String firstLine = reader.readLine();
|
||||
if ( firstLine != null && firstLine.equals(BCF2Constants.VERSION_LINE) ) {
|
||||
return true;
|
||||
}
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private final void parseDictionary(final VCFHeader header) {
|
||||
for ( final VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line.getKey().equals(BCF2Constants.DICTIONARY_LINE_TAG) ) {
|
||||
for ( final String string : line.getValue().split(BCF2Constants.DICTIONARY_LINE_ENTRY_SEPARATOR) )
|
||||
dictionary.add(string);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// if we got here we never found a dictionary, or there are no elements in the dictionary
|
||||
if ( dictionary.size() == 0 )
|
||||
throw new UserException.MalformedBCF2("Dictionary header element was absent or empty");
|
||||
}
|
||||
|
||||
public boolean isSkippingGenotypes() {
|
||||
return skipGenotypes;
|
||||
}
|
||||
|
||||
public void setSkipGenotypes(final boolean skipGenotypes) {
|
||||
this.skipGenotypes = skipGenotypes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implicit block
|
||||
//
|
||||
// The first four fields of each BCF2 record are inline, untyped encoded data:
|
||||
//
|
||||
// 4 byte integer chrom offset
|
||||
// 4 byte integer start
|
||||
// 4 byte integer ref length
|
||||
// 4 byte float qual
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private final SitesInfoForDecoding decodeSitesBlock(final VariantContextBuilder builder) {
|
||||
final int contigOffset = decoder.decodeInt(BCFType.INT32.getSizeInBytes());
|
||||
final String contig = lookupContigName(contigOffset);
|
||||
builder.chr(contig);
|
||||
|
||||
final int pos = decoder.decodeInt(BCFType.INT32.getSizeInBytes());
|
||||
final int refLength = decoder.decodeInt(BCFType.INT32.getSizeInBytes());
|
||||
builder.start((long)pos);
|
||||
builder.stop((long)(pos + refLength - 1)); // minus one because of our open intervals
|
||||
|
||||
final Object qual = decoder.decodeSingleValue(BCFType.FLOAT);
|
||||
if ( qual != null ) {
|
||||
builder.log10PError(((Double)qual) / -10.0);
|
||||
}
|
||||
|
||||
final int nAlleleInfo = decoder.decodeInt(BCFType.INT32.getSizeInBytes());
|
||||
final int nFormatSamples = decoder.decodeInt(BCFType.INT32.getSizeInBytes());
|
||||
final int nAlleles = nAlleleInfo >> 16;
|
||||
final int nInfo = nAlleleInfo & 0x00FF;
|
||||
final int nFormatFields = nFormatSamples >> 24;
|
||||
final int nSamples = nFormatSamples & 0x0FFF;
|
||||
|
||||
decodeID(builder);
|
||||
final ArrayList<Allele> alleles = decodeAlleles(builder, pos, nAlleles);
|
||||
decodeFilter(builder);
|
||||
decodeInfo(builder, nInfo);
|
||||
|
||||
return new SitesInfoForDecoding(pos, nFormatFields, nSamples, alleles);
|
||||
}
|
||||
|
||||
private final static class SitesInfoForDecoding {
|
||||
final int pos;
|
||||
final int nFormatFields;
|
||||
final int nSamples;
|
||||
final ArrayList<Allele> alleles;
|
||||
|
||||
private SitesInfoForDecoding(final int pos, final int nFormatFields, final int nSamples, final ArrayList<Allele> alleles) {
|
||||
this.pos = pos;
|
||||
this.nFormatFields = nFormatFields;
|
||||
this.nSamples = nSamples;
|
||||
this.alleles = alleles;
|
||||
}
|
||||
}
|
||||
|
||||
private void decodeID( final VariantContextBuilder builder ) {
|
||||
final String id = (String)decoder.decodeTypedValue();
|
||||
|
||||
if ( id == null ) {
|
||||
builder.noID();
|
||||
}
|
||||
else {
|
||||
builder.id(id);
|
||||
}
|
||||
}
|
||||
|
||||
public static ArrayList<Allele> clipAllelesIfNecessary(int position, String ref, ArrayList<Allele> unclippedAlleles) {
|
||||
if ( AbstractVCFCodec.isSingleNucleotideEvent(unclippedAlleles) ) {
|
||||
ArrayList<Allele> clippedAlleles = new ArrayList<Allele>(unclippedAlleles.size());
|
||||
AbstractVCFCodec.clipAlleles(position, ref, unclippedAlleles, clippedAlleles, -1);
|
||||
return clippedAlleles;
|
||||
} else
|
||||
return unclippedAlleles;
|
||||
}
|
||||
|
||||
private ArrayList<Allele> decodeAlleles( final VariantContextBuilder builder, final int pos, final int nAlleles ) {
|
||||
// TODO -- probably need inline decoder for efficiency here (no sense in going bytes -> string -> vector -> bytes
|
||||
ArrayList<Allele> alleles = new ArrayList<Allele>(nAlleles);
|
||||
String ref = null;
|
||||
|
||||
for ( int i = 0; i < nAlleles; i++ ) {
|
||||
final String allele = (String)decoder.decodeTypedValue();
|
||||
|
||||
if ( i == 0 ) {
|
||||
ref = allele;
|
||||
alleles.add(Allele.create(allele, true));
|
||||
} else {
|
||||
alleles.add(Allele.create(allele, false));
|
||||
}
|
||||
}
|
||||
|
||||
alleles = clipAllelesIfNecessary(pos, ref, alleles);
|
||||
builder.alleles(alleles);
|
||||
|
||||
builder.referenceBaseForIndel(ref.getBytes()[0]);
|
||||
|
||||
return alleles;
|
||||
}
|
||||
|
||||
private void decodeFilter( final VariantContextBuilder builder ) {
|
||||
final Object filters = decoder.decodeTypedValue();
|
||||
|
||||
if ( filters == null ) {
|
||||
builder.unfiltered();
|
||||
}
|
||||
else {
|
||||
builder.filters(new LinkedHashSet<String>(asStrings(filters)));
|
||||
}
|
||||
}
|
||||
|
||||
private void decodeInfo( final VariantContextBuilder builder, final int numInfoFields ) {
|
||||
final Map<String, Object> infoFieldEntries = new HashMap<String, Object>(numInfoFields);
|
||||
|
||||
for ( int i = 0; i < numInfoFields; i++ ) {
|
||||
final String key = getDictionaryString();
|
||||
Object value = decoder.decodeTypedValue();
|
||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, key);
|
||||
if ( metaData.getType() == VCFHeaderLineType.Flag ) value = true; // special case for flags
|
||||
infoFieldEntries.put(key, value);
|
||||
}
|
||||
|
||||
builder.attributes(infoFieldEntries);
|
||||
}
|
||||
|
||||
private void decodeGenotypes( final SitesInfoForDecoding siteInfo, final VariantContextBuilder builder ) {
|
||||
final List<String> samples = new ArrayList<String>(header.getGenotypeSamples());
|
||||
final int nSamples = siteInfo.nSamples;
|
||||
final int nFields = siteInfo.nFormatFields;
|
||||
final Map<String, List<Object>> fieldValues = decodeGenotypeFieldValues(nFields, nSamples);
|
||||
|
||||
if ( samples.size() != nSamples )
|
||||
throw new UserException.MalformedBCF2("GATK currently doesn't support reading BCF2 files with " +
|
||||
"different numbers of samples per record. Saw " + samples.size() +
|
||||
" samples in header but have a record with " + nSamples + " samples");
|
||||
|
||||
final List<Genotype> genotypes = new ArrayList<Genotype>(nSamples);
|
||||
for ( int i = 0; i < nSamples; i++ ) {
|
||||
final String sampleName = samples.get(i);
|
||||
List<Allele> alleles = null;
|
||||
boolean isPhased = false;
|
||||
double log10PError = VariantContext.NO_LOG10_PERROR;
|
||||
Set<String> filters = null;
|
||||
Map<String, Object> attributes = null;
|
||||
double[] log10Likelihoods = null;
|
||||
|
||||
for ( final Map.Entry<String, List<Object>> entry : fieldValues.entrySet() ) {
|
||||
final String field = entry.getKey();
|
||||
final List<Object> values = entry.getValue();
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
alleles = decodeGenotypeAlleles(siteInfo.alleles, (List<Integer>)values.get(i));
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
final Integer value = (Integer)values.get(i);
|
||||
if ( value != BCFType.INT8.getMissingJavaValue() )
|
||||
log10PError = value / -10.0;
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
throw new ReviewedStingException("Genotype filters not implemented in GATK BCF2");
|
||||
//filters = new HashSet<String>(values.get(i));
|
||||
} else { // add to attributes
|
||||
if ( attributes == null ) attributes = new HashMap<String, Object>(nFields);
|
||||
attributes.put(field, values.get(i));
|
||||
}
|
||||
}
|
||||
|
||||
if ( alleles == null ) throw new ReviewedStingException("BUG: no alleles found");
|
||||
|
||||
final Genotype g = new Genotype(sampleName, alleles, log10PError, filters, attributes, isPhased, log10Likelihoods);
|
||||
genotypes.add(g);
|
||||
}
|
||||
|
||||
builder.genotypes(genotypes);
|
||||
}
|
||||
|
||||
private final List<Allele> decodeGenotypeAlleles(final ArrayList<Allele> siteAlleles, final List<Integer> encoded) {
|
||||
final List<Allele> gt = new ArrayList<Allele>(encoded.size());
|
||||
for ( final Integer encode : encoded ) {
|
||||
if ( encode == null ) // absent, as are all following by definition
|
||||
return gt;
|
||||
else {
|
||||
final int offset = encode >> 1;
|
||||
if ( offset == 0 )
|
||||
gt.add(Allele.NO_CALL);
|
||||
else
|
||||
gt.add(siteAlleles.get(offset - 1));
|
||||
}
|
||||
}
|
||||
return gt;
|
||||
}
|
||||
|
||||
private final Map<String, List<Object>> decodeGenotypeFieldValues(final int nFields, final int nSamples) {
|
||||
final Map<String, List<Object>> map = new LinkedHashMap<String, List<Object>>(nFields);
|
||||
|
||||
for ( int i = 0; i < nFields; i++ ) {
|
||||
final String field = getDictionaryString();
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final List<Object> values = new ArrayList<Object>(nSamples);
|
||||
for ( int j = 0; j < nSamples; j++ )
|
||||
values.add(decoder.decodeTypedValue(typeDescriptor));
|
||||
map.put(field, values);
|
||||
}
|
||||
|
||||
return map;
|
||||
}
|
||||
|
||||
private final String getDictionaryString() {
|
||||
final int offset = (Integer)decoder.decodeTypedValue();
|
||||
final String field = dictionary.get(offset);
|
||||
return field;
|
||||
}
|
||||
|
||||
private final String lookupContigName( final int contigOffset ) {
|
||||
if ( contigOffset < contigNames.size() ) {
|
||||
return contigNames.get(contigOffset);
|
||||
}
|
||||
else {
|
||||
throw new UserException.MalformedBCF2(String.format("No contig at index %d present in the sequence dictionary from the BCF2 header (%s)", contigOffset, contigNames));
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Utility functions
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private final Collection<String> asStrings(final Object o) {
|
||||
return asCollection(String.class, o);
|
||||
}
|
||||
|
||||
private final <T> Collection<T> asCollection(final Class<T> c, final Object o) {
|
||||
if ( o == null )
|
||||
return Collections.emptyList();
|
||||
else if ( o instanceof List ) {
|
||||
return (List<T>)o;
|
||||
} else {
|
||||
return (Set<T>)Collections.singleton(o);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
|
||||
import java.nio.charset.Charset;
|
||||
|
||||
public class BCF2Constants {
|
||||
public static final String VERSION_LINE_FORMAT = "fileformat=BCF2v%d.%d";
|
||||
public static final String VERSION_LINE = String.format(VCFHeader.METADATA_INDICATOR + VERSION_LINE_FORMAT, 0, 1);
|
||||
public static final String DICTIONARY_LINE_TAG = "dictionary";
|
||||
public static final String DICTIONARY_LINE_ENTRY_SEPARATOR = ",";
|
||||
|
||||
public static final Charset BCF2_TEXT_CHARSET = Charset.forName("US-ASCII"); // TODO: enforce this!
|
||||
|
||||
// Note that these values are prefixed by FFFFFF for convenience
|
||||
public static final int INT8_MISSING_VALUE = 0xFFFFFF80;
|
||||
public static final int INT16_MISSING_VALUE = 0xFFFF8000;
|
||||
public static final int INT32_MISSING_VALUE = 0x80000000;
|
||||
public static final int FLOAT_MISSING_VALUE = 0x7F800001;
|
||||
}
|
||||
|
|
@ -0,0 +1,277 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
public class BCF2Decoder {
|
||||
final protected static Logger logger = Logger.getLogger(FeatureCodec.class);
|
||||
|
||||
byte[] recordBytes;
|
||||
ByteArrayInputStream recordStream;
|
||||
|
||||
/**
 * Creates a decoder with no block loaded; a block must be loaded before values can be decoded.
 */
public BCF2Decoder() {
    // intentionally empty: state is established by readNextBlock() or setRecordBytes()
}
|
||||
|
||||
/**
 * Creates a decoder that is immediately ready to decode values from {@code recordBytes}.
 * Intended for testing.
 *
 * @param recordBytes block of BCF2 data to decode
 */
protected BCF2Decoder(final byte[] recordBytes) {
    this.setRecordBytes(recordBytes);
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Routines to load, set, skip blocks of underlying data we are decoding
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Reads the next record from input stream and prepare this decoder to decode values from it
|
||||
*
|
||||
* @param stream
|
||||
* @return
|
||||
*/
|
||||
public void readNextBlock(final int blockSizeInBytes, final InputStream stream) {
|
||||
setRecordBytes(readRecordBytes(blockSizeInBytes, stream));
|
||||
}
|
||||
|
||||
/**
|
||||
* Skips the next record from input stream, invalidating current block data
|
||||
*
|
||||
* @param stream
|
||||
* @return
|
||||
*/
|
||||
public void skipNextBlock(final int blockSizeInBytes, final InputStream stream) {
|
||||
try {
|
||||
final int bytesRead = (int)stream.skip(blockSizeInBytes);
|
||||
validateReadBytes(bytesRead, blockSizeInBytes);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
|
||||
}
|
||||
this.recordBytes = null;
|
||||
this.recordStream = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the byte[] for the block of data we are currently decoding
|
||||
* @return
|
||||
*/
|
||||
public byte[] getRecordBytes() {
|
||||
return recordBytes;
|
||||
}
|
||||
|
||||
/**
|
||||
* The size of the current block in bytes
|
||||
*
|
||||
* @return
|
||||
*/
|
||||
public int getBlockSize() {
|
||||
return recordBytes.length;
|
||||
}
|
||||
|
||||
public boolean blockIsFullyDecoded() {
|
||||
return recordStream.available() == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use the recordBytes[] to read BCF2 records from now on
|
||||
*
|
||||
* @param recordBytes
|
||||
*/
|
||||
public void setRecordBytes(final byte[] recordBytes) {
|
||||
this.recordBytes = recordBytes;
|
||||
this.recordStream = new ByteArrayInputStream(recordBytes);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// High-level decoder
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
public final Object decodeTypedValue() {
|
||||
final byte typeDescriptor = readTypeDescriptor();
|
||||
return decodeTypedValue(typeDescriptor);
|
||||
}
|
||||
|
||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
||||
final int size = TypeDescriptor.sizeIsOverflow(typeDescriptor) ? decodeVectorSize() : TypeDescriptor.decodeSize(typeDescriptor);
|
||||
final BCFType type = TypeDescriptor.decodeType(typeDescriptor);
|
||||
|
||||
assert size >= 0;
|
||||
|
||||
if ( size == 0 ) {
|
||||
return null;
|
||||
} else if ( type == BCFType.CHAR ) { // special case string decoding for efficiency
|
||||
return decodeLiteralString(size);
|
||||
} else if ( size == 1 ) {
|
||||
return decodeSingleValue(type);
|
||||
} else {
|
||||
final ArrayList<Object> ints = new ArrayList<Object>(size);
|
||||
for ( int i = 0; i < size; i++ ) {
|
||||
ints.add(decodeSingleValue(type));
|
||||
}
|
||||
return ints;
|
||||
}
|
||||
}
|
||||
|
||||
public final Object decodeSingleValue(final BCFType type) {
|
||||
// TODO -- decodeTypedValue should integrate this routine
|
||||
final int value = readInt(type.getSizeInBytes(), recordStream);
|
||||
|
||||
if ( value == type.getMissingBytes() )
|
||||
return null;
|
||||
else {
|
||||
switch (type) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: return value;
|
||||
case FLOAT: return (double)rawFloatToFloat(value);
|
||||
case CHAR: return value & 0xFF; // TODO -- I cannot imagine why we'd get here, as string needs to be special cased
|
||||
default: throw new ReviewedStingException("BCF2 codec doesn't know how to decode type " + type );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Decode raw primitive data types (ints, floats, and strings)
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
private final String decodeLiteralString(final int size) {
|
||||
// TODO -- assumes size > 0
|
||||
final byte[] bytes = new byte[size]; // TODO -- in principle should just grab bytes from underlying array
|
||||
try {
|
||||
recordStream.read(bytes);
|
||||
return new String(bytes);
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
}
|
||||
}
|
||||
|
||||
private final int decodeVectorSize() {
|
||||
final byte typeDescriptor = readTypeDescriptor();
|
||||
final int size = TypeDescriptor.decodeSize(typeDescriptor);
|
||||
final BCFType type = TypeDescriptor.decodeType(typeDescriptor);
|
||||
|
||||
assert size == 1;
|
||||
assert type == BCFType.INT8 || type == BCFType.INT16 || type == BCFType.INT32;
|
||||
|
||||
return decodeInt(type.getSizeInBytes());
|
||||
}
|
||||
|
||||
public final int decodeInt(int bytesForEachInt) {
|
||||
return readInt(bytesForEachInt, recordStream);
|
||||
}
|
||||
|
||||
public final float rawFloatToFloat(final int rawFloat) {
|
||||
return Float.intBitsToFloat(rawFloat);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
//
|
||||
// Utility functions
|
||||
//
|
||||
// ----------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Read the size of the next block from inputStream
|
||||
*
|
||||
* @param inputStream
|
||||
* @return
|
||||
*/
|
||||
public final int readBlockSize(final InputStream inputStream) {
|
||||
return readInt(4, inputStream);
|
||||
}
|
||||
|
||||
/**
|
||||
*
|
||||
* @param inputStream
|
||||
* @return
|
||||
*/
|
||||
private final static byte[] readRecordBytes(final int blockSizeInBytes, final InputStream inputStream) {
|
||||
final byte[] record = new byte[blockSizeInBytes];
|
||||
try {
|
||||
final int bytesRead = inputStream.read(record);
|
||||
validateReadBytes(bytesRead, blockSizeInBytes);
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotReadInputFile("I/O error while reading BCF2 file", e);
|
||||
}
|
||||
|
||||
return record;
|
||||
}
|
||||
|
||||
private final static void validateReadBytes(final int actuallyRead, final int expected) {
|
||||
if ( actuallyRead < expected ) {
|
||||
throw new UserException.MalformedBCF2(String.format("Failed to read next complete record: %s",
|
||||
actuallyRead == -1 ?
|
||||
"premature end of input stream" :
|
||||
String.format("expected %d bytes but read only %d", expected, actuallyRead)));
|
||||
}
|
||||
}
|
||||
|
||||
public final byte readTypeDescriptor() {
|
||||
return readByte(recordStream);
|
||||
}
|
||||
|
||||
private final static byte readByte(final InputStream stream) {
|
||||
try {
|
||||
return (byte)(stream.read() & 0xFF);
|
||||
} catch ( IOException e ) {
|
||||
throw new ReviewedStingException("readByte failure", e);
|
||||
}
|
||||
}
|
||||
|
||||
private final static int readInt(int bytesForEachInt, final InputStream stream) {
|
||||
switch ( bytesForEachInt ) {
|
||||
case 1: {
|
||||
return (byte)(readByte(stream));
|
||||
} case 2: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
return (short)((b1 << 8) | b2);
|
||||
} case 4: {
|
||||
final int b1 = readByte(stream) & 0xFF;
|
||||
final int b2 = readByte(stream) & 0xFF;
|
||||
final int b3 = readByte(stream) & 0xFF;
|
||||
final int b4 = readByte(stream) & 0xFF;
|
||||
return (int)(b1 << 24 | b2 << 16 | b3 << 8 | b4);
|
||||
} default: throw new ReviewedStingException("Unexpected size during decoding");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,234 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Simple BCF2 encoder
|
||||
*
|
||||
* @author depristo
|
||||
* @since 5/12
|
||||
*/
|
||||
public class BCF2Encoder {
|
||||
// TODO -- increase default size?
|
||||
public static final int WRITE_BUFFER_INITIAL_SIZE = 16384;
|
||||
private ByteArrayOutputStream encodeStream = new ByteArrayOutputStream(WRITE_BUFFER_INITIAL_SIZE);
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Functions to return the data being encoded here
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public int getRecordSizeInBytes() {
|
||||
return encodeStream.size();
|
||||
}
|
||||
|
||||
public byte[] getRecordBytes() {
|
||||
byte[] bytes = encodeStream.toByteArray();
|
||||
encodeStream.reset();
|
||||
return bytes;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Super-high level interface
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Totally generic encoder that examines o, determines the best way to encode it, and encodes it
|
||||
* @param o
|
||||
* @return
|
||||
*/
|
||||
public final BCFType encode(final Object o) throws IOException {
|
||||
if ( o == null ) throw new ReviewedStingException("Generic encode cannot deal with null values");
|
||||
|
||||
if ( o instanceof String ) {
|
||||
return encodeString((String)o);
|
||||
} else if ( o instanceof List ) {
|
||||
final BCFType type = determinePrimitiveType(((List) o).get(0));
|
||||
encodeTypedVector((List) o, type);
|
||||
return type;
|
||||
} else {
|
||||
final BCFType type = determinePrimitiveType(o);
|
||||
encodeTypedSingleton(o, type);
|
||||
return type;
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Writing typed values (have type byte)
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final void encodeTypedMissing(final BCFType type) throws IOException {
|
||||
encodeTypedVector(Collections.emptyList(), type);
|
||||
}
|
||||
|
||||
// todo -- should be specialized for each object type for efficiency
|
||||
public final void encodeTypedSingleton(final Object v, final BCFType type) throws IOException {
|
||||
encodeTypedVector(Collections.singleton(v), type);
|
||||
}
|
||||
|
||||
public final BCFType encodeString(final String v) throws IOException {
|
||||
// TODO -- this needs to be optimized
|
||||
final byte[] bytes = v.getBytes();
|
||||
final List<Byte> l = new ArrayList<Byte>(bytes.length);
|
||||
for ( int i = 0; i < bytes.length; i++) l.add(bytes[i]);
|
||||
encodeTypedVector(l, BCFType.CHAR);
|
||||
return BCFType.CHAR;
|
||||
}
|
||||
|
||||
public final <T extends Object> void encodeTypedVector(final Collection<T> v, final BCFType type) throws IOException {
|
||||
encodeType(v.size(), type);
|
||||
encodeRawValues(v, type);
|
||||
}
|
||||
|
||||
public final BCFType encodeTypedIntOfBestSize(final int value) throws IOException {
|
||||
final BCFType type = determineIntegerType(value);
|
||||
encodeTypedSingleton(value, type);
|
||||
return type;
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Writing raw values (don't have a type byte)
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final <T extends Object> void encodeRawValues(final Collection<T> v, final BCFType type) throws IOException {
|
||||
for ( final T v1 : v ) {
|
||||
encodeRawValue(v1, type);
|
||||
}
|
||||
}
|
||||
|
||||
public final <T extends Object> void encodeRawValue(final T value, final BCFType type) throws IOException {
|
||||
if ( value == type.getMissingJavaValue() )
|
||||
encodeRawMissingValue(type);
|
||||
else {
|
||||
switch (type) {
|
||||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodePrimitive((Integer)value, type); break;
|
||||
case FLOAT: encodeRawFloat((Float) value, type); break;
|
||||
case CHAR: encodeRawChar((Byte) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public final void encodeRawMissingValue(final BCFType type) throws IOException {
|
||||
encodePrimitive(type.getMissingBytes(), type);
|
||||
}
|
||||
|
||||
public final void encodeRawMissingValues(final int size, final BCFType type) throws IOException {
|
||||
for ( int i = 0; i < size; i++ )
|
||||
encodeRawMissingValue(type);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// low-level encoders
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final void encodeRawChar(final byte c) throws IOException {
|
||||
encodeStream.write(c);
|
||||
}
|
||||
|
||||
public final void encodeRawFloat(final float value, final BCFType type) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits(value), type);
|
||||
}
|
||||
|
||||
public final void encodeType(final int size, final BCFType type) throws IOException {
|
||||
final byte typeByte = TypeDescriptor.encodeTypeDescriptor(size, type);
|
||||
encodeStream.write(typeByte);
|
||||
if ( TypeDescriptor.willOverflow(size) )
|
||||
encodeTypedIntOfBestSize(size);
|
||||
}
|
||||
|
||||
public final void encodeRawInt(final int value, final BCFType type) throws IOException {
|
||||
encodePrimitive(value, type, encodeStream);
|
||||
}
|
||||
|
||||
public final void encodePrimitive(final int value, final BCFType type) throws IOException {
|
||||
encodePrimitive(value, type, encodeStream);
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// utility functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
public final BCFType determineIntegerType(final List<Integer> values) {
|
||||
BCFType maxType = BCFType.INT8;
|
||||
for ( final int value : values ) {
|
||||
final BCFType type1 = determineIntegerType(value);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: maxType = BCFType.INT16; break;
|
||||
case INT32: return BCFType.INT32; // fast path for largest possible value
|
||||
default: throw new ReviewedStingException("Unexpected integer type " + type1 );
|
||||
}
|
||||
}
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final BCFType determineIntegerType(final int value) {
|
||||
for ( final BCFType potentialType : TypeDescriptor.INTEGER_TYPES_BY_SIZE ) {
|
||||
if ( potentialType.withinRange(value) )
|
||||
return potentialType;
|
||||
}
|
||||
|
||||
throw new ReviewedStingException("Integer cannot be encoded in allowable range of even INT32: " + value);
|
||||
}
|
||||
|
||||
private final BCFType determinePrimitiveType(final Object v) {
|
||||
if ( v instanceof Integer )
|
||||
return determineIntegerType((Integer)v);
|
||||
else if ( v instanceof Float )
|
||||
return BCFType.FLOAT;
|
||||
else
|
||||
throw new ReviewedStingException("No native encoding for Object of type " + v.getClass().getSimpleName());
|
||||
}
|
||||
|
||||
public final static void encodePrimitive(final int value, final BCFType type, final OutputStream encodeStream) throws IOException {
|
||||
for ( int i = type.getSizeInBytes() - 1; i >= 0; i-- ) {
|
||||
final int shift = i * 8;
|
||||
int mask = 0xFF << shift;
|
||||
int byteValue = (mask & value) >> shift;
|
||||
encodeStream.write(byteValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,136 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.commandline.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.RodWalker;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Testing BCF2
|
||||
*
|
||||
* @author Mark DePristo
|
||||
* @since 2012
|
||||
*/
|
||||
public class BCF2TestWalker extends RodWalker<Integer, Integer> {
|
||||
/**
|
||||
* Variants from this VCF file are used by this tool as input.
|
||||
* The file must at least contain the standard VCF header lines, but
|
||||
* can be empty (i.e., no variants are contained in the file).
|
||||
*/
|
||||
@Input(fullName="variant", shortName = "V", doc="Input VCF file", required=true)
|
||||
public RodBinding<VariantContext> variants;
|
||||
|
||||
@Argument(doc="keep variants", required=false)
|
||||
public boolean keepVariants = false;
|
||||
|
||||
@Argument(doc="quiet", required=false)
|
||||
public boolean quiet = false;
|
||||
|
||||
@Argument(doc="dontIndexOnTheFly", required=false)
|
||||
public boolean dontIndexOnTheFly = false;
|
||||
|
||||
@Output(doc="File to which results should be written",required=true)
|
||||
protected File bcfFile;
|
||||
|
||||
private final List<VariantContext> vcs = new ArrayList<VariantContext>();
|
||||
protected BCF2Writer writer;
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
final Map<String, VCFHeader> vcfRods = VCFUtils.getVCFHeadersFromRods(getToolkit(), Collections.singletonList(variants));
|
||||
final VCFHeader header = VCFUtils.withUpdatedContigs(vcfRods.values().iterator().next(), getToolkit());
|
||||
try {
|
||||
writer = new BCF2Writer("out", bcfFile, new FileOutputStream(bcfFile),
|
||||
getToolkit().getMasterSequenceDictionary(), ! dontIndexOnTheFly );
|
||||
writer.writeHeader(header);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, e);
|
||||
}
|
||||
}
|
||||
|
||||
public Integer map(RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( tracker == null ) // RodWalkers can make funky map calls
|
||||
return 0;
|
||||
|
||||
for ( VariantContext vc : tracker.getValues(variants, context.getLocation())) {
|
||||
writer.add(vc);
|
||||
if ( keepVariants ) vcs.add(vc);
|
||||
}
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// default reduce -- doesn't do anything at all
|
||||
//
|
||||
public Integer reduceInit() { return 0; }
|
||||
public Integer reduce(Integer counter, Integer sum) { return counter + sum; }
|
||||
|
||||
public void onTraversalDone(Integer sum) {
|
||||
try {
|
||||
writer.close();
|
||||
logger.info("Closed writer");
|
||||
|
||||
// read in the BCF records
|
||||
BCF2Codec codec = new BCF2Codec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(bcfFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
Iterator<VariantContext> it = vcs.iterator();
|
||||
while ( ! pbs.isDone() ) {
|
||||
if ( keepVariants ) {
|
||||
VariantContext expected = it.next();
|
||||
if ( ! quiet )
|
||||
System.out.printf("vcf = %s %d %s%n", expected.getChr(), expected.getStart(), expected);
|
||||
}
|
||||
VariantContext bcfRaw = codec.decode(pbs);
|
||||
VariantContext bcf = new VariantContextBuilder(bcfRaw).source("variant").make();
|
||||
if ( ! quiet ) {
|
||||
System.out.printf("bcf = %s %d %s%n", bcf.getChr(), bcf.getStart(), bcf.toString());
|
||||
System.out.printf("--------------------------------------------------%n");
|
||||
}
|
||||
}
|
||||
|
||||
} catch ( IOException e ) {
|
||||
throw new UserException.CouldNotCreateOutputFile(bcfFile, "bad user!");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,387 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Allele;
|
||||
import org.broadinstitute.sting.utils.variantcontext.Genotype;
|
||||
import org.broadinstitute.sting.utils.variantcontext.GenotypesContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.util.*;
|
||||
|
||||
public class BCF2Writer extends IndexingVCFWriter {
|
||||
final protected static Logger logger = Logger.getLogger(BCF2Writer.class);
|
||||
private final static boolean doNotWriteGenotypes = false;
|
||||
private OutputStream outputStream; // Note: do not flush until completely done writing, to avoid issues with eventual BGZF support
|
||||
private VCFHeader header;
|
||||
private Map<String, Integer> contigDictionary = new HashMap<String, Integer>();
|
||||
private Map<String, Integer> stringDictionary = new LinkedHashMap<String, Integer>();
|
||||
|
||||
private final BCF2Encoder encoder = new BCF2Encoder(); // initialized after the header arrives
|
||||
|
||||
public BCF2Writer(final String name, final File location, final OutputStream output, final SAMSequenceDictionary refDict, final boolean enableOnTheFlyIndexing) {
|
||||
super(name, location, output, refDict, enableOnTheFlyIndexing);
|
||||
this.outputStream = getOutputStream();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Interface functions
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Override
|
||||
public void writeHeader(final VCFHeader header) {
|
||||
this.header = header;
|
||||
|
||||
// create the config offsets map
|
||||
for ( final VCFContigHeaderLine contig : header.getContigLines())
|
||||
contigDictionary.put(contig.getID(), contig.getContigIndex());
|
||||
|
||||
// set up the strings dictionary
|
||||
int offset = 0;
|
||||
stringDictionary.put(VCFConstants.PASSES_FILTERS_v4, offset++); // special case the special PASS field
|
||||
for ( VCFHeaderLine line : header.getMetaData() ) {
|
||||
if ( line instanceof VCFIDHeaderLine ) {
|
||||
VCFIDHeaderLine idLine = (VCFIDHeaderLine)line;
|
||||
stringDictionary.put(idLine.getID(), offset++);
|
||||
}
|
||||
}
|
||||
|
||||
// add the dictionary ##dictionary=x,y,z line to the header
|
||||
final String dictionaryLineValue = Utils.join(BCF2Constants.DICTIONARY_LINE_ENTRY_SEPARATOR, stringDictionary.keySet());
|
||||
header.addMetaDataLine(new VCFHeaderLine(BCF2Constants.DICTIONARY_LINE_TAG, dictionaryLineValue));
|
||||
|
||||
// write out the header
|
||||
StandardVCFWriter.writeHeader(header, new OutputStreamWriter(outputStream), doNotWriteGenotypes, BCF2Constants.VERSION_LINE, "BCF2 stream");
|
||||
}
|
||||
|
||||
@Override
|
||||
public void add( final VariantContext initialVC ) {
|
||||
final VariantContext vc = initialVC.fullyDecode(header);
|
||||
super.add(vc); // allow on the fly indexing
|
||||
|
||||
try {
|
||||
final byte[] infoBlock = buildSitesData(vc);
|
||||
final byte[] genotypesBlock = buildSamplesData(vc);
|
||||
|
||||
// write the two blocks to disk
|
||||
writeBlock(infoBlock, genotypesBlock);
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException("Error writing record to BCF2 file: " + vc.toString(), e);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
try {
|
||||
outputStream.flush();
|
||||
outputStream.close();
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new UserException("Failed to close BCF2 file");
|
||||
}
|
||||
super.close();
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// implicit block
|
||||
//
|
||||
// The first four records of BCF are inline untype encoded data of:
|
||||
//
|
||||
// 4 byte integer chrom offset
|
||||
// 4 byte integer start
|
||||
// 4 byte integer ref length
|
||||
// 4 byte float qual
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
private byte[] buildSitesData( VariantContext vc ) throws IOException {
|
||||
final int contigIndex = contigDictionary.get(vc.getChr());
|
||||
if ( contigIndex == -1 )
|
||||
throw new UserException(String.format("Contig %s not found in sequence dictionary from reference", vc.getChr()));
|
||||
|
||||
// note use of encodeRawValue to not insert the typing byte
|
||||
encoder.encodeRawValue(contigIndex, BCFType.INT32);
|
||||
|
||||
// pos
|
||||
encoder.encodeRawValue(vc.getStart(), BCFType.INT32);
|
||||
|
||||
// ref length
|
||||
encoder.encodeRawValue(vc.getEnd() - vc.getStart() + 1, BCFType.INT32);
|
||||
|
||||
// qual
|
||||
if ( vc.hasLog10PError() )
|
||||
encoder.encodeRawFloat((float) vc.getPhredScaledQual(), BCFType.FLOAT);
|
||||
else
|
||||
encoder.encodeRawMissingValue(BCFType.FLOAT);
|
||||
|
||||
// info fields
|
||||
final int nAlleles = vc.getNAlleles();
|
||||
final int nInfo = vc.getAttributes().size();
|
||||
final int nGenotypeFormatFields = StandardVCFWriter.calcVCFGenotypeKeys(vc).size();
|
||||
final int nSamples = vc.getNSamples();
|
||||
|
||||
encoder.encodeRawInt((nAlleles << 16) | (nInfo & 0x00FF), BCFType.INT32);
|
||||
encoder.encodeRawInt((nGenotypeFormatFields << 24) | (nSamples & 0x0FFF), BCFType.INT32);
|
||||
|
||||
buildID(vc);
|
||||
buildAlleles(vc);
|
||||
buildFilter(vc);
|
||||
buildInfo(vc);
|
||||
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private void buildID( VariantContext vc ) throws IOException {
|
||||
encoder.encodeString(vc.getID());
|
||||
}
|
||||
|
||||
private void buildAlleles( VariantContext vc ) throws IOException {
|
||||
for ( final Allele allele : vc.getAlleles() ) {
|
||||
final String s = vc.getAlleleWithRefPadding(allele);
|
||||
encoder.encodeString(s);
|
||||
}
|
||||
}
|
||||
|
||||
private void buildFilter( VariantContext vc ) throws IOException {
|
||||
if ( vc.isFiltered() ) {
|
||||
encodeStringsByRef(vc.getFilters());
|
||||
} else {
|
||||
encoder.encodeTypedMissing(BCFType.INT32);
|
||||
}
|
||||
}
|
||||
|
||||
private void buildInfo( VariantContext vc ) throws IOException {
|
||||
for ( Map.Entry<String, Object> infoFieldEntry : vc.getAttributes().entrySet() ) {
|
||||
final String key = infoFieldEntry.getKey();
|
||||
Object value = infoFieldEntry.getValue();
|
||||
|
||||
final VCFToBCFType typeEquiv = getBCF2TypeFromHeader(key, value);
|
||||
// handle the special FLAG case -- super annoying
|
||||
if ( typeEquiv.vcfType == VCFHeaderLineType.Flag ) value = 1;
|
||||
|
||||
encodeStringByRef(key);
|
||||
if ( value instanceof List ) // NOTE: ONLY WORKS WITH LISTS
|
||||
encoder.encodeTypedVector((List) value, typeEquiv.bcfType);
|
||||
else if ( value instanceof String )
|
||||
encoder.encodeString((String)value);
|
||||
else
|
||||
encoder.encodeTypedSingleton(value, typeEquiv.bcfType);
|
||||
}
|
||||
}
|
||||
|
||||
private byte[] buildSamplesData(final VariantContext vc) throws IOException {
|
||||
// write size
|
||||
List<String> genotypeFields = StandardVCFWriter.calcVCFGenotypeKeys(vc);
|
||||
for ( final String field : genotypeFields ) {
|
||||
if ( field.equals(VCFConstants.GENOTYPE_KEY) ) {
|
||||
addGenotypes(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_QUALITY_KEY) ) {
|
||||
addGQ(vc);
|
||||
} else if ( field.equals(VCFConstants.GENOTYPE_FILTER_KEY) ) {
|
||||
addGenotypeFilters(vc);
|
||||
} else {
|
||||
addGenericGenotypeField(vc, field);
|
||||
}
|
||||
}
|
||||
|
||||
return encoder.getRecordBytes();
|
||||
}
|
||||
|
||||
private final int getNGenotypeFieldValues(final String field, final VariantContext vc) {
|
||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
||||
int nFields = metaData.getCount(vc.getAlternateAlleles().size());
|
||||
if ( nFields == -1 ) { // unbounded, need to look at values
|
||||
return computeMaxSizeOfGenotypeFieldFromValues(field, vc);
|
||||
} else {
|
||||
return nFields;
|
||||
}
|
||||
}
|
||||
|
||||
private final int computeMaxSizeOfGenotypeFieldFromValues(final String field, final VariantContext vc) {
|
||||
int size = 1;
|
||||
final GenotypesContext gc = vc.getGenotypes();
|
||||
|
||||
for ( final Genotype g : gc ) {
|
||||
final Object o = g.getAttribute(field);
|
||||
if ( o == null ) continue;
|
||||
if ( o instanceof List ) {
|
||||
// only do compute if first value is of type list
|
||||
final List values = (List)g.getAttribute(field);
|
||||
if ( values != null )
|
||||
size = Math.max(size, values.size());
|
||||
} else {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return size;
|
||||
}
|
||||
|
||||
private final void addGenericGenotypeField(final VariantContext vc, final String field) throws IOException {
|
||||
final int numInFormatField = getNGenotypeFieldValues(field, vc);
|
||||
final VCFToBCFType type = getBCF2TypeFromHeader(field, null);
|
||||
|
||||
startGenotypeField(field, numInFormatField, type.bcfType);
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( ! g.hasAttribute(field) ) {
|
||||
encoder.encodeRawMissingValues(numInFormatField, type.bcfType);
|
||||
} else {
|
||||
final Object val = g.getAttribute(field);
|
||||
final Collection<Object> vals = numInFormatField == 1 ? Collections.singleton(val) : (Collection)val;
|
||||
encoder.encodeRawValues(vals, type.bcfType);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final class VCFToBCFType {
|
||||
VCFHeaderLineType vcfType;
|
||||
BCFType bcfType;
|
||||
|
||||
private VCFToBCFType(final VCFHeaderLineType vcfType, final BCFType bcfType) {
|
||||
this.vcfType = vcfType;
|
||||
this.bcfType = bcfType;
|
||||
}
|
||||
}
|
||||
|
||||
// TODO -- we really need explicit converters as first class objects
|
||||
private final VCFToBCFType getBCF2TypeFromHeader(final String field, final Object maybeIntValue) {
|
||||
// TODO -- need to generalize so we can enable vectors of compressed genotype ints
|
||||
final VCFCompoundHeaderLine metaData = VariantContext.getMetaDataForField(header, field);
|
||||
|
||||
// TODO -- no sense in allocating these over and over
|
||||
switch ( metaData.getType() ) {
|
||||
case Character: return new VCFToBCFType(metaData.getType(), BCFType.CHAR);
|
||||
case Flag: return new VCFToBCFType(metaData.getType(), BCFType.INT8);
|
||||
case String: return new VCFToBCFType(metaData.getType(), BCFType.CHAR);
|
||||
case Integer: return new VCFToBCFType(metaData.getType(), maybeIntValue != null ? encoder.determineIntegerType((Integer)maybeIntValue) : BCFType.INT32);
|
||||
case Float: return new VCFToBCFType(metaData.getType(), BCFType.FLOAT);
|
||||
default: throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
}
|
||||
}
|
||||
|
||||
private final void addGenotypeFilters(final VariantContext vc) throws IOException {
|
||||
logger.warn("Skipping genotype filter field");
|
||||
// // TODO -- FIXME -- string is wrong here -- need to compute string size...
|
||||
// startGenotypeField(VCFConstants.GENOTYPE_FILTER_KEY, 1, BCFType.CHAR);
|
||||
// for ( final Genotype g : vc.getGenotypes() ) {
|
||||
// if ( g.filtersWereApplied() && g.isFiltered() ) {
|
||||
// encoder.encodeString(ParsingUtils.join(";", ParsingUtils.sortList(g.getFilters())));
|
||||
// } else {
|
||||
// encoder.encodeRawMissingValues(1, BCFType.CHAR); // todo fixme
|
||||
// }
|
||||
// }
|
||||
}
|
||||
|
||||
private final void addGQ(final VariantContext vc) throws IOException {
|
||||
startGenotypeField(VCFConstants.GENOTYPE_QUALITY_KEY, 1, BCFType.INT8);
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.hasLog10PError() ) {
|
||||
final int GQ = (int)Math.round(Math.min(g.getPhredScaledQual(), VCFConstants.MAX_GENOTYPE_QUAL));
|
||||
if ( GQ > VCFConstants.MAX_GENOTYPE_QUAL ) throw new ReviewedStingException("Unexpectedly large GQ " + GQ + " at " + vc);
|
||||
encoder.encodeRawValue(GQ, BCFType.INT8);
|
||||
} else {
|
||||
encoder.encodeRawMissingValues(1, BCFType.INT8);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private final void addGenotypes(final VariantContext vc) throws IOException {
|
||||
if ( vc.getNAlleles() > 127 )
|
||||
throw new ReviewedStingException("Current BCF2 encoder cannot handle sites " +
|
||||
"with > 127 alleles, but you have " + vc.getNAlleles() + " at "
|
||||
+ vc.getChr() + ":" + vc.getStart());
|
||||
|
||||
final Map<Allele, String> alleleMap = StandardVCFWriter.buildAlleleMap(vc);
|
||||
final int requiredPloidy = 2; // TODO -- handle ploidy, will need padding / depadding
|
||||
startGenotypeField(VCFConstants.GENOTYPE_KEY, requiredPloidy, BCFType.INT8);
|
||||
for ( final Genotype g : vc.getGenotypes() ) {
|
||||
if ( g.getPloidy() != requiredPloidy ) throw new ReviewedStingException("Cannot currently handle non-diploid calls!");
|
||||
final List<Integer> encoding = new ArrayList<Integer>(requiredPloidy);
|
||||
for ( final Allele a : g.getAlleles() ) {
|
||||
final int offset = a.isNoCall() ? -1 : Integer.valueOf(alleleMap.get(a));
|
||||
encoding.add(((offset+1) << 1) | (g.isPhased() ? 0x01 : 0x00));
|
||||
}
|
||||
encoder.encodeRawValues(encoding, BCFType.INT8);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write the data in the encoder to the outputstream as a length encoded
|
||||
* block of data. After this call the encoder stream will be ready to
|
||||
* start a new data block
|
||||
*
|
||||
* @throws IOException
|
||||
*/
|
||||
private void writeBlock(final byte[] infoBlock, final byte[] genotypesBlock) throws IOException {
|
||||
BCF2Encoder.encodePrimitive(infoBlock.length, BCFType.INT32, outputStream);
|
||||
BCF2Encoder.encodePrimitive(genotypesBlock.length, BCFType.INT32, outputStream);
|
||||
outputStream.write(infoBlock);
|
||||
outputStream.write(genotypesBlock);
|
||||
}
|
||||
|
||||
public final BCFType encodeStringByRef(final String string) throws IOException {
|
||||
return encodeStringsByRef(Collections.singleton(string));
|
||||
}
|
||||
|
||||
public final BCFType encodeStringsByRef(final Collection<String> strings) throws IOException {
|
||||
final List<Integer> offsets = new ArrayList<Integer>(strings.size());
|
||||
BCFType maxType = BCFType.INT8; // start with the smallest size
|
||||
|
||||
// iterate over strings until we find one that needs 16 bits, and break
|
||||
for ( final String string : strings ) {
|
||||
final int offset = stringDictionary.get(string);
|
||||
offsets.add(offset);
|
||||
final BCFType type1 = encoder.determineIntegerType(offset);
|
||||
switch ( type1 ) {
|
||||
case INT8: break;
|
||||
case INT16: if ( maxType == BCFType.INT8 ) maxType = BCFType.INT16; break;
|
||||
case INT32: maxType = BCFType.INT32; break;
|
||||
default: throw new ReviewedStingException("Unexpected type " + type1);
|
||||
}
|
||||
}
|
||||
|
||||
// we've checked the types for all strings, so write them out
|
||||
encoder.encodeTypedVector(offsets, maxType);
|
||||
return maxType;
|
||||
}
|
||||
|
||||
public final void startGenotypeField(final String key, final int size, final BCFType valueType) throws IOException {
|
||||
encodeStringByRef(key);
|
||||
encoder.encodeType(size, valueType);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
/**
|
||||
* BCF2 types and information
|
||||
*
|
||||
* @author depristo
|
||||
* @since 05/12
|
||||
*/
|
||||
public enum BCFType {
|
||||
RESERVED_0,
|
||||
INT8(1, BCF2Constants.INT8_MISSING_VALUE, -127, 127), // todo -- confirm range
|
||||
INT16(2, BCF2Constants.INT16_MISSING_VALUE, -32767, 32767),
|
||||
INT32(4, BCF2Constants.INT32_MISSING_VALUE, -2147483647, 2147483647),
|
||||
RESERVED_4,
|
||||
FLOAT(4, BCF2Constants.FLOAT_MISSING_VALUE),
|
||||
RESERVED_6,
|
||||
CHAR;
|
||||
|
||||
private final Object missingJavaValue;
|
||||
private final int missingBytes;
|
||||
private final int sizeInBytes;
|
||||
private final long minValue, maxValue;
|
||||
|
||||
BCFType() {
|
||||
this(-1, 0, 0, 0);
|
||||
}
|
||||
|
||||
BCFType(final int sizeInBytes, final int missingBytes) {
|
||||
this(sizeInBytes, missingBytes, 0, 0);
|
||||
}
|
||||
|
||||
BCFType(final int sizeInBytes, final int missingBytes, final long minValue, final long maxValue) {
|
||||
this.sizeInBytes = sizeInBytes;
|
||||
this.missingJavaValue = null;
|
||||
this.missingBytes = missingBytes;
|
||||
this.minValue = minValue;
|
||||
this.maxValue = maxValue;
|
||||
}
|
||||
|
||||
public int getSizeInBytes() {
|
||||
return sizeInBytes;
|
||||
}
|
||||
public int getID() { return ordinal(); }
|
||||
public final boolean withinRange(final long v) { return v >= minValue && v <= maxValue; }
|
||||
public Object getMissingJavaValue() { return missingJavaValue; }
|
||||
public int getMissingBytes() { return missingBytes; }
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
/**
|
||||
* Simple BCF decoder
|
||||
* @author Mark DePristo
|
||||
* @since 5/3/12
|
||||
*/
|
||||
public class TypeDescriptor {
|
||||
public static final int OVERFLOW_ELEMENT_MARKER = 15;
|
||||
public static final int MAX_INLINE_ELEMENTS = 14;
|
||||
|
||||
public final static BCFType[] INTEGER_TYPES_BY_SIZE = new BCFType[3];
|
||||
public final static BCFType[] DICTIONARY_TYPES_BY_SIZE = INTEGER_TYPES_BY_SIZE;
|
||||
private final static BCFType[] lookup = BCFType.values();
|
||||
|
||||
static {
|
||||
INTEGER_TYPES_BY_SIZE[0] = BCFType.INT8;
|
||||
INTEGER_TYPES_BY_SIZE[1] = BCFType.INT16;
|
||||
INTEGER_TYPES_BY_SIZE[2] = BCFType.INT32;
|
||||
}
|
||||
|
||||
public final static byte encodeTypeDescriptor(final int nElements, final BCFType type ) {
|
||||
int encodeSize = Math.min(nElements, OVERFLOW_ELEMENT_MARKER);
|
||||
byte typeByte = (byte)((0x0F & encodeSize) << 4 | (type.getID() & 0x0F));
|
||||
return typeByte;
|
||||
}
|
||||
|
||||
public final static int decodeSize(final byte typeDescriptor) {
|
||||
return (0xF0 & typeDescriptor) >> 4;
|
||||
}
|
||||
|
||||
public final static int decodeTypeID(final byte typeDescriptor) {
|
||||
return typeDescriptor & 0x0F;
|
||||
}
|
||||
|
||||
public final static BCFType decodeType(final byte typeDescriptor) {
|
||||
return lookup[decodeTypeID(typeDescriptor)];
|
||||
}
|
||||
|
||||
public final static boolean sizeIsOverflow(final byte typeDescriptor) {
|
||||
return decodeSize(typeDescriptor) == OVERFLOW_ELEMENT_MARKER;
|
||||
}
|
||||
|
||||
public final static boolean willOverflow(final long nElements) {
|
||||
return nElements > MAX_INLINE_ELEMENTS;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,362 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.utils.codecs.bcf2;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class EncoderDecoderUnitTest extends BaseTest {
|
||||
private final float FLOAT_TOLERANCE = (float)1e-8;
|
||||
final List<BCF2TypedValue> primitives = new ArrayList<BCF2TypedValue>();
|
||||
final List<BCF2TypedValue> basicTypes = new ArrayList<BCF2TypedValue>();
|
||||
final List<BCF2TypedValue> forCombinations = new ArrayList<BCF2TypedValue>();
|
||||
|
||||
@BeforeSuite
|
||||
public void before() {
|
||||
basicTypes.add(new BCF2TypedValue(1, BCFType.INT8));
|
||||
basicTypes.add(new BCF2TypedValue(1000, BCFType.INT16));
|
||||
basicTypes.add(new BCF2TypedValue(1000000, BCFType.INT32));
|
||||
basicTypes.add(new BCF2TypedValue(1.2345e6, BCFType.FLOAT));
|
||||
basicTypes.add(new BCF2TypedValue(new Byte((byte)'A'), BCFType.CHAR));
|
||||
|
||||
// small ints
|
||||
primitives.add(new BCF2TypedValue(0, BCFType.INT8));
|
||||
primitives.add(new BCF2TypedValue(10, BCFType.INT8));
|
||||
primitives.add(new BCF2TypedValue(-1, BCFType.INT8));
|
||||
primitives.add(new BCF2TypedValue(100, BCFType.INT8));
|
||||
primitives.add(new BCF2TypedValue(-100, BCFType.INT8));
|
||||
primitives.add(new BCF2TypedValue(-127, BCFType.INT8)); // last value in range
|
||||
primitives.add(new BCF2TypedValue( 127, BCFType.INT8)); // last value in range
|
||||
|
||||
// medium ints
|
||||
primitives.add(new BCF2TypedValue(-1000, BCFType.INT16));
|
||||
primitives.add(new BCF2TypedValue(1000, BCFType.INT16));
|
||||
primitives.add(new BCF2TypedValue(-128, BCFType.INT16)); // first value in range
|
||||
primitives.add(new BCF2TypedValue( 128, BCFType.INT16)); // first value in range
|
||||
primitives.add(new BCF2TypedValue(-32767, BCFType.INT16)); // last value in range
|
||||
primitives.add(new BCF2TypedValue( 32767, BCFType.INT16)); // last value in range
|
||||
|
||||
// larger ints
|
||||
primitives.add(new BCF2TypedValue(-32768, BCFType.INT32)); // first value in range
|
||||
primitives.add(new BCF2TypedValue( 32768, BCFType.INT32)); // first value in range
|
||||
primitives.add(new BCF2TypedValue(-100000, BCFType.INT32));
|
||||
primitives.add(new BCF2TypedValue(100000, BCFType.INT32));
|
||||
primitives.add(new BCF2TypedValue(-2147483647, BCFType.INT32));
|
||||
primitives.add(new BCF2TypedValue(2147483647, BCFType.INT32));
|
||||
|
||||
// floats
|
||||
primitives.add(new BCF2TypedValue(0.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-0.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.1, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.1, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(5.0 / 3.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-5.0 / 3.0, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.23e3, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.23e6, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.23e9, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.23e12, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(1.23e15, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.23e3, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.23e6, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.23e9, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.23e12, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(-1.23e15, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.MIN_VALUE, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.MAX_VALUE, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.NEGATIVE_INFINITY, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.POSITIVE_INFINITY, BCFType.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.NaN, BCFType.FLOAT));
|
||||
|
||||
// strings
|
||||
//primitives.add(new BCF2TypedValue("", BCFType.CHAR)); <- will be null (which is right)
|
||||
primitives.add(new BCF2TypedValue("S", BCFType.CHAR));
|
||||
primitives.add(new BCF2TypedValue("S2", BCFType.CHAR));
|
||||
primitives.add(new BCF2TypedValue("12345678910", BCFType.CHAR));
|
||||
primitives.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZ", BCFType.CHAR));
|
||||
primitives.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ", BCFType.CHAR));
|
||||
|
||||
// missing values
|
||||
for ( BCFType type : BCFType.values() ) {
|
||||
primitives.add(new BCF2TypedValue(null, type));
|
||||
}
|
||||
|
||||
forCombinations.add(new BCF2TypedValue(10, BCFType.INT8));
|
||||
forCombinations.add(new BCF2TypedValue(100, BCFType.INT8));
|
||||
forCombinations.add(new BCF2TypedValue(-100, BCFType.INT8));
|
||||
forCombinations.add(new BCF2TypedValue(-128, BCFType.INT16)); // first value in range
|
||||
forCombinations.add(new BCF2TypedValue( 128, BCFType.INT16)); // first value in range
|
||||
forCombinations.add(new BCF2TypedValue(-100000, BCFType.INT32));
|
||||
forCombinations.add(new BCF2TypedValue(100000, BCFType.INT32));
|
||||
forCombinations.add(new BCF2TypedValue(0.0, BCFType.FLOAT));
|
||||
forCombinations.add(new BCF2TypedValue(1.23e6, BCFType.FLOAT));
|
||||
forCombinations.add(new BCF2TypedValue(-1.23e6, BCFType.FLOAT));
|
||||
forCombinations.add(new BCF2TypedValue("S", BCFType.CHAR));
|
||||
forCombinations.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZ", BCFType.CHAR));
|
||||
forCombinations.add(new BCF2TypedValue("ABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZABCDEFGHIJKLMNOPQRSTUVWXYZ", BCFType.CHAR));
|
||||
|
||||
// missing values
|
||||
for ( BCFType type : BCFType.values() ) {
|
||||
forCombinations.add(new BCF2TypedValue(null, type));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Typed test values and TestNG data providers
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
private class BCF2TypedValue {
|
||||
final BCFType type;
|
||||
final Object value;
|
||||
|
||||
private BCF2TypedValue(final int value, final BCFType type) {
|
||||
this(new Integer(value), type);
|
||||
}
|
||||
|
||||
private BCF2TypedValue(final double value, final BCFType type) {
|
||||
this(new Float(value), type);
|
||||
}
|
||||
|
||||
private BCF2TypedValue(final Object value, final BCFType type) {
|
||||
this.type = type;
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
public boolean isMissing() { return value == null; }
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return String.format("%s of %s", value, type);
|
||||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
|
||||
public Object[][] BCF2EncodingTestProviderSingletons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv : primitives )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderBasicTypes")
|
||||
public Object[][] BCF2EncodingTestProviderBasicTypes() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv : basicTypes )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSequences")
|
||||
public Object[][] BCF2EncodingTestProviderSequences() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv1 : forCombinations )
|
||||
for ( BCF2TypedValue tv2 : forCombinations )
|
||||
for ( BCF2TypedValue tv3 : forCombinations )
|
||||
tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
|
||||
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
List<Object> expected = Collections.nCopies(length, tv.value);
|
||||
encoder.encodeTypedVector(expected, tv.type);
|
||||
|
||||
BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
final Object decoded = decoder.decodeTypedValue();
|
||||
|
||||
if ( tv.type == BCFType.CHAR ) {
|
||||
Assert.assertTrue(decoded instanceof String);
|
||||
final String decodedString = (String)decoded;
|
||||
Assert.assertTrue(decodedString.length() == length);
|
||||
} else {
|
||||
Assert.assertTrue(decoded instanceof List);
|
||||
final List<Object> decodedList = (List<Object>)decoded;
|
||||
Assert.assertEquals(decodedList.size(), expected.size());
|
||||
for ( Object decodedValue : decodedList )
|
||||
myAssertEquals(tv, decodedValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
if ( tv.type != BCFType.CHAR ) {
|
||||
for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
|
||||
final byte td = TypeDescriptor.encodeTypeDescriptor(1, tv.type);
|
||||
|
||||
final BCF2Encoder encoder = new BCF2Encoder();
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type);
|
||||
}
|
||||
|
||||
final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
final Object decoded = decoder.decodeTypedValue(td);
|
||||
myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
|
||||
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences")
|
||||
public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block) throws IOException {
|
||||
testReadAndSkipWithMultipleBlocks(block, forCombinations);
|
||||
testReadAndSkipWithMultipleBlocks(forCombinations, block);
|
||||
}
|
||||
|
||||
public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block1, final List<BCF2TypedValue> block2) throws IOException {
|
||||
final byte[] record1 = encodeRecord(block1);
|
||||
final byte[] record2 = encodeRecord(block2);
|
||||
|
||||
// each record is individually good
|
||||
decodeRecord(block1, record1);
|
||||
decodeRecord(block2, record2);
|
||||
|
||||
BCF2Decoder decoder = new BCF2Decoder();
|
||||
|
||||
// test setting
|
||||
decoder.setRecordBytes(record1);
|
||||
decodeRecord(block1, decoder);
|
||||
decoder.setRecordBytes(record2);
|
||||
decodeRecord(block2, decoder);
|
||||
|
||||
// test combining the streams
|
||||
final byte[] combined = combineRecords(record1, record2);
|
||||
final List<BCF2TypedValue> combinedObjects = new ArrayList<BCF2TypedValue>(block1);
|
||||
combinedObjects.addAll(block2);
|
||||
|
||||
// the combined bytes is the same as the combined objects
|
||||
InputStream stream = new ByteArrayInputStream(combined);
|
||||
decoder.readNextBlock(record1.length, stream);
|
||||
decodeRecord(block1, decoder);
|
||||
decoder.readNextBlock(record2.length, stream);
|
||||
decodeRecord(block2, decoder);
|
||||
|
||||
// skipping the first block allows us to read the second block directly
|
||||
stream = new ByteArrayInputStream(combined);
|
||||
decoder.skipNextBlock(record1.length, stream);
|
||||
decoder.readNextBlock(record2.length, stream);
|
||||
decodeRecord(block2, decoder);
|
||||
}
|
||||
|
||||
private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
baos.write(record1);
|
||||
baos.write(record2);
|
||||
return baos.toByteArray();
|
||||
}
|
||||
|
||||
private final byte[] encodeRecord(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
BCF2Encoder encoder = new BCF2Encoder();
|
||||
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
if ( tv.isMissing() )
|
||||
encoder.encodeTypedMissing(tv.type);
|
||||
else {
|
||||
final BCFType encodedType = encoder.encode(tv.value);
|
||||
if ( tv.type != null ) // only if we have an expectation
|
||||
Assert.assertEquals(encodedType, tv.type);
|
||||
}
|
||||
}
|
||||
|
||||
// check output
|
||||
final byte[] record = encoder.getRecordBytes();
|
||||
Assert.assertNotNull(record);
|
||||
Assert.assertTrue(record.length > 0);
|
||||
return record;
|
||||
}
|
||||
|
||||
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final byte[] record) {
|
||||
decodeRecord(toEncode, new BCF2Decoder(record));
|
||||
}
|
||||
|
||||
private final void decodeRecord(final List<BCF2TypedValue> toEncode, final BCF2Decoder decoder) {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
Assert.assertFalse(decoder.blockIsFullyDecoded());
|
||||
final Object decoded = decoder.decodeTypedValue();
|
||||
|
||||
myAssertEquals(tv, decoded);
|
||||
}
|
||||
|
||||
Assert.assertTrue(decoder.blockIsFullyDecoded());
|
||||
}
|
||||
|
||||
private final void myAssertEquals(final BCF2TypedValue tv, final Object decoded) {
|
||||
if ( tv.value == null ) { // special needs for instanceof double
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
} else if ( tv.type == BCFType.FLOAT ) { // need tolerance for floats, and they aren't null
|
||||
Assert.assertTrue(decoded instanceof Double);
|
||||
|
||||
final float valueFloat = (float)(Float)tv.value;
|
||||
final float decodedFloat = (float)(double)(Double)decoded;
|
||||
|
||||
if ( Float.isNaN(valueFloat) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Float.isNaN(decodedFloat));
|
||||
else {
|
||||
Assert.assertEquals(decodedFloat, valueFloat, FLOAT_TOLERANCE);
|
||||
}
|
||||
} else
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue