Testing of BCF codec
-- Rev'd tribble -- Minor code cleanup -- BCF2 encoder / decoder use Double, not Float, internally everywhere -- Generalized VC testing framework
This commit is contained in:
parent
fb1911a1b6
commit
373ae39e86
|
|
@ -129,13 +129,20 @@ public class BCF2Codec implements FeatureCodec<VariantContext> {
|
|||
|
||||
@Override
|
||||
public boolean canDecode( final String path ) {
|
||||
FileInputStream fis = null;
|
||||
try {
|
||||
FileInputStream fis = new FileInputStream(path);
|
||||
fis = new FileInputStream(path);
|
||||
return BCF2Utils.startsWithBCF2Magic(fis);
|
||||
} catch ( FileNotFoundException e ) {
|
||||
return false;
|
||||
} catch ( IOException e ) {
|
||||
return false;
|
||||
} finally {
|
||||
try {
|
||||
if ( fis != null ) fis.close();
|
||||
} catch ( IOException e ) {
|
||||
; // do nothing
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -199,8 +199,8 @@ public class BCF2Decoder {
|
|||
return BCF2Utils.readInt(bytesForEachInt, recordStream);
|
||||
}
|
||||
|
||||
public final float rawFloatToFloat(final int rawFloat) {
|
||||
return Float.intBitsToFloat(rawFloat);
|
||||
public final double rawFloatToFloat(final int rawFloat) {
|
||||
return (double)Float.intBitsToFloat(rawFloat);
|
||||
}
|
||||
|
||||
// ----------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -140,7 +140,7 @@ public class BCF2Encoder {
|
|||
case INT8:
|
||||
case INT16:
|
||||
case INT32: encodePrimitive((Integer)value, type); break;
|
||||
case FLOAT: encodeRawFloat((Float) value, type); break;
|
||||
case FLOAT: encodeRawFloat((Double) value, type); break;
|
||||
case CHAR: encodeRawChar((Byte) value); break;
|
||||
default: throw new ReviewedStingException("Illegal type encountered " + type);
|
||||
}
|
||||
|
|
@ -166,8 +166,8 @@ public class BCF2Encoder {
|
|||
encodeStream.write(c);
|
||||
}
|
||||
|
||||
public final void encodeRawFloat(final float value, final BCF2Type type) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits(value), type);
|
||||
public final void encodeRawFloat(final double value, final BCF2Type type) throws IOException {
|
||||
encodePrimitive(Float.floatToIntBits((float)value), type);
|
||||
}
|
||||
|
||||
public final void encodeType(final int size, final BCF2Type type) throws IOException {
|
||||
|
|
@ -217,7 +217,7 @@ public class BCF2Encoder {
|
|||
private final BCF2Type determinePrimitiveType(final Object v) {
|
||||
if ( v instanceof Integer )
|
||||
return determineIntegerType((Integer)v);
|
||||
else if ( v instanceof Float )
|
||||
else if ( v instanceof Double )
|
||||
return BCF2Type.FLOAT;
|
||||
else
|
||||
throw new ReviewedStingException("No native encoding for Object of type " + v.getClass().getSimpleName());
|
||||
|
|
|
|||
|
|
@ -232,7 +232,4 @@ public class BeagleCodec extends AsciiFeatureCodec<BeagleFeature> implements Ref
|
|||
|
||||
return bglFeature;
|
||||
}
|
||||
|
||||
public boolean canDecode(final String potentialInput) { return false; }
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -299,7 +299,15 @@ class BCF2Writer extends IndexingVariantContextWriter {
|
|||
case Character: return new VCFToBCFType(metaData.getType(), BCF2Type.CHAR);
|
||||
case Flag: return new VCFToBCFType(metaData.getType(), BCF2Type.INT8);
|
||||
case String: return new VCFToBCFType(metaData.getType(), BCF2Type.CHAR);
|
||||
case Integer: return new VCFToBCFType(metaData.getType(), maybeIntValue != null ? encoder.determineIntegerType((Integer)maybeIntValue) : BCF2Type.INT32);
|
||||
case Integer: // note integer calculation is a bit complex because of the need to determine sizes
|
||||
BCF2Type type;
|
||||
if ( maybeIntValue == null )
|
||||
type = BCF2Type.INT8;
|
||||
else if ( maybeIntValue instanceof List )
|
||||
type = encoder.determineIntegerType(((List<Integer>)maybeIntValue));
|
||||
else
|
||||
type = encoder.determineIntegerType((Integer)maybeIntValue);
|
||||
return new VCFToBCFType(metaData.getType(), type);
|
||||
case Float: return new VCFToBCFType(metaData.getType(), BCF2Type.FLOAT);
|
||||
default: throw new ReviewedStingException("Unexpected type for field " + field);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -64,12 +64,6 @@ public abstract class BaseTest {
|
|||
public static final String b37GoodNA12878BAM = validationDataLocation + "/NA12878.HiSeq.WGS.bwa.cleaned.recal.hg19.20.bam";
|
||||
public static final String b37_NA12878_OMNI = validationDataLocation + "/NA12878.omni.vcf";
|
||||
|
||||
public static final String refseqAnnotationLocation = annotationDataLocation + "refseq/";
|
||||
public static final String hg18Refseq = refseqAnnotationLocation + "refGene-big-table-hg18.txt";
|
||||
public static final String hg19Refseq = refseqAnnotationLocation + "refGene-big-table-hg19.txt";
|
||||
public static final String b36Refseq = refseqAnnotationLocation + "refGene-big-table-b36.txt";
|
||||
public static final String b37Refseq = refseqAnnotationLocation + "refGene-big-table-b37.txt";
|
||||
|
||||
public static final String dbsnpDataLocation = GATKDataLocation;
|
||||
public static final String b36dbSNP129 = dbsnpDataLocation + "dbsnp_129_b36.vcf";
|
||||
public static final String b37dbSNP129 = dbsnpDataLocation + "dbsnp_129_b37.vcf";
|
||||
|
|
@ -84,7 +78,7 @@ public abstract class BaseTest {
|
|||
public static final String hg19Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.interval_list";
|
||||
public static final String hg19Chr20Intervals = intervalsLocation + "whole_exome_agilent_1.1_refseq_plus_3_boosters.Homo_sapiens_assembly19.targets.chr20.interval_list";
|
||||
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = true;
|
||||
public static final boolean REQUIRE_NETWORK_CONNECTION = false;
|
||||
public static final String networkTempDir;
|
||||
public static final File networkTempDirFile;
|
||||
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ import java.util.*;
|
|||
|
||||
|
||||
public class EncoderDecoderUnitTest extends BaseTest {
|
||||
private final float FLOAT_TOLERANCE = (float)1e-8;
|
||||
private final double FLOAT_TOLERANCE = 1e-6;
|
||||
final List<BCF2TypedValue> primitives = new ArrayList<BCF2TypedValue>();
|
||||
final List<BCF2TypedValue> basicTypes = new ArrayList<BCF2TypedValue>();
|
||||
final List<BCF2TypedValue> forCombinations = new ArrayList<BCF2TypedValue>();
|
||||
|
|
@ -102,9 +102,9 @@ public class EncoderDecoderUnitTest extends BaseTest {
|
|||
primitives.add(new BCF2TypedValue(-1.23e15, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.MIN_VALUE, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.MAX_VALUE, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.NEGATIVE_INFINITY, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.POSITIVE_INFINITY, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Float.NaN, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Double.NEGATIVE_INFINITY, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Double.POSITIVE_INFINITY, BCF2Type.FLOAT));
|
||||
primitives.add(new BCF2TypedValue(Double.NaN, BCF2Type.FLOAT));
|
||||
|
||||
// strings
|
||||
//primitives.add(new BCF2TypedValue("", BCFType.CHAR)); <- will be null (which is right)
|
||||
|
|
@ -155,7 +155,7 @@ public class EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
|
||||
private BCF2TypedValue(final double value, final BCF2Type type) {
|
||||
this(new Float(value), type);
|
||||
this(new Double(value), type);
|
||||
}
|
||||
|
||||
private BCF2TypedValue(final Object value, final BCF2Type type) {
|
||||
|
|
@ -348,13 +348,17 @@ public class EncoderDecoderUnitTest extends BaseTest {
|
|||
} else if ( tv.type == BCF2Type.FLOAT ) { // need tolerance for floats, and they aren't null
|
||||
Assert.assertTrue(decoded instanceof Double);
|
||||
|
||||
final float valueFloat = (float)(Float)tv.value;
|
||||
final float decodedFloat = (float)(double)(Double)decoded;
|
||||
final double valueFloat = (Double)tv.value;
|
||||
final double decodedFloat = (Double)decoded;
|
||||
|
||||
if ( Float.isNaN(valueFloat) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Float.isNaN(decodedFloat));
|
||||
if ( Double.isNaN(valueFloat) ) // NaN == NaN => false unfortunately
|
||||
Assert.assertTrue(Double.isNaN(decodedFloat));
|
||||
else if ( Double.isInfinite(valueFloat) ) // infinities cannot be tolerance-compared: inf - inf and inf / inf are both NaN
|
||||
Assert.assertTrue(Double.isInfinite(decodedFloat));
|
||||
else {
|
||||
Assert.assertEquals(decodedFloat, valueFloat, FLOAT_TOLERANCE);
|
||||
final double delta = Math.abs(decodedFloat - valueFloat);
|
||||
final double ratio = Math.abs(decodedFloat / valueFloat - 1.0);
|
||||
Assert.assertTrue(delta < FLOAT_TOLERANCE || ratio < FLOAT_TOLERANCE);
|
||||
}
|
||||
} else
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
|
|
|
|||
|
|
@ -24,9 +24,20 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.variantcontext;
|
||||
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.Options;
|
||||
import org.broadinstitute.sting.utils.variantcontext.writer.VariantContextWriter;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.DataProvider;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
|
|
@ -37,21 +48,38 @@ import java.util.*;
|
|||
*/
|
||||
public class VariantContextTestProvider {
|
||||
final static VCFHeader header;
|
||||
final static List<VariantContextsTest> tests = new ArrayList<VariantContextsTest>();
|
||||
final static List<VariantContextTestData> TEST_DATAs = new ArrayList<VariantContextTestData>();
|
||||
final static VariantContext ROOT;
|
||||
|
||||
public static class VariantContextsTest {
|
||||
public abstract static class VariantContextIOTest {
|
||||
public String toString() {
|
||||
return "VariantContextIOTest:" + getExtension();
|
||||
}
|
||||
public abstract String getExtension();
|
||||
public abstract FeatureCodec<VariantContext> makeCodec();
|
||||
public abstract VariantContextWriter makeWriter(final File outputFile, final EnumSet<Options> baseOptions);
|
||||
|
||||
public List<VariantContext> preprocess(final VCFHeader header, List<VariantContext> vcsBeforeIO) {
|
||||
return vcsBeforeIO;
|
||||
}
|
||||
|
||||
public List<VariantContext> postprocess(final VCFHeader header, List<VariantContext> vcsAfterIO) {
|
||||
return vcsAfterIO;
|
||||
}
|
||||
}
|
||||
|
||||
public static class VariantContextTestData {
|
||||
public List<VariantContext> vcs;
|
||||
|
||||
public VariantContextsTest(final VariantContextBuilder builder) {
|
||||
public VariantContextTestData(final VariantContextBuilder builder) {
|
||||
this(Collections.singletonList(builder.make()));
|
||||
}
|
||||
|
||||
public VariantContextsTest(final VariantContext vc) {
|
||||
public VariantContextTestData(final VariantContext vc) {
|
||||
this(Collections.singletonList(vc));
|
||||
}
|
||||
|
||||
public VariantContextsTest(final List<VariantContext> vcs) {
|
||||
public VariantContextTestData(final List<VariantContext> vcs) {
|
||||
this.vcs = vcs;
|
||||
}
|
||||
|
||||
|
|
@ -65,10 +93,11 @@ public class VariantContextTestProvider {
|
|||
}
|
||||
|
||||
private final static void add(VariantContextBuilder builder) {
|
||||
tests.add(new VariantContextsTest(builder));
|
||||
TEST_DATAs.add(new VariantContextTestData(builder));
|
||||
}
|
||||
|
||||
static {
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
VariantContextBuilder rootBuilder = new VariantContextBuilder();
|
||||
rootBuilder.source("test");
|
||||
rootBuilder.loc("1", 10, 10);
|
||||
|
|
@ -89,6 +118,8 @@ public class VariantContextTestProvider {
|
|||
add(builder().passFilters());
|
||||
add(builder().filters("FILTER1"));
|
||||
add(builder().filters("FILTER1", "FILTER2"));
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
||||
|
||||
add(builder().log10PError(VariantContext.NO_LOG10_PERROR));
|
||||
add(builder().log10PError(-1));
|
||||
|
|
@ -97,21 +128,111 @@ public class VariantContextTestProvider {
|
|||
add(builder().noID());
|
||||
add(builder().id("rsID12345"));
|
||||
|
||||
|
||||
add(builder().attribute("INT1", 1));
|
||||
add(builder().attribute("INT1", 100));
|
||||
add(builder().attribute("INT1", 1000));
|
||||
add(builder().attribute("INT1", 100000));
|
||||
add(builder().attribute("INT1", null));
|
||||
add(builder().attribute("INT3", Arrays.asList(1, 2, 3)));
|
||||
add(builder().attribute("INT3", Arrays.asList(1000, 2000, 3000)));
|
||||
add(builder().attribute("INT3", Arrays.asList(100000, 200000, 300000)));
|
||||
add(builder().attribute("INT3", null));
|
||||
add(builder().attribute("INT20", Arrays.asList(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20)));
|
||||
metaData.add(new VCFInfoHeaderLine("INT1", 1, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("INT3", 3, VCFHeaderLineType.Integer, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("INT20", 20, VCFHeaderLineType.Integer, "x"));
|
||||
|
||||
add(builder().attribute("FLOAT1", 1.0));
|
||||
add(builder().attribute("FLOAT1", 100.0));
|
||||
add(builder().attribute("FLOAT1", 1000.0));
|
||||
add(builder().attribute("FLOAT1", 100000.0));
|
||||
add(builder().attribute("FLOAT1", null));
|
||||
add(builder().attribute("FLOAT3", Arrays.asList(1.0, 2.0, 3.0)));
|
||||
add(builder().attribute("FLOAT3", Arrays.asList(1000.0, 2000.0, 3000.0)));
|
||||
add(builder().attribute("FLOAT3", Arrays.asList(100000.0, 200000.0, 300000.0)));
|
||||
add(builder().attribute("FLOAT3", null));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT1", 1, VCFHeaderLineType.Float, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("FLOAT3", 3, VCFHeaderLineType.Float, "x"));
|
||||
|
||||
add(builder().attribute("FLAG", true));
|
||||
add(builder().attribute("FLAG", false));
|
||||
metaData.add(new VCFInfoHeaderLine("FLAG", 1, VCFHeaderLineType.Flag, "x"));
|
||||
|
||||
add(builder().attribute("STRING1", "s1"));
|
||||
add(builder().attribute("STRING1", null));
|
||||
// TODO - re-enable when BCF2 spec is fixed
|
||||
// add(builder().attribute("STRING3", Arrays.asList("s1", "s2", "s3")));
|
||||
// add(builder().attribute("STRING3", null));
|
||||
// add(builder().attribute("STRING20", Arrays.asList("s1", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13", "s14", "s15", "s16", "s17", "s18", "s19", "s20")));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING1", 1, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING3", 3, VCFHeaderLineType.String, "x"));
|
||||
metaData.add(new VCFInfoHeaderLine("STRING20", 20, VCFHeaderLineType.String, "x"));
|
||||
|
||||
addGenotypesData(new ArrayList<VariantContextTestData>(TEST_DATAs), metaData);
|
||||
|
||||
// prep the header
|
||||
Set<VCFHeaderLine> metaData = new TreeSet<VCFHeaderLine>();
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER1"));
|
||||
metaData.add(new VCFFilterHeaderLine("FILTER2"));
|
||||
metaData.add(new VCFContigHeaderLine(VCFHeader.CONTIG_KEY, Collections.singletonMap("ID", "1"), 0));
|
||||
|
||||
header = new VCFHeader(metaData);
|
||||
}
|
||||
|
||||
private static void addGenotypesData(final ArrayList<VariantContextTestData> sites, Set<VCFHeaderLine> metaData) {
|
||||
// TODO
|
||||
// for each VC in sites, we are going to create two root genotypes.
|
||||
// The first is the primary, and will be added to each new test
|
||||
// The second is variable. In some tests it's absent (testing 1 genotype), in others it is duplicated
|
||||
// once, 10, 100, or 1000 times to test scaling
|
||||
// Also, create a "missing" genotype (corresponding to a . sample) in the VCF for inclusion as well.
|
||||
|
||||
// test GT
|
||||
|
||||
// test GQ
|
||||
|
||||
// test Integer, Float, Flag, String atomic, vector, and missing types of different lengths per sample
|
||||
}
|
||||
|
||||
|
||||
public static VCFHeader getHeader() {
|
||||
return header;
|
||||
}
|
||||
|
||||
public static List<VariantContextsTest> generateSiteTests() {
|
||||
return tests;
|
||||
public static List<VariantContextTestData> generateSiteTests() {
|
||||
return TEST_DATAs;
|
||||
}
|
||||
|
||||
public static void testReaderWriter(final VariantContextIOTest tester, final VariantContextTestData data) throws IOException {
|
||||
final File tmpFile = File.createTempFile("testReaderWriter", tester.getExtension());
|
||||
tmpFile.deleteOnExit();
|
||||
|
||||
// todo -- test all options
|
||||
|
||||
// write
|
||||
final EnumSet<Options> options = EnumSet.of(Options.INDEX_ON_THE_FLY);
|
||||
final VariantContextWriter writer = tester.makeWriter(tmpFile, options);
|
||||
writer.writeHeader(VariantContextTestProvider.getHeader());
|
||||
final List<VariantContext> expected = data.vcs;
|
||||
for ( VariantContext vc : expected )
|
||||
writer.add(vc);
|
||||
writer.close();
|
||||
|
||||
// read in the features
|
||||
FeatureCodec<VariantContext> codec = tester.makeCodec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
// TODO -- test header quality
|
||||
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
final List<VariantContext> actual = new ArrayList<VariantContext>(expected.size());
|
||||
while ( ! pbs.isDone() ) { actual.add(codec.decode(pbs)); };
|
||||
|
||||
Assert.assertEquals(actual.size(), expected.size());
|
||||
|
||||
for ( int i = 0; i < expected.size(); i++ )
|
||||
VariantContextTestProvider.assertEquals(actual.get(i), expected.get(i));
|
||||
}
|
||||
|
||||
public static void assertEquals( final VariantContext actual, final VariantContext expected ) {
|
||||
|
|
|
|||
|
|
@ -1,130 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextBuilder;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class BCF2WriterCodecUnitTest extends BaseTest {
|
||||
private static File tmpFile;
|
||||
private SAMSequenceDictionary dictionary;
|
||||
|
||||
// private final static String START_VCF41_LINES = "##fileformat=VCFv4.1\n" +
|
||||
// "##reference=file://" + BaseTest.b37KGReference + "\n" +
|
||||
// "##contig=<ID=1,length=249250621,assembly=b37>\n" +
|
||||
// "##contig=<ID=2,length=243199373,assembly=b37>\n";
|
||||
//
|
||||
//// ##INFO=<ID=NS,Number=1,Type=Integer,Description="Number of Samples With Data">
|
||||
//// ##INFO=<ID=DP,Number=1,Type=Integer,Description="Total Depth">
|
||||
//// ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
|
||||
//// ##INFO=<ID=AA,Number=1,Type=String,Description="Ancestral Allele">
|
||||
//// ##INFO=<ID=DB,Number=0,Type=Flag,Description="dbSNP membership, build 129">
|
||||
//// ##INFO=<ID=H2,Number=0,Type=Flag,Description="HapMap2 membership">
|
||||
//// ##FILTER=<ID=q10,Description="Quality below 10">
|
||||
//// ##FILTER=<ID=s50,Description="Less than 50% of samples have data">
|
||||
//// ##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
|
||||
//// ##FORMAT=<ID=GQ,Number=1,Type=Integer,Description="Genotype Quality">
|
||||
//// ##FORMAT=<ID=DP,Number=1,Type=Integer,Description="Read Depth">
|
||||
//// ##FORMAT=<ID=HQ,Number=2,Type=Integer,Description="Haplotype Quality">
|
||||
//
|
||||
// private final static String SITES_HEADER_LINE = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT";
|
||||
|
||||
@BeforeSuite
|
||||
public void before() throws IOException {
|
||||
tmpFile = File.createTempFile("BCF2WriterCodecUnitTest", ".bcf");
|
||||
tmpFile.delete();
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dictionary = seq.getSequenceDictionary();
|
||||
}
|
||||
|
||||
@BeforeMethod
|
||||
public void beforeMethod() throws IOException {
|
||||
tmpFile.delete(); // cleanup the test file
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Provider of VariantContexts for testing
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "SiteVCs")
|
||||
public Object[][] SiteVCsTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( VariantContextTestProvider.VariantContextsTest test : VariantContextTestProvider.generateSiteTests() )
|
||||
tests.add(new Object[]{test.vcs});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "SiteVCs")
|
||||
public void testBCF2WriterReader(final List<VariantContext> contexts) throws IOException {
|
||||
// todo -- test all options
|
||||
|
||||
// write
|
||||
final VariantContextWriter writer = VariantContextWriterFactory.create(tmpFile, dictionary);
|
||||
writer.writeHeader(VariantContextTestProvider.getHeader());
|
||||
for ( VariantContext vc : contexts )
|
||||
writer.add(vc);
|
||||
writer.close();
|
||||
|
||||
// read in the features
|
||||
BCF2Codec codec = new BCF2Codec();
|
||||
PositionalBufferedStream pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
FeatureCodecHeader header = codec.readHeader(pbs);
|
||||
pbs.close();
|
||||
pbs = new PositionalBufferedStream(new FileInputStream(tmpFile));
|
||||
pbs.skip(header.getHeaderEnd());
|
||||
|
||||
Iterator<VariantContext> it = contexts.iterator();
|
||||
while ( ! pbs.isDone() ) {
|
||||
VariantContext vc = it.next();
|
||||
VariantContext bcf = codec.decode(pbs);
|
||||
VariantContextTestProvider.assertEquals(vc, bcf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,135 @@
|
|||
/*
|
||||
* Copyright (c) 2012, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
// our package
|
||||
package org.broadinstitute.sting.utils.variantcontext.writer;
|
||||
|
||||
|
||||
// the imports for unit testing.
|
||||
|
||||
|
||||
import net.sf.picard.reference.IndexedFastaSequenceFile;
|
||||
import net.sf.samtools.SAMSequenceDictionary;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.FeatureCodecHeader;
|
||||
import org.broad.tribble.readers.PositionalBufferedStream;
|
||||
import org.broadinstitute.sting.BaseTest;
|
||||
import org.broadinstitute.sting.utils.codecs.bcf2.*;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFCodec;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeader;
|
||||
import org.broadinstitute.sting.utils.fasta.CachingIndexedFastaSequenceFile;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextTestProvider;
|
||||
import org.testng.annotations.BeforeMethod;
|
||||
import org.testng.annotations.BeforeSuite;
|
||||
import org.testng.annotations.DataProvider;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.*;
|
||||
|
||||
|
||||
public class VariantContextWritersUnitTest extends BaseTest {
|
||||
private SAMSequenceDictionary dictionary;
|
||||
|
||||
@BeforeSuite
|
||||
public void before() throws IOException {
|
||||
IndexedFastaSequenceFile seq = new CachingIndexedFastaSequenceFile(new File(b37KGReference));
|
||||
dictionary = seq.getSequenceDictionary();
|
||||
}
|
||||
|
||||
@DataProvider(name = "VariantContextTest_SingleContexts")
|
||||
public Object[][] SiteVCsTest() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( VariantContextTestProvider.VariantContextTestData testData : VariantContextTestProvider.generateSiteTests() )
|
||||
tests.add(new Object[]{testData});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test BCF2 reader / writer
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Test(dataProvider = "VariantContextTest_SingleContexts")
|
||||
public void testBCF2WriterReader(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
|
||||
VariantContextTestProvider.testReaderWriter(new BCFIOTester(), testData);
|
||||
}
|
||||
|
||||
private class BCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
|
||||
@Override
|
||||
public String getExtension() {
|
||||
return ".bcf";
|
||||
}
|
||||
|
||||
@Override
|
||||
public FeatureCodec<VariantContext> makeCodec() {
|
||||
return new BCF2Codec();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VariantContextWriter makeWriter(final File file, final EnumSet<Options> baseOptions) {
|
||||
return VariantContextWriterFactory.create(file, dictionary, baseOptions);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------------
|
||||
//
|
||||
// Test VCF reader / writer
|
||||
//
|
||||
// --------------------------------------------------------------------------------
|
||||
|
||||
@Test(enabled = false, dataProvider = "VariantContextTest_SingleContexts")
|
||||
public void testVCF4WriterReader(final VariantContextTestProvider.VariantContextTestData testData) throws IOException {
|
||||
VariantContextTestProvider.testReaderWriter(new VCFIOTester(), testData);
|
||||
}
|
||||
|
||||
private class VCFIOTester extends VariantContextTestProvider.VariantContextIOTest {
|
||||
@Override
|
||||
public String getExtension() {
|
||||
return ".vcf";
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<VariantContext> postprocess(final VCFHeader header, final List<VariantContext> vcsAfterIO) {
|
||||
final List<VariantContext> fullyDecoded = new ArrayList<VariantContext>(vcsAfterIO.size());
|
||||
|
||||
for ( final VariantContext withStrings : vcsAfterIO )
|
||||
fullyDecoded.add(withStrings.fullyDecode(header));
|
||||
|
||||
return fullyDecoded;
|
||||
}
|
||||
|
||||
@Override
|
||||
public FeatureCodec<VariantContext> makeCodec() {
|
||||
return new VCFCodec();
|
||||
}
|
||||
|
||||
@Override
|
||||
public VariantContextWriter makeWriter(final File file, final EnumSet<Options> baseOptions) {
|
||||
return VariantContextWriterFactory.create(file, dictionary, baseOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
|
@ -1,3 +1,3 @@
|
|||
<ivy-module version="1.0">
|
||||
<info organisation="org.broad" module="tribble" revision="101" status="integration" />
|
||||
<info organisation="org.broad" module="tribble" revision="107" status="integration" />
|
||||
</ivy-module>
|
||||
Loading…
Reference in New Issue