UnitTests for decodeIntArray method
This commit is contained in:
parent
5b8bd81991
commit
4a4d3cde3d
|
|
@ -109,8 +109,17 @@ public class BCF2GenotypeFieldDecoders {
|
|||
// a single cache for the encoded genotypes, since we don't actually need this vector
|
||||
final int[] tmp = new int[size];
|
||||
|
||||
// TODO -- fast path for size == 2 (diploid) and many samples
|
||||
// TODO -- by creating all 4 allele combinations and doing a straight lookup instead of allocating them
|
||||
// TODO -- fast path for many samples with diploid genotypes
|
||||
//
|
||||
// The way this would work is simple. Create a List<Allele> diploidGenotypes[] object
|
||||
// After decoding the offset, if that sample is diploid compute the
|
||||
// offset into the alleles vector which is simply offset = allele0 * nAlleles + allele1
|
||||
// if there's a value at diploidGenotypes[offset], use it, otherwise create the genotype
|
||||
// cache it and use that
|
||||
//
|
||||
// Some notes. If there are nAlleles at the site, there are implicitly actually
|
||||
// n + 1 options including
|
||||
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
final int[] encoded = decoder.decodeIntArray(size, type, tmp);
|
||||
if ( encoded == null )
|
||||
|
|
@ -123,6 +132,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
final List<Allele> gt = new ArrayList<Allele>(encoded.length);
|
||||
|
||||
for ( final int encode : encoded ) {
|
||||
// TODO -- handle padding!
|
||||
final int offset = encode >> 1;
|
||||
gt.add(offset == 0 ? Allele.NO_CALL : siteAlleles.get(offset - 1));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,13 +172,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
|
||||
public Object[][] BCF2EncodingTestProviderSingletons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv : primitives )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding of basic types
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderBasicTypes")
|
||||
public Object[][] BCF2EncodingTestProviderBasicTypes() {
|
||||
|
|
@ -188,38 +186,6 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSequences")
|
||||
public Object[][] BCF2EncodingTestProviderSequences() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv1 : forCombinations )
|
||||
for ( BCF2TypedValue tv2 : forCombinations )
|
||||
for ( BCF2TypedValue tv3 : forCombinations )
|
||||
tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
|
||||
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
@DataProvider(name = "ListOfStrings")
|
||||
public Object[][] listOfStringsProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ListOfStrings")
|
||||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
|
|
@ -240,6 +206,93 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
}
|
||||
}
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
|
||||
public Object[][] BCF2EncodingTestProviderSingletons() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv : primitives )
|
||||
tests.add(new Object[]{Arrays.asList(tv)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
|
||||
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding of vectors
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BCF2EncodingTestProviderSequences")
|
||||
public Object[][] BCF2EncodingTestProviderSequences() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
for ( BCF2TypedValue tv1 : forCombinations )
|
||||
for ( BCF2TypedValue tv2 : forCombinations )
|
||||
for ( BCF2TypedValue tv3 : forCombinations )
|
||||
tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
if ( tv.type != BCF2Type.CHAR ) {
|
||||
for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
|
||||
final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);
|
||||
|
||||
final BCF2Encoder encoder = new BCF2Encoder();
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type);
|
||||
}
|
||||
|
||||
final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
final Object decoded = decoder.decodeTypedValue(td);
|
||||
myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
|
||||
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test strings and lists of strings
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "ListOfStrings")
|
||||
public Object[][] listOfStringsProvider() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"});
|
||||
tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"});
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "ListOfStrings")
|
||||
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
|
||||
final String collapsed = BCF2Utils.collapseStringList(strings);
|
||||
Assert.assertEquals(collapsed, expected);
|
||||
Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Tests to determine the best type of arrays of integers
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "BestIntTypeTests")
|
||||
public Object[][] BestIntTypeTests() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
|
@ -266,35 +319,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
|
||||
}
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
|
||||
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
for ( final BCF2TypedValue tv : toEncode ) {
|
||||
if ( tv.type != BCF2Type.CHAR ) {
|
||||
for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) {
|
||||
final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);
|
||||
|
||||
final BCF2Encoder encoder = new BCF2Encoder();
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type);
|
||||
}
|
||||
|
||||
final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
|
||||
for ( int i = 0; i < length; i++ ) {
|
||||
final Object decoded = decoder.decodeTypedValue(td);
|
||||
myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
|
||||
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
|
||||
final byte[] record = encodeRecord(toEncode);
|
||||
decodeRecord(toEncode, record);
|
||||
}
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Tests managing and skipping multiple blocks
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences")
|
||||
public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block) throws IOException {
|
||||
|
|
@ -337,6 +366,82 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
decodeRecord(block2, decoder);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Test encoding / decoding arrays of ints
|
||||
//
|
||||
// This checks that we can encode and decode correctly with the
|
||||
// low-level decodeIntArray function arrays of values. This
|
||||
// has to be pretty comprehensive as decodeIntArray is a highly optimized
|
||||
// piece of code with lots of edge cases. The values we are encoding
|
||||
// don't really matter -- just that the values come back as expected.
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
@DataProvider(name = "IntArrays")
|
||||
public Object[][] makeIntArrays() {
|
||||
List<Object[]> tests = new ArrayList<Object[]>();
|
||||
|
||||
for ( int nValues : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
|
||||
for ( int nPad : Arrays.asList(0, 1, 2, 5, 10, 100) ) {
|
||||
int nElements = nValues + nPad;
|
||||
|
||||
List<Integer> values = new ArrayList<Integer>(nElements);
|
||||
|
||||
// add nValues from 0 to nValues - 1
|
||||
for ( int i = 0; i < nValues; i++ )
|
||||
values.add(i);
|
||||
|
||||
// add nPad nulls
|
||||
for ( int i = 0; i < nPad; i++ )
|
||||
values.add(null);
|
||||
|
||||
tests.add(new Object[]{values});
|
||||
}
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
@Test(dataProvider = "IntArrays")
|
||||
public void testIntArrays(final List<Integer> ints) throws IOException {
|
||||
final BCF2Encoder encoder = new BCF2Encoder();
|
||||
encoder.encodeTyped(ints, BCF2Type.INT16);
|
||||
|
||||
final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
|
||||
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
|
||||
// read the int[] with the low-level version
|
||||
final int[] decoded = decoder.decodeIntArray(typeDescriptor);
|
||||
|
||||
if ( isMissing(ints) ) {
|
||||
// we expect that the result is null in this case
|
||||
Assert.assertNull(decoded, "Encoded all missing values -- expected null");
|
||||
} else {
|
||||
// we expect at least some values to come back
|
||||
Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data");
|
||||
|
||||
// check corresponding values
|
||||
for ( int i = 0; i < ints.size(); i++ ) {
|
||||
final Integer expected = ints.get(i);
|
||||
|
||||
if ( expected == null ) {
|
||||
Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values");
|
||||
} else {
|
||||
Assert.assertTrue(decoded.length > i, "we expected at least " + i + " values in decoded array");
|
||||
Assert.assertEquals(decoded[i], (int)expected);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------
|
||||
//
|
||||
// Helper routines
|
||||
//
|
||||
// -----------------------------------------------------------------
|
||||
|
||||
private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
baos.write(record1);
|
||||
|
|
@ -392,4 +497,12 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
|
|||
} else
|
||||
Assert.assertEquals(decoded, tv.value);
|
||||
}
|
||||
|
||||
private final boolean isMissing(final List<Integer> values) {
|
||||
if ( values != null )
|
||||
for ( Integer value : values )
|
||||
if ( value != null )
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue