UnitTests for decodeIntArray method

This commit is contained in:
Mark DePristo 2012-06-05 12:44:55 -04:00
parent 5b8bd81991
commit 4a4d3cde3d
2 changed files with 193 additions and 70 deletions

View File

@ -109,8 +109,17 @@ public class BCF2GenotypeFieldDecoders {
// a single cache for the encoded genotypes, since we don't actually need this vector
final int[] tmp = new int[size];
// TODO -- fast path for size == 2 (diploid) and many samples
// TODO -- by creating all 4 allele combinations and doing a straight lookup instead of allocating them
// TODO -- fast path for many samples with diploid genotypes
//
// The way this would work is simple. Create a List<Allele> diploidGenotypes[] object
// After decoding the offset, if that sample is diploid compute the
// offset into the alleles vector which is simply offset = allele0 * nAlleles + allele1
// if there's a value at diploidGenotypes[offset], use it; otherwise create the genotype,
// cache it, and use that
//
// Some notes. If there are nAlleles at the site, there are implicitly actually
// nAlleles + 1 options, including the no-call allele (decoded offset 0)
for ( final GenotypeBuilder gb : gbs ) {
final int[] encoded = decoder.decodeIntArray(size, type, tmp);
if ( encoded == null )
@ -123,6 +132,7 @@ public class BCF2GenotypeFieldDecoders {
final List<Allele> gt = new ArrayList<Allele>(encoded.length);
for ( final int encode : encoded ) {
// TODO -- handle padding!
final int offset = encode >> 1;
gt.add(offset == 0 ? Allele.NO_CALL : siteAlleles.get(offset - 1));
}

View File

@ -172,13 +172,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
}
}
/**
 * Data provider yielding one test case per element of {@code primitives}.
 * Each case is a single argument: a one-element list wrapping that value.
 *
 * @return the test cases, one row per primitive typed value
 */
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
public Object[][] BCF2EncodingTestProviderSingletons() {
    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final BCF2TypedValue primitive : primitives ) {
        final List<BCF2TypedValue> singleton = Arrays.asList(primitive);
        cases.add(new Object[]{ singleton });
    }
    return cases.toArray(new Object[cases.size()][]);
}
// -----------------------------------------------------------------
//
// Test encoding of basic types
//
// -----------------------------------------------------------------
@DataProvider(name = "BCF2EncodingTestProviderBasicTypes")
public Object[][] BCF2EncodingTestProviderBasicTypes() {
@ -188,38 +186,6 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
return tests.toArray(new Object[][]{});
}
/**
 * Data provider yielding every ordered triple (with repetition) drawn from
 * {@code forCombinations}. Each case is a single argument: a three-element list.
 *
 * @return the test cases, one row per triple
 */
@DataProvider(name = "BCF2EncodingTestProviderSequences")
public Object[][] BCF2EncodingTestProviderSequences() {
    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final BCF2TypedValue first : forCombinations ) {
        for ( final BCF2TypedValue second : forCombinations ) {
            for ( final BCF2TypedValue third : forCombinations ) {
                final List<BCF2TypedValue> triple = Arrays.asList(first, second, third);
                cases.add(new Object[]{ triple });
            }
        }
    }
    return cases.toArray(new Object[cases.size()][]);
}
/**
 * Encodes a single-value list with {@code encodeRecord} and hands the resulting
 * bytes, together with the original values, to {@code decodeRecord}.
 * NOTE(review): decodeRecord is presumably where the round-trip assertions live;
 * it is not visible from here.
 *
 * @param toEncode the values supplied by the singletons data provider
 * @throws IOException declared for the encode/decode helpers
 */
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
    decodeRecord(toEncode, encodeRecord(toEncode));
}
/**
 * Data provider pairing a list of strings with its expected collapsed form,
 * in which every element is preceded by a comma.
 *
 * @return rows of {list of strings, expected collapsed string}
 */
@DataProvider(name = "ListOfStrings")
public Object[][] listOfStringsProvider() {
    return new Object[][]{
        { Arrays.asList("s1", "s2"), ",s1,s2" },
        { Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3" },
        { Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4" }
    };
}
/**
 * Checks that collapsing a list of strings gives the expected single string,
 * and that exploding that string gives back the original list.
 *
 * @param strings  the list to collapse
 * @param expected the collapsed form the list must produce
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "ListOfStrings")
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
    // forward direction: list -> single collapsed string
    final String joined = BCF2Utils.collapseStringList(strings);
    Assert.assertEquals(joined, expected);

    // reverse direction: collapsed string -> original list
    Assert.assertEquals(BCF2Utils.exploreStringList(joined), strings);
}
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
public void testBCF2EncodingVectors(final List<BCF2TypedValue> toEncode) throws IOException {
for ( final BCF2TypedValue tv : toEncode ) {
@ -240,6 +206,93 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
}
}
/**
 * Data provider yielding one test case per element of {@code primitives}.
 * Each case is a single argument: a one-element list wrapping that value.
 *
 * @return the test cases, one row per primitive typed value
 */
@DataProvider(name = "BCF2EncodingTestProviderSingletons")
public Object[][] BCF2EncodingTestProviderSingletons() {
    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final BCF2TypedValue primitive : primitives ) {
        final List<BCF2TypedValue> singleton = Arrays.asList(primitive);
        cases.add(new Object[]{ singleton });
    }
    return cases.toArray(new Object[cases.size()][]);
}
/**
 * Encodes a single-value list with {@code encodeRecord} and hands the resulting
 * bytes, together with the original values, to {@code decodeRecord}.
 * NOTE(review): decodeRecord is presumably where the round-trip assertions live;
 * it is not visible from here.
 *
 * @param toEncode the values supplied by the singletons data provider
 * @throws IOException declared for the encode/decode helpers
 */
@Test(dataProvider = "BCF2EncodingTestProviderSingletons")
public void testBCF2EncodingSingletons(final List<BCF2TypedValue> toEncode) throws IOException {
    decodeRecord(toEncode, encodeRecord(toEncode));
}
// -----------------------------------------------------------------
//
// Test encoding of vectors
//
// -----------------------------------------------------------------
/**
 * Data provider yielding every ordered triple (with repetition) drawn from
 * {@code forCombinations}. Each case is a single argument: a three-element list.
 *
 * @return the test cases, one row per triple
 */
@DataProvider(name = "BCF2EncodingTestProviderSequences")
public Object[][] BCF2EncodingTestProviderSequences() {
    final List<Object[]> cases = new ArrayList<Object[]>();
    for ( final BCF2TypedValue first : forCombinations ) {
        for ( final BCF2TypedValue second : forCombinations ) {
            for ( final BCF2TypedValue third : forCombinations ) {
                final List<BCF2TypedValue> triple = Arrays.asList(first, second, third);
                cases.add(new Object[]{ triple });
            }
        }
    }
    return cases.toArray(new Object[cases.size()][]);
}
/**
 * For every non-CHAR typed value, writes vectors of several lengths in which
 * even positions hold a missing (null) value and odd positions hold the real
 * value, then decodes element by element and compares against the same pattern.
 *
 * @param toEncode the typed values supplied by the basic-types data provider
 * @throws IOException declared for the encoder/decoder calls
 */
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
    final int[] vectorLengths = {2, 5, 10, 15, 20, 25};

    for ( final BCF2TypedValue tv : toEncode ) {
        // CHAR values are excluded from this test
        if ( tv.type == BCF2Type.CHAR ) {
            continue;
        }

        for ( final int length : vectorLengths ) {
            final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);
            final BCF2Encoder encoder = new BCF2Encoder();

            // write: missing at even indices, the actual value at odd indices
            for ( int pos = 0; pos < length; pos++ ) {
                final boolean missing = pos % 2 == 0;
                encoder.encodeRawValue(missing ? null : tv.value, tv.type);
            }

            // read back one value at a time and expect the same alternation
            final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
            for ( int pos = 0; pos < length; pos++ ) {
                final Object decoded = decoder.decodeTypedValue(td);
                final boolean missing = pos % 2 == 0;
                final BCF2TypedValue expected = missing ? new BCF2TypedValue(null, tv.type) : tv;
                myAssertEquals(expected, decoded);
            }
        }
    }
}
/**
 * Encodes a three-value sequence with {@code encodeRecord} and hands the bytes,
 * together with the original values, to {@code decodeRecord}. Declared to depend
 * on {@code testBCF2EncodingSingletons}.
 * NOTE(review): decodeRecord is presumably where the round-trip assertions live;
 * it is not visible from here.
 *
 * @param toEncode the values supplied by the sequences data provider
 * @throws IOException declared for the encode/decode helpers
 */
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
    decodeRecord(toEncode, encodeRecord(toEncode));
}
// -----------------------------------------------------------------
//
// Test strings and lists of strings
//
// -----------------------------------------------------------------
/**
 * Data provider pairing a list of strings with its expected collapsed form,
 * in which every element is preceded by a comma.
 *
 * @return rows of {list of strings, expected collapsed string}
 */
@DataProvider(name = "ListOfStrings")
public Object[][] listOfStringsProvider() {
    return new Object[][]{
        { Arrays.asList("s1", "s2"), ",s1,s2" },
        { Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3" },
        { Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4" }
    };
}
/**
 * Checks that collapsing a list of strings gives the expected single string,
 * and that exploding that string gives back the original list.
 *
 * @param strings  the list to collapse
 * @param expected the collapsed form the list must produce
 * @throws IOException declared by the test signature
 */
@Test(dataProvider = "ListOfStrings")
public void testEncodingListOfString(List<String> strings, String expected) throws IOException {
    // forward direction: list -> single collapsed string
    final String joined = BCF2Utils.collapseStringList(strings);
    Assert.assertEquals(joined, expected);

    // reverse direction: collapsed string -> original list
    Assert.assertEquals(BCF2Utils.exploreStringList(joined), strings);
}
// -----------------------------------------------------------------
//
// Tests to determine the best type of arrays of integers
//
// -----------------------------------------------------------------
@DataProvider(name = "BestIntTypeTests")
public Object[][] BestIntTypeTests() {
List<Object[]> tests = new ArrayList<Object[]>();
@ -266,35 +319,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType);
}
/**
 * For every non-CHAR typed value, writes vectors of several lengths in which
 * even positions hold a missing (null) value and odd positions hold the real
 * value, then decodes element by element and compares against the same pattern.
 *
 * @param toEncode the typed values supplied by the basic-types data provider
 * @throws IOException declared for the encoder/decoder calls
 */
@Test(dataProvider = "BCF2EncodingTestProviderBasicTypes")
public void testBCF2EncodingVectorsWithMissing(final List<BCF2TypedValue> toEncode) throws IOException {
    final int[] vectorLengths = {2, 5, 10, 15, 20, 25};

    for ( final BCF2TypedValue tv : toEncode ) {
        // CHAR values are excluded from this test
        if ( tv.type == BCF2Type.CHAR ) {
            continue;
        }

        for ( final int length : vectorLengths ) {
            final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type);
            final BCF2Encoder encoder = new BCF2Encoder();

            // write: missing at even indices, the actual value at odd indices
            for ( int pos = 0; pos < length; pos++ ) {
                final boolean missing = pos % 2 == 0;
                encoder.encodeRawValue(missing ? null : tv.value, tv.type);
            }

            // read back one value at a time and expect the same alternation
            final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
            for ( int pos = 0; pos < length; pos++ ) {
                final Object decoded = decoder.decodeTypedValue(td);
                final boolean missing = pos % 2 == 0;
                final BCF2TypedValue expected = missing ? new BCF2TypedValue(null, tv.type) : tv;
                myAssertEquals(expected, decoded);
            }
        }
    }
}
/**
 * Encodes a three-value sequence with {@code encodeRecord} and hands the bytes,
 * together with the original values, to {@code decodeRecord}. Declared to depend
 * on {@code testBCF2EncodingSingletons}.
 * NOTE(review): decodeRecord is presumably where the round-trip assertions live;
 * it is not visible from here.
 *
 * @param toEncode the values supplied by the sequences data provider
 * @throws IOException declared for the encode/decode helpers
 */
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons")
public void testBCF2EncodingTestProviderSequences(final List<BCF2TypedValue> toEncode) throws IOException {
    decodeRecord(toEncode, encodeRecord(toEncode));
}
// -----------------------------------------------------------------
//
// Tests managing and skipping multiple blocks
//
// -----------------------------------------------------------------
@Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences")
public void testReadAndSkipWithMultipleBlocks(final List<BCF2TypedValue> block) throws IOException {
@ -337,6 +366,82 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
decodeRecord(block2, decoder);
}
// -----------------------------------------------------------------
//
// Test encoding / decoding arrays of ints
//
// This checks that we can correctly encode arrays of values and decode
// them with the low-level decodeIntArray function. This
// has to be pretty comprehensive as decodeIntArray is a highly optimized
// piece of code with lots of edge cases. The values we are encoding
// don't really matter -- just that the values come back as expected.
//
// -----------------------------------------------------------------
/**
 * Data provider building integer lists made of a run of real values followed
 * by a run of nulls. Both run lengths range over {0, 1, 2, 5, 10, 100}, and
 * every combination is produced. The real values are 0 .. nValues - 1.
 *
 * @return one row per (nValues, nPad) combination, each holding a single list
 */
@DataProvider(name = "IntArrays")
public Object[][] makeIntArrays() {
    final int[] runLengths = {0, 1, 2, 5, 10, 100};
    final List<Object[]> cases = new ArrayList<Object[]>();

    for ( final int nValues : runLengths ) {
        for ( final int nPad : runLengths ) {
            final List<Integer> values = new ArrayList<Integer>(nValues + nPad);

            // leading run: the integers 0 .. nValues - 1
            for ( int v = 0; v < nValues; v++ ) {
                values.add(v);
            }

            // trailing run: nPad nulls
            for ( int p = 0; p < nPad; p++ ) {
                values.add(null);
            }

            cases.add(new Object[]{ values });
        }
    }

    return cases.toArray(new Object[cases.size()][]);
}
/**
 * Encodes a list of integers (possibly with trailing nulls) as a typed INT16
 * vector and reads it back with the low-level {@code decodeIntArray}.
 *
 * Expectations:
 * - if the input has no non-null value, the decoded array is null;
 * - otherwise every non-null input value appears at the same index in the
 *   decoded array, and the decoded array stops before any null position.
 *
 * @param ints the values to round-trip, supplied by the "IntArrays" data provider
 * @throws IOException declared for the encoder/decoder calls
 */
@Test(dataProvider = "IntArrays")
public void testIntArrays(final List<Integer> ints) throws IOException {
    final BCF2Encoder encoder = new BCF2Encoder();
    encoder.encodeTyped(ints, BCF2Type.INT16);

    final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes());
    final byte typeDescriptor = decoder.readTypeDescriptor();

    // read the int[] with the low-level version
    final int[] decoded = decoder.decodeIntArray(typeDescriptor);

    if ( isMissing(ints) ) {
        // we expect that the result is null in this case
        Assert.assertNull(decoded, "Encoded all missing values -- expected null");
        return;
    }

    // fail with a descriptive assertion rather than a NullPointerException on decoded.length
    Assert.assertNotNull(decoded, "Encoded at least one non-missing value -- expected a non-null array");

    // we expect at least some values to come back
    Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data");

    // check corresponding values
    for ( int i = 0; i < ints.size(); i++ ) {
        final Integer expected = ints.get(i);
        if ( expected == null ) {
            Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values");
        } else {
            // index i must exist, i.e. the array holds at least i + 1 values
            Assert.assertTrue(decoded.length > i, "we expected at least " + (i + 1) + " values in decoded array");
            Assert.assertEquals(decoded[i], (int)expected);
        }
    }
}
// -----------------------------------------------------------------
//
// Helper routines
//
// -----------------------------------------------------------------
private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
baos.write(record1);
@ -392,4 +497,12 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest {
} else
Assert.assertEquals(decoded, tv.value);
}
/**
 * Tells whether a list carries no actual values.
 *
 * @param values the list to inspect; may be null
 * @return true if {@code values} is null, empty, or contains only nulls;
 *         false as soon as one non-null element is found
 */
private final boolean isMissing(final List<Integer> values) {
    if ( values == null ) {
        return true;
    }
    for ( final Integer candidate : values ) {
        if ( candidate != null ) {
            return false;
        }
    }
    return true;
}
}