diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 27d36cc57..fe59f0026 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -109,8 +109,17 @@ public class BCF2GenotypeFieldDecoders { // a single cache for the encoded genotypes, since we don't actually need this vector final int[] tmp = new int[size]; - // TODO -- fast path for size == 2 (diploid) and many samples - // TODO -- by creating all 4 allele combinations and doing a straight lookup instead of allocations them + // TODO -- fast path for many samples with diploid genotypes + // + // The way this would work is simple. Create a List diploidGenotypes[] object. + // After decoding the offset, if that sample is diploid, compute the + // offset into the alleles vector, which is simply offset = allele0 * nAlleles + allele1. + // If there's a value at diploidGenotypes[offset], use it; otherwise create the genotype, + // cache it, and use that. + // + // Some notes. If there are nAlleles at the site, there are implicitly + // nAlleles + 1 options, including the no-call allele (encoded offset 0). + for ( final GenotypeBuilder gb : gbs ) { final int[] encoded = decoder.decodeIntArray(size, type, tmp); if ( encoded == null ) @@ -123,6 +132,7 @@ public class BCF2GenotypeFieldDecoders { final List gt = new ArrayList(encoded.length); for ( final int encode : encoded ) { + // TODO -- handle padding! final int offset = encode >> 1; gt.add(offset == 0 ? 
Allele.NO_CALL : siteAlleles.get(offset - 1)); } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java index 7860efc08..5fca8660a 100644 --- a/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/bcf2/BCF2EncoderDecoderUnitTest.java @@ -172,13 +172,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { } } - @DataProvider(name = "BCF2EncodingTestProviderSingletons") - public Object[][] BCF2EncodingTestProviderSingletons() { - List tests = new ArrayList(); - for ( BCF2TypedValue tv : primitives ) - tests.add(new Object[]{Arrays.asList(tv)}); - return tests.toArray(new Object[][]{}); - } + // ----------------------------------------------------------------- + // + // Test encoding of basic types + // + // ----------------------------------------------------------------- @DataProvider(name = "BCF2EncodingTestProviderBasicTypes") public Object[][] BCF2EncodingTestProviderBasicTypes() { @@ -188,38 +186,6 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { return tests.toArray(new Object[][]{}); } - @DataProvider(name = "BCF2EncodingTestProviderSequences") - public Object[][] BCF2EncodingTestProviderSequences() { - List tests = new ArrayList(); - for ( BCF2TypedValue tv1 : forCombinations ) - for ( BCF2TypedValue tv2 : forCombinations ) - for ( BCF2TypedValue tv3 : forCombinations ) - tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)}); - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "BCF2EncodingTestProviderSingletons") - public void testBCF2EncodingSingletons(final List toEncode) throws IOException { - final byte[] record = encodeRecord(toEncode); - decodeRecord(toEncode, record); - } - - @DataProvider(name = "ListOfStrings") - public Object[][] listOfStringsProvider() { - List tests 
= new ArrayList(); - tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"}); - tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"}); - tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"}); - return tests.toArray(new Object[][]{}); - } - - @Test(dataProvider = "ListOfStrings") - public void testEncodingListOfString(List strings, String expected) throws IOException { - final String collapsed = BCF2Utils.collapseStringList(strings); - Assert.assertEquals(collapsed, expected); - Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings); - } - @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes") public void testBCF2EncodingVectors(final List toEncode) throws IOException { for ( final BCF2TypedValue tv : toEncode ) { @@ -240,6 +206,93 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { } } + @DataProvider(name = "BCF2EncodingTestProviderSingletons") + public Object[][] BCF2EncodingTestProviderSingletons() { + List tests = new ArrayList(); + for ( BCF2TypedValue tv : primitives ) + tests.add(new Object[]{Arrays.asList(tv)}); + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "BCF2EncodingTestProviderSingletons") + public void testBCF2EncodingSingletons(final List toEncode) throws IOException { + final byte[] record = encodeRecord(toEncode); + decodeRecord(toEncode, record); + } + + // ----------------------------------------------------------------- + // + // Test encoding of vectors + // + // ----------------------------------------------------------------- + + @DataProvider(name = "BCF2EncodingTestProviderSequences") + public Object[][] BCF2EncodingTestProviderSequences() { + List tests = new ArrayList(); + for ( BCF2TypedValue tv1 : forCombinations ) + for ( BCF2TypedValue tv2 : forCombinations ) + for ( BCF2TypedValue tv3 : forCombinations ) + tests.add(new Object[]{Arrays.asList(tv1, tv2, tv3)}); + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = 
"BCF2EncodingTestProviderBasicTypes") + public void testBCF2EncodingVectorsWithMissing(final List toEncode) throws IOException { + for ( final BCF2TypedValue tv : toEncode ) { + if ( tv.type != BCF2Type.CHAR ) { + for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) { + final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type); + + final BCF2Encoder encoder = new BCF2Encoder(); + for ( int i = 0; i < length; i++ ) { + encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type); + } + + final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); + + for ( int i = 0; i < length; i++ ) { + final Object decoded = decoder.decodeTypedValue(td); + myAssertEquals(i % 2 == 0 ? new BCF2TypedValue(null, tv.type) : tv, decoded); + } + } + } + } + } + + @Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons") + public void testBCF2EncodingTestProviderSequences(final List toEncode) throws IOException { + final byte[] record = encodeRecord(toEncode); + decodeRecord(toEncode, record); + } + + // ----------------------------------------------------------------- + // + // Test strings and lists of strings + // + // ----------------------------------------------------------------- + + @DataProvider(name = "ListOfStrings") + public Object[][] listOfStringsProvider() { + List tests = new ArrayList(); + tests.add(new Object[]{Arrays.asList("s1", "s2"), ",s1,s2"}); + tests.add(new Object[]{Arrays.asList("s1", "s2", "s3"), ",s1,s2,s3"}); + tests.add(new Object[]{Arrays.asList("s1", "s2", "s3", "s4"), ",s1,s2,s3,s4"}); + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "ListOfStrings") + public void testEncodingListOfString(List strings, String expected) throws IOException { + final String collapsed = BCF2Utils.collapseStringList(strings); + Assert.assertEquals(collapsed, expected); + Assert.assertEquals(BCF2Utils.exploreStringList(collapsed), strings); + } + + // 
----------------------------------------------------------------- + // + // Tests to determine the best type of arrays of integers + // + // ----------------------------------------------------------------- + @DataProvider(name = "BestIntTypeTests") public Object[][] BestIntTypeTests() { List tests = new ArrayList(); @@ -266,35 +319,11 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { Assert.assertEquals(encoder.determineIntegerType(ArrayUtils.toPrimitive(ints.toArray(new Integer[0]))), expectedType); } - @Test(dataProvider = "BCF2EncodingTestProviderBasicTypes") - public void testBCF2EncodingVectorsWithMissing(final List toEncode) throws IOException { - for ( final BCF2TypedValue tv : toEncode ) { - if ( tv.type != BCF2Type.CHAR ) { - for ( final int length : Arrays.asList(2, 5, 10, 15, 20, 25) ) { - final byte td = BCF2Utils.encodeTypeDescriptor(1, tv.type); - - final BCF2Encoder encoder = new BCF2Encoder(); - for ( int i = 0; i < length; i++ ) { - encoder.encodeRawValue(i % 2 == 0 ? null : tv.value, tv.type); - } - - final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); - - for ( int i = 0; i < length; i++ ) { - final Object decoded = decoder.decodeTypedValue(td); - myAssertEquals(i % 2 == 0 ? 
new BCF2TypedValue(null, tv.type) : tv, decoded); - } - } - } - } - } - - - @Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingSingletons") - public void testBCF2EncodingTestProviderSequences(final List toEncode) throws IOException { - final byte[] record = encodeRecord(toEncode); - decodeRecord(toEncode, record); - } + // ----------------------------------------------------------------- + // + // Tests managing and skipping multiple blocks + // + // ----------------------------------------------------------------- @Test(dataProvider = "BCF2EncodingTestProviderSequences", dependsOnMethods = "testBCF2EncodingTestProviderSequences") public void testReadAndSkipWithMultipleBlocks(final List block) throws IOException { @@ -337,6 +366,82 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { decodeRecord(block2, decoder); } + // ----------------------------------------------------------------- + // + // Test encoding / decoding arrays of ints + // + // This checks that arrays of values are encoded and then decoded correctly + // by the low-level decodeIntArray function. This + // has to be pretty comprehensive as decodeIntArray is a highly optimized + // piece of code with lots of edge cases. The values we are encoding + // don't really matter -- just that the values come back as expected. 
+ // + // ----------------------------------------------------------------- + + @DataProvider(name = "IntArrays") + public Object[][] makeIntArrays() { + List tests = new ArrayList(); + + for ( int nValues : Arrays.asList(0, 1, 2, 5, 10, 100) ) { + for ( int nPad : Arrays.asList(0, 1, 2, 5, 10, 100) ) { + int nElements = nValues + nPad; + + List values = new ArrayList(nElements); + + // add nValues from 0 to nValues - 1 + for ( int i = 0; i < nValues; i++ ) + values.add(i); + + // add nPad nulls + for ( int i = 0; i < nPad; i++ ) + values.add(null); + + tests.add(new Object[]{values}); + } + } + + return tests.toArray(new Object[][]{}); + } + + @Test(dataProvider = "IntArrays") + public void testIntArrays(final List ints) throws IOException { + final BCF2Encoder encoder = new BCF2Encoder(); + encoder.encodeTyped(ints, BCF2Type.INT16); + + final BCF2Decoder decoder = new BCF2Decoder(encoder.getRecordBytes()); + + final byte typeDescriptor = decoder.readTypeDescriptor(); + + // read the int[] with the low-level version + final int[] decoded = decoder.decodeIntArray(typeDescriptor); + + if ( isMissing(ints) ) { + // we expect that the result is null in this case + Assert.assertNull(decoded, "Encoded all missing values -- expected null"); + } else { + // we expect at least some values to come back + Assert.assertTrue(decoded.length > 0, "Must have at least 1 element for non-null encoded data"); + + // check corresponding values + for ( int i = 0; i < ints.size(); i++ ) { + final Integer expected = ints.get(i); + + if ( expected == null ) { + Assert.assertTrue(decoded.length <= i, "we expect decoded to be truncated for missing values"); + } else { + Assert.assertTrue(decoded.length > i, "we expected at least " + i + " values in decoded array"); + Assert.assertEquals(decoded[i], (int)expected); + } + } + } + } + + // ----------------------------------------------------------------- + // + // Helper routines + // + // 
----------------------------------------------------------------- + private final byte[] combineRecords(final byte[] record1, final byte[] record2) throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(); baos.write(record1); @@ -392,4 +497,12 @@ public class BCF2EncoderDecoderUnitTest extends BaseTest { } else Assert.assertEquals(decoded, tv.value); } + + private final boolean isMissing(final List values) { + if ( values != null ) + for ( Integer value : values ) + if ( value != null ) + return false; + return true; + } } \ No newline at end of file