From 64d7e9320971076112c23df781f0c376542b32b8 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Wed, 27 Jun 2012 18:18:25 -0400 Subject: [PATCH] Massive bugfixes -- Previous version was reading the size of the encoded genotypes vector for each genotype. This only worked because I never wrote out genotype field values with > 15 elements. Mauricio's killer DiagnoseTargets VCF uncovered the bug. Unfortunately since symbolic allele clipping is still busted those tests are still diabled -- GenotypeContext getMaxPloidy was returning -1 in the case where there are no genotypes, but the answer should be 0. --- .../sting/utils/codecs/bcf2/BCF2Decoder.java | 7 +++-- .../bcf2/BCF2GenotypeFieldDecoders.java | 30 +++++++++---------- .../codecs/bcf2/BCF2LazyGenotypesDecoder.java | 3 +- .../variantcontext/GenotypeLikelihoods.java | 5 ++-- .../variantcontext/GenotypesContext.java | 1 + 5 files changed, 25 insertions(+), 21 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java index 1bb833868..7a6d96131 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2Decoder.java @@ -136,6 +136,10 @@ public final class BCF2Decoder { public final Object decodeTypedValue(final byte typeDescriptor) { final int size = decodeNumberOfElements(typeDescriptor); + return decodeTypedValue(typeDescriptor, size); + } + + public final Object decodeTypedValue(final byte typeDescriptor, final int size) { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); assert size >= 0; @@ -285,8 +289,7 @@ public final class BCF2Decoder { } } - public final int[] decodeIntArray(final byte typeDescriptor) { - final int size = decodeNumberOfElements(typeDescriptor); + public final int[] decodeIntArray(final byte typeDescriptor, final int size) { final BCF2Type type = BCF2Utils.decodeType(typeDescriptor); return decodeIntArray(size, type, null); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java index 5a4d1d0da..59537a329 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2GenotypeFieldDecoders.java @@ -104,19 +104,17 @@ public class BCF2GenotypeFieldDecoders { final String field, final BCF2Decoder decoder, final byte typeDescriptor, + final int numElements, final GenotypeBuilder[] gbs); } private class GTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { - // we have to do a bit of low-level processing here as we want to know the size upfronta - final int ploidy = decoder.decodeNumberOfElements(typeDescriptor); - - if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { + if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES ) fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs); else { - generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs); + generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs); } } @@ -218,7 +216,7 @@ public class BCF2GenotypeFieldDecoders { private class DPDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.DP(decoder.decodeInt(typeDescriptor, -1)); @@ -228,7 +226,7 @@ public class BCF2GenotypeFieldDecoders { private class GQDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { // the -1 is for missing gb.GQ(decoder.decodeInt(typeDescriptor, -1)); @@ -238,27 +236,27 @@ public class BCF2GenotypeFieldDecoders { private class ADDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { - gb.AD(decoder.decodeIntArray(typeDescriptor)); + gb.AD(decoder.decodeIntArray(typeDescriptor, numElements)); } } } private class PLDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { - gb.PL(decoder.decodeIntArray(typeDescriptor)); + gb.PL(decoder.decodeIntArray(typeDescriptor, numElements)); } } } private class GenericDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { - Object value = decoder.decodeTypedValue(typeDescriptor); + Object value = decoder.decodeTypedValue(typeDescriptor, numElements); if ( value != null ) { // don't add missing values if ( value instanceof List && ((List)value).size() == 1) { // todo -- I really hate this, and it suggests that the code isn't completely right @@ -275,9 +273,9 @@ public class BCF2GenotypeFieldDecoders { private class FTDecoder implements Decoder { @Override - public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) { + public void decode(final List siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) { for ( final GenotypeBuilder gb : gbs ) { - Object value = decoder.decodeTypedValue(typeDescriptor); + Object value = decoder.decodeTypedValue(typeDescriptor, numElements); if ( value != null ) { // don't add missing values gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List)value); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java index 7f10375bb..c749325fb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/bcf2/BCF2LazyGenotypesDecoder.java @@ -77,9 +77,10 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser { // the type of each element final byte typeDescriptor = decoder.readTypeDescriptor(); + final int numElements = decoder.decodeNumberOfElements(typeDescriptor); final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field); try { - fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders); + fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders); } catch ( ClassCastException e ) { throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field + " inconsistent with the value observed in the decoded value"); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index 7c745628a..bb4a5abb9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -58,7 +58,6 @@ public class GenotypeLikelihoods { static { // must be done before PLIndexToAlleleIndex for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) { - //numLikelihoodCache[numAlleles] = new int[NUM_LIKELIHOODS_CACHE_PLOIDY]; for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) { numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy); } @@ -364,11 +363,13 @@ public class GenotypeLikelihoods { * S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors) * S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself). * + * note that in the case where ploidy == 0 we assume that the ploidy actually == 2 + * * @param numAlleles Number of alleles (including ref) * @param ploidy Ploidy, or number of chromosomes in set * @return Number of likelihood elements we need to hold. */ - @Requires({"ploidy > 0", "numAlleles > 0"}) + @Requires({"ploidy >= 0", "numAlleles > 0"}) @Ensures("result > 0") public static int numLikelihoods(final int numAlleles, final int ploidy) { if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java index 9577a3e63..ba8668fa9 100644 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypesContext.java @@ -416,6 +416,7 @@ public class GenotypesContext implements List { @Ensures("result >= 0") public int getMaxPloidy() { if ( maxPloidy == -1 ) { + maxPloidy = 0; // necessary in the case where there are no genotypes for ( final Genotype g : getGenotypes() ) { maxPloidy = Math.max(g.getPloidy(), maxPloidy); }