Massive bugfixes

-- Previous version was reading the size of the encoded genotypes vector for each genotype.  This only worked because I never wrote out genotype field values with > 15 elements.  Mauricio's killer DiagnoseTargets VCF uncovered the bug.  Unfortunately since symbolic allele clipping is still busted those tests are still diabled
-- GenotypeContext getMaxPloidy was returning -1 in the case where there are no genotypes, but the answer should be 0.
This commit is contained in:
Mark DePristo 2012-06-27 18:18:25 -04:00
parent 7144154f53
commit 64d7e93209
5 changed files with 25 additions and 21 deletions

View File

@ -136,6 +136,10 @@ public final class BCF2Decoder {
public final Object decodeTypedValue(final byte typeDescriptor) {
final int size = decodeNumberOfElements(typeDescriptor);
return decodeTypedValue(typeDescriptor, size);
}
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
assert size >= 0;
@ -285,8 +289,7 @@ public final class BCF2Decoder {
}
}
public final int[] decodeIntArray(final byte typeDescriptor) {
final int size = decodeNumberOfElements(typeDescriptor);
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
return decodeIntArray(size, type, null);
}

View File

@ -104,19 +104,17 @@ public class BCF2GenotypeFieldDecoders {
final String field,
final BCF2Decoder decoder,
final byte typeDescriptor,
final int numElements,
final GenotypeBuilder[] gbs);
}
private class GTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
// we have to do a bit of low-level processing here as we want to know the size upfronta
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
else {
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs);
}
}
@ -218,7 +216,7 @@ public class BCF2GenotypeFieldDecoders {
private class DPDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.DP(decoder.decodeInt(typeDescriptor, -1));
@ -228,7 +226,7 @@ public class BCF2GenotypeFieldDecoders {
private class GQDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
// the -1 is for missing
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
@ -238,27 +236,27 @@ public class BCF2GenotypeFieldDecoders {
private class ADDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.AD(decoder.decodeIntArray(typeDescriptor));
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
}
}
}
private class PLDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
gb.PL(decoder.decodeIntArray(typeDescriptor));
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
}
}
}
private class GenericDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
if ( value != null ) { // don't add missing values
if ( value instanceof List && ((List)value).size() == 1) {
// todo -- I really hate this, and it suggests that the code isn't completely right
@ -275,9 +273,9 @@ public class BCF2GenotypeFieldDecoders {
private class FTDecoder implements Decoder {
@Override
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
for ( final GenotypeBuilder gb : gbs ) {
Object value = decoder.decodeTypedValue(typeDescriptor);
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
if ( value != null ) { // don't add missing values
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
}

View File

@ -77,9 +77,10 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
// the type of each element
final byte typeDescriptor = decoder.readTypeDescriptor();
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
try {
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
} catch ( ClassCastException e ) {
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
+ " inconsistent with the value observed in the decoded value");

View File

@ -58,7 +58,6 @@ public class GenotypeLikelihoods {
static {
// must be done before PLIndexToAlleleIndex
for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) {
//numLikelihoodCache[numAlleles] = new int[NUM_LIKELIHOODS_CACHE_PLOIDY];
for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) {
numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy);
}
@ -364,11 +363,13 @@ public class GenotypeLikelihoods {
* S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors)
* S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself).
*
* note that in the case where ploidy == 0 we assume that the ploidy actually == 2
*
* @param numAlleles Number of alleles (including ref)
* @param ploidy Ploidy, or number of chromosomes in set
* @return Number of likelihood elements we need to hold.
*/
@Requires({"ploidy > 0", "numAlleles > 0"})
@Requires({"ploidy >= 0", "numAlleles > 0"})
@Ensures("result > 0")
public static int numLikelihoods(final int numAlleles, final int ploidy) {
if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES

View File

@ -416,6 +416,7 @@ public class GenotypesContext implements List<Genotype> {
@Ensures("result >= 0")
public int getMaxPloidy() {
if ( maxPloidy == -1 ) {
maxPloidy = 0; // necessary in the case where there are no genotypes
for ( final Genotype g : getGenotypes() ) {
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
}