Massive bugfixes
-- Previous version was reading the size of the encoded genotypes vector for each genotype. This only worked because I never wrote out genotype field values with > 15 elements. Mauricio's killer DiagnoseTargets VCF uncovered the bug. Unfortunately since symbolic allele clipping is still busted those tests are still diabled -- GenotypeContext getMaxPloidy was returning -1 in the case where there are no genotypes, but the answer should be 0.
This commit is contained in:
parent
7144154f53
commit
64d7e93209
|
|
@ -136,6 +136,10 @@ public final class BCF2Decoder {
|
|||
|
||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
return decodeTypedValue(typeDescriptor, size);
|
||||
}
|
||||
|
||||
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
|
||||
assert size >= 0;
|
||||
|
|
@ -285,8 +289,7 @@ public final class BCF2Decoder {
|
|||
}
|
||||
}
|
||||
|
||||
public final int[] decodeIntArray(final byte typeDescriptor) {
|
||||
final int size = decodeNumberOfElements(typeDescriptor);
|
||||
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
|
||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||
return decodeIntArray(size, type, null);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -104,19 +104,17 @@ public class BCF2GenotypeFieldDecoders {
|
|||
final String field,
|
||||
final BCF2Decoder decoder,
|
||||
final byte typeDescriptor,
|
||||
final int numElements,
|
||||
final GenotypeBuilder[] gbs);
|
||||
}
|
||||
|
||||
private class GTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
// we have to do a bit of low-level processing here as we want to know the size upfronta
|
||||
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
|
||||
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||
else {
|
||||
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
|
||||
generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -218,7 +216,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class DPDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||
|
|
@ -228,7 +226,7 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class GQDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
// the -1 is for missing
|
||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||
|
|
@ -238,27 +236,27 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class ADDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.AD(decoder.decodeIntArray(typeDescriptor));
|
||||
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class PLDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
gb.PL(decoder.decodeIntArray(typeDescriptor));
|
||||
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private class GenericDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||
if ( value != null ) { // don't add missing values
|
||||
if ( value instanceof List && ((List)value).size() == 1) {
|
||||
// todo -- I really hate this, and it suggests that the code isn't completely right
|
||||
|
|
@ -275,9 +273,9 @@ public class BCF2GenotypeFieldDecoders {
|
|||
|
||||
private class FTDecoder implements Decoder {
|
||||
@Override
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||
for ( final GenotypeBuilder gb : gbs ) {
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
||||
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||
if ( value != null ) { // don't add missing values
|
||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -77,9 +77,10 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
|||
|
||||
// the type of each element
|
||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
|
||||
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||
try {
|
||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
|
||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
|
||||
} catch ( ClassCastException e ) {
|
||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||
+ " inconsistent with the value observed in the decoded value");
|
||||
|
|
|
|||
|
|
@ -58,7 +58,6 @@ public class GenotypeLikelihoods {
|
|||
static {
|
||||
// must be done before PLIndexToAlleleIndex
|
||||
for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) {
|
||||
//numLikelihoodCache[numAlleles] = new int[NUM_LIKELIHOODS_CACHE_PLOIDY];
|
||||
for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) {
|
||||
numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy);
|
||||
}
|
||||
|
|
@ -364,11 +363,13 @@ public class GenotypeLikelihoods {
|
|||
* S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors)
|
||||
* S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself).
|
||||
*
|
||||
* note that in the case where ploidy == 0 we assume that the ploidy actually == 2
|
||||
*
|
||||
* @param numAlleles Number of alleles (including ref)
|
||||
* @param ploidy Ploidy, or number of chromosomes in set
|
||||
* @return Number of likelihood elements we need to hold.
|
||||
*/
|
||||
@Requires({"ploidy > 0", "numAlleles > 0"})
|
||||
@Requires({"ploidy >= 0", "numAlleles > 0"})
|
||||
@Ensures("result > 0")
|
||||
public static int numLikelihoods(final int numAlleles, final int ploidy) {
|
||||
if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES
|
||||
|
|
|
|||
|
|
@ -416,6 +416,7 @@ public class GenotypesContext implements List<Genotype> {
|
|||
@Ensures("result >= 0")
|
||||
public int getMaxPloidy() {
|
||||
if ( maxPloidy == -1 ) {
|
||||
maxPloidy = 0; // necessary in the case where there are no genotypes
|
||||
for ( final Genotype g : getGenotypes() ) {
|
||||
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue