Massive bugfixes
-- Previous version was reading the size of the encoded genotypes vector for each genotype. This only worked because I never wrote out genotype field values with > 15 elements. Mauricio's killer DiagnoseTargets VCF uncovered the bug. Unfortunately since symbolic allele clipping is still busted those tests are still diabled -- GenotypeContext getMaxPloidy was returning -1 in the case where there are no genotypes, but the answer should be 0.
This commit is contained in:
parent
7144154f53
commit
64d7e93209
|
|
@ -136,6 +136,10 @@ public final class BCF2Decoder {
|
||||||
|
|
||||||
public final Object decodeTypedValue(final byte typeDescriptor) {
|
public final Object decodeTypedValue(final byte typeDescriptor) {
|
||||||
final int size = decodeNumberOfElements(typeDescriptor);
|
final int size = decodeNumberOfElements(typeDescriptor);
|
||||||
|
return decodeTypedValue(typeDescriptor, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
public final Object decodeTypedValue(final byte typeDescriptor, final int size) {
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
|
|
||||||
assert size >= 0;
|
assert size >= 0;
|
||||||
|
|
@ -285,8 +289,7 @@ public final class BCF2Decoder {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public final int[] decodeIntArray(final byte typeDescriptor) {
|
public final int[] decodeIntArray(final byte typeDescriptor, final int size) {
|
||||||
final int size = decodeNumberOfElements(typeDescriptor);
|
|
||||||
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
final BCF2Type type = BCF2Utils.decodeType(typeDescriptor);
|
||||||
return decodeIntArray(size, type, null);
|
return decodeIntArray(size, type, null);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -104,19 +104,17 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
final String field,
|
final String field,
|
||||||
final BCF2Decoder decoder,
|
final BCF2Decoder decoder,
|
||||||
final byte typeDescriptor,
|
final byte typeDescriptor,
|
||||||
|
final int numElements,
|
||||||
final GenotypeBuilder[] gbs);
|
final GenotypeBuilder[] gbs);
|
||||||
}
|
}
|
||||||
|
|
||||||
private class GTDecoder implements Decoder {
|
private class GTDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
// we have to do a bit of low-level processing here as we want to know the size upfronta
|
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && numElements == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
||||||
final int ploidy = decoder.decodeNumberOfElements(typeDescriptor);
|
|
||||||
|
|
||||||
if ( ENABLE_FASTPATH_GT && siteAlleles.size() == 2 && ploidy == 2 && gbs.length >= MIN_SAMPLES_FOR_FASTPATH_GENOTYPES )
|
|
||||||
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
fastBiallelicDiploidDecode(siteAlleles, decoder, typeDescriptor, gbs);
|
||||||
else {
|
else {
|
||||||
generalDecode(siteAlleles, ploidy, decoder, typeDescriptor, gbs);
|
generalDecode(siteAlleles, numElements, decoder, typeDescriptor, gbs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -218,7 +216,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class DPDecoder implements Decoder {
|
private class DPDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
// the -1 is for missing
|
// the -1 is for missing
|
||||||
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
gb.DP(decoder.decodeInt(typeDescriptor, -1));
|
||||||
|
|
@ -228,7 +226,7 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class GQDecoder implements Decoder {
|
private class GQDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
// the -1 is for missing
|
// the -1 is for missing
|
||||||
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
gb.GQ(decoder.decodeInt(typeDescriptor, -1));
|
||||||
|
|
@ -238,27 +236,27 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class ADDecoder implements Decoder {
|
private class ADDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
gb.AD(decoder.decodeIntArray(typeDescriptor));
|
gb.AD(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class PLDecoder implements Decoder {
|
private class PLDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
gb.PL(decoder.decodeIntArray(typeDescriptor));
|
gb.PL(decoder.decodeIntArray(typeDescriptor, numElements));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private class GenericDecoder implements Decoder {
|
private class GenericDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||||
if ( value != null ) { // don't add missing values
|
if ( value != null ) { // don't add missing values
|
||||||
if ( value instanceof List && ((List)value).size() == 1) {
|
if ( value instanceof List && ((List)value).size() == 1) {
|
||||||
// todo -- I really hate this, and it suggests that the code isn't completely right
|
// todo -- I really hate this, and it suggests that the code isn't completely right
|
||||||
|
|
@ -275,9 +273,9 @@ public class BCF2GenotypeFieldDecoders {
|
||||||
|
|
||||||
private class FTDecoder implements Decoder {
|
private class FTDecoder implements Decoder {
|
||||||
@Override
|
@Override
|
||||||
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final GenotypeBuilder[] gbs) {
|
public void decode(final List<Allele> siteAlleles, final String field, final BCF2Decoder decoder, final byte typeDescriptor, final int numElements, final GenotypeBuilder[] gbs) {
|
||||||
for ( final GenotypeBuilder gb : gbs ) {
|
for ( final GenotypeBuilder gb : gbs ) {
|
||||||
Object value = decoder.decodeTypedValue(typeDescriptor);
|
Object value = decoder.decodeTypedValue(typeDescriptor, numElements);
|
||||||
if ( value != null ) { // don't add missing values
|
if ( value != null ) { // don't add missing values
|
||||||
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
gb.filters(value instanceof String ? Collections.singletonList((String)value) : (List<String>)value);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -77,9 +77,10 @@ class BCF2LazyGenotypesDecoder implements LazyGenotypesContext.LazyParser {
|
||||||
|
|
||||||
// the type of each element
|
// the type of each element
|
||||||
final byte typeDescriptor = decoder.readTypeDescriptor();
|
final byte typeDescriptor = decoder.readTypeDescriptor();
|
||||||
|
final int numElements = decoder.decodeNumberOfElements(typeDescriptor);
|
||||||
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
final BCF2GenotypeFieldDecoders.Decoder fieldDecoder = codec.getGenotypeFieldDecoder(field);
|
||||||
try {
|
try {
|
||||||
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, builders);
|
fieldDecoder.decode(siteAlleles, field, decoder, typeDescriptor, numElements, builders);
|
||||||
} catch ( ClassCastException e ) {
|
} catch ( ClassCastException e ) {
|
||||||
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
throw new UserException.MalformedBCF2("BUG: expected encoding of field " + field
|
||||||
+ " inconsistent with the value observed in the decoded value");
|
+ " inconsistent with the value observed in the decoded value");
|
||||||
|
|
|
||||||
|
|
@ -58,7 +58,6 @@ public class GenotypeLikelihoods {
|
||||||
static {
|
static {
|
||||||
// must be done before PLIndexToAlleleIndex
|
// must be done before PLIndexToAlleleIndex
|
||||||
for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) {
|
for ( int numAlleles = 1; numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES; numAlleles++ ) {
|
||||||
//numLikelihoodCache[numAlleles] = new int[NUM_LIKELIHOODS_CACHE_PLOIDY];
|
|
||||||
for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) {
|
for ( int ploidy = 1; ploidy < NUM_LIKELIHOODS_CACHE_PLOIDY; ploidy++ ) {
|
||||||
numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy);
|
numLikelihoodCache[numAlleles][ploidy] = calcNumLikelihoods(numAlleles, ploidy);
|
||||||
}
|
}
|
||||||
|
|
@ -364,11 +363,13 @@ public class GenotypeLikelihoods {
|
||||||
* S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors)
|
* S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors)
|
||||||
* S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself).
|
* S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself).
|
||||||
*
|
*
|
||||||
|
* note that in the case where ploidy == 0 we assume that the ploidy actually == 2
|
||||||
|
*
|
||||||
* @param numAlleles Number of alleles (including ref)
|
* @param numAlleles Number of alleles (including ref)
|
||||||
* @param ploidy Ploidy, or number of chromosomes in set
|
* @param ploidy Ploidy, or number of chromosomes in set
|
||||||
* @return Number of likelihood elements we need to hold.
|
* @return Number of likelihood elements we need to hold.
|
||||||
*/
|
*/
|
||||||
@Requires({"ploidy > 0", "numAlleles > 0"})
|
@Requires({"ploidy >= 0", "numAlleles > 0"})
|
||||||
@Ensures("result > 0")
|
@Ensures("result > 0")
|
||||||
public static int numLikelihoods(final int numAlleles, final int ploidy) {
|
public static int numLikelihoods(final int numAlleles, final int ploidy) {
|
||||||
if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES
|
if ( numAlleles < NUM_LIKELIHOODS_CACHE_N_ALLELES
|
||||||
|
|
|
||||||
|
|
@ -416,6 +416,7 @@ public class GenotypesContext implements List<Genotype> {
|
||||||
@Ensures("result >= 0")
|
@Ensures("result >= 0")
|
||||||
public int getMaxPloidy() {
|
public int getMaxPloidy() {
|
||||||
if ( maxPloidy == -1 ) {
|
if ( maxPloidy == -1 ) {
|
||||||
|
maxPloidy = 0; // necessary in the case where there are no genotypes
|
||||||
for ( final Genotype g : getGenotypes() ) {
|
for ( final Genotype g : getGenotypes() ) {
|
||||||
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
maxPloidy = Math.max(g.getPloidy(), maxPloidy);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue