Fix up broken non-pool UG tests: GenotypeLikelihoods.calcNumLikelihoods now expects total # of alleles, not # of alt ones. Add doc to new function implementation. Add unit test for function. Add unit test for PoolGenotypeLikelihoods (not fully done yet)
This commit is contained in:
parent
5a10f173ea
commit
05d8400468
|
|
@ -250,7 +250,32 @@ public class GenotypeLikelihoods {
|
||||||
return cache;
|
return cache;
|
||||||
}
|
}
|
||||||
|
|
||||||
// how many likelihoods are associated with the given number of alternate alleles?
|
/**
|
||||||
|
* Compute how many likelihood elements are associated with the given number of alleles
|
||||||
|
* Equivalent to asking in how many ways N non-negative integers can add up to P is S(N,P)
|
||||||
|
* where P = ploidy (number of chromosomes) and N = total # of alleles.
|
||||||
|
* Each chromosome can be in one single state (0,...,N-1) and there are P of them.
|
||||||
|
* Naive solution would be to store N*P likelihoods, but this is not necessary because we can't distinguish chromosome states, but rather
|
||||||
|
* only total number of alt allele counts in all chromosomes.
|
||||||
|
*
|
||||||
|
* For example, S(3,2) = 6: For alleles A,B,C, on a diploid organism we have six possible genotypes:
|
||||||
|
* AA,AB,BB,AB,BC,CC.
|
||||||
|
* Another way of expressing is with vector (#of A alleles, # of B alleles, # of C alleles)
|
||||||
|
* which is then, for ordering above, (2,0,0), (1,1,0), (0,2,0), (1,1,0), (0,1,1), (0,0,2)
|
||||||
|
* In general, for P=2 (regular biallelic), then S(N,2) = N*(N+1)/2
|
||||||
|
*
|
||||||
|
* Recursive implementation:
|
||||||
|
* S(N,P) = sum_{k=0}^P S(N-1,P-k)
|
||||||
|
* because if we have N integers, we can condition 1 integer to be = k, and then N-1 integers have to sum to P-K
|
||||||
|
* With initial conditions
|
||||||
|
* S(N,1) = N (only way to have N integers add up to 1 is all-zeros except one element with a one. There are N of these vectors)
|
||||||
|
* S(1,P) = 1 (only way to have 1 integer add to P is with that integer P itself).
|
||||||
|
*
|
||||||
|
* @param numAlleles Number of alleles (including ref)
|
||||||
|
* @param ploidy Ploidy, or number of chromosomes in set
|
||||||
|
* @return Number of likelihood elements we need to hold.
|
||||||
|
*/
|
||||||
|
|
||||||
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
public static int calculateNumLikelihoods(final int numAlleles, final int ploidy) {
|
||||||
|
|
||||||
if (numAlleles == 1)
|
if (numAlleles == 1)
|
||||||
|
|
@ -263,10 +288,7 @@ public class GenotypeLikelihoods {
|
||||||
acc += calculateNumLikelihoods(numAlleles-1, ploidy-k);
|
acc += calculateNumLikelihoods(numAlleles-1, ploidy-k);
|
||||||
|
|
||||||
return acc;
|
return acc;
|
||||||
/* int numLikelihoods = 1;
|
|
||||||
for ( int i = 1; i <= numAltAlleles; i++ )
|
|
||||||
numLikelihoods += i + 1;
|
|
||||||
return numLikelihoods; */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
// As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j.
|
||||||
|
|
|
||||||
|
|
@ -1117,7 +1117,8 @@ public class VariantContextUtils {
|
||||||
altAlleleIndexToUse[i] = true;
|
altAlleleIndexToUse[i] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
// calculateNumLikelihoods takes total # of alleles. Use default # of chromosomes (ploidy) = 2
|
||||||
|
final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(1+numOriginalAltAlleles, UnifiedGenotyperEngine.DEFAULT_PLOIDY);
|
||||||
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) {
|
||||||
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex);
|
||||||
// consider this entry only if both of the alleles are good
|
// consider this entry only if both of the alleles are good
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,17 @@ public class GenotypeLikelihoodsUnitTest {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testCalculateNumLikelihoods() {
|
||||||
|
|
||||||
|
for (int nAlleles=2; nAlleles<=5; nAlleles++)
|
||||||
|
// simplest case: diploid
|
||||||
|
Assert.assertEquals(GenotypeLikelihoods.calculateNumLikelihoods(nAlleles, 2), nAlleles*(nAlleles+1)/2);
|
||||||
|
|
||||||
|
// some special cases: ploidy = 20, #alleles = 4
|
||||||
|
Assert.assertEquals(GenotypeLikelihoods.calculateNumLikelihoods(4, 20), 1771);
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetLog10GQ(){
|
public void testGetLog10GQ(){
|
||||||
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
|
GenotypeLikelihoods gl = new GenotypeLikelihoods(vPLString);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue