From 41068b698513ffafa38df40bb9b07ced2fcbd75b Mon Sep 17 00:00:00 2001 From: Eric Banks Date: Thu, 15 Mar 2012 16:08:58 -0400 Subject: [PATCH] The commit constitutes a major refactoring of the UG as far as the genotype likelihoods are concerned. I hate to do this in stable, but the VCFs currently being produced by the UG are totally busted. I am trying to make just the necessary changes in stable, doing everything else in unstable later. Now all GL calculations are unified into the GenotypeLikelihoods class - please try and use this functionality from now on instead of duplicating the code. --- .../walkers/annotator/InbreedingCoeff.java | 2 +- .../genotyper/ExactAFCalculationModel.java | 51 +++++----- ...NPGenotypeLikelihoodsCalculationModel.java | 32 ++++--- .../genotyper/UnifiedGenotyperEngine.java | 43 ++------- .../variantcontext/GenotypeLikelihoods.java | 92 ++++++++++++++++++- .../utils/variantcontext/VariantContext.java | 39 ++------ .../ExactAFCalculationModelUnitTest.java | 4 +- .../GenotypeLikelihoodsUnitTest.java | 34 ++++++- 8 files changed, 178 insertions(+), 119 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java index 640ab036b..6366890d5 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/InbreedingCoeff.java @@ -42,7 +42,7 @@ public class InbreedingCoeff extends InfoFieldAnnotation implements StandardAnno if (!vc.isBiallelic()) { // for non-bliallelic case, do test with most common alt allele. // Get then corresponding indeces in GL vectors to retrieve GL of AA,AB and BB. - int[] idxVector = vc.getGLIndecesOfAllele(vc.getAltAlleleWithHighestAlleleCount()); + int[] idxVector = vc.getGLIndecesOfAlternateAllele(vc.getAltAlleleWithHighestAlleleCount()); idxAA = idxVector[0]; idxAB = idxVector[1]; idxBB = idxVector[2]; diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java index ecdcb4c26..6c7dc0dcd 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModel.java @@ -84,21 +84,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { for ( int i = 0; i < numOriginalAltAlleles; i++ ) likelihoodSums[i] = new LikelihoodSum(vc.getAlternateAllele(i)); - // make sure that we've cached enough data - if ( numOriginalAltAlleles > UnifiedGenotyperEngine.PLIndexToAlleleIndex.length - 1 ) - UnifiedGenotyperEngine.calculatePLcache(numOriginalAltAlleles); - // based on the GLs, find the alternate alleles with the most probability; sum the GLs for the most likely genotype final ArrayList GLs = getGLs(vc.getGenotypes()); for ( final double[] likelihoods : GLs ) { final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); if ( PLindexOfBestGL != PL_INDEX_OF_HOM_REF ) { - int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numOriginalAltAlleles][PLindexOfBestGL]; - if ( alleles[0] != 0 ) - likelihoodSums[alleles[0]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindexOfBestGL); + if ( alleles.alleleIndex1 != 0 ) + likelihoodSums[alleles.alleleIndex1-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; // don't double-count it - if ( alleles[1] != 0 && alleles[1] != alleles[0] ) - likelihoodSums[alleles[1]-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; + if ( alleles.alleleIndex2 != 0 && alleles.alleleIndex2 != alleles.alleleIndex1 ) + likelihoodSums[alleles.alleleIndex2-1].sum += likelihoods[PLindexOfBestGL] - likelihoods[PL_INDEX_OF_HOM_REF]; } } @@ -227,10 +223,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final double[][] log10AlleleFrequencyPriors, final AlleleFrequencyCalculationResult result) { - // make sure the PL cache has been initialized - if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null ) - UnifiedGenotyperEngine.calculatePLcache(5); - final ArrayList genotypeLikelihoods = getGLs(GLs); final int numSamples = genotypeLikelihoods.size()-1; final int numChr = 2*numSamples; @@ -305,15 +297,13 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { final int numAltAlleles = set.ACcounts.getCounts().length; - // genotype likelihoods are a linear vector that can be thought of as a row-wise upper triangular matrix of log10Likelihoods. - // so e.g. with 2 alt alleles the likelihoods are AA,AB,AC,BB,BC,CC and with 3 alt alleles they are AA,AB,AC,AD,BB,BC,BD,CC,CD,DD. - // add conformations for the k+1 case - int PLindex = 0; for ( int allele = 0; allele < numAltAlleles; allele++ ) { final int[] ACcountsClone = set.ACcounts.getCounts().clone(); ACcountsClone[allele]++; - updateACset(ACcountsClone, numChr, set, ++PLindex, ACqueue, indexesToACset, genotypeLikelihoods); + // to get to this conformation, a sample would need to be AB (remember that ref=0) + final int PLindex = GenotypeLikelihoods.calculatePLindex(0, allele+1); + updateACset(ACcountsClone, numChr, set, PLindex, ACqueue, indexesToACset, genotypeLikelihoods); } // add conformations for the k+2 case if it makes sense; note that the 2 new alleles may be the same or different @@ -327,10 +317,12 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { ACcountsClone[allele_i]++; ACcountsClone[allele_j]++; + // to get to this conformation, a sample would need to be BB or BC (remember that ref=0, so add one to the index) + final int PLindex = GenotypeLikelihoods.calculatePLindex(allele_i+1, allele_j+1); if ( allele_i == allele_j ) - sameAlleles.add(new DependentSet(ACcountsClone, ++PLindex)); + sameAlleles.add(new DependentSet(ACcountsClone, PLindex)); else - differentAlleles.add(new DependentSet(ACcountsClone, ++PLindex)); + differentAlleles.add(new DependentSet(ACcountsClone, PLindex)); } } @@ -448,25 +440,26 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel { // BC: 2 * k_b * k_c // CC: k_c * (k_c - 1) - final int numAltAlleles = ACcounts.length; + // find the 2 alleles that are represented by this PL index + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); + + // *** note that throughout this method we subtract one from the alleleIndex because ACcounts *** + // *** doesn't consider the reference allele whereas the GenotypeLikelihoods PL cache does. *** // the AX het case - if ( PLindex <= numAltAlleles ) - return MathUtils.log10Cache[2*ACcounts[PLindex-1]] + MathUtils.log10Cache[2*j-totalK]; + if ( alleles.alleleIndex1 == 0 ) + return MathUtils.log10Cache[2*ACcounts[alleles.alleleIndex2-1]] + MathUtils.log10Cache[2*j-totalK]; - // find the 2 alternate alleles that are represented by this PL index - int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[numAltAlleles][PLindex]; - - final int k_i = ACcounts[alleles[0]-1]; // subtract one because ACcounts doesn't consider the reference allele + final int k_i = ACcounts[alleles.alleleIndex1-1]; // the hom var case (e.g. BB, CC, DD) final double coeff; - if ( alleles[0] == alleles[1] ) { + if ( alleles.alleleIndex1 == alleles.alleleIndex2 ) { coeff = MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_i - 1]; } // the het non-ref case (e.g. BC, BD, CD) else { - final int k_j = ACcounts[alleles[1]-1]; + final int k_j = ACcounts[alleles.alleleIndex2-1]; coeff = MathUtils.log10Cache[2] + MathUtils.log10Cache[k_i] + MathUtils.log10Cache[k_j]; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java index dd21681f0..20b1b0b14 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/SNPGenotypeLikelihoodsCalculationModel.java @@ -53,10 +53,6 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC protected SNPGenotypeLikelihoodsCalculationModel(UnifiedArgumentCollection UAC, Logger logger) { super(UAC, logger); useAlleleFromVCF = UAC.GenotypingMode == GENOTYPING_MODE.GENOTYPE_GIVEN_ALLELES; - - // make sure the PL cache has been initialized with enough alleles - if ( UnifiedGenotyperEngine.PLIndexToAlleleIndex == null || UnifiedGenotyperEngine.PLIndexToAlleleIndex.length < 4 ) // +1 for 0 alt alleles - UnifiedGenotyperEngine.calculatePLcache(3); } public VariantContext getLikelihoods(final RefMetaDataTracker tracker, @@ -133,6 +129,16 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC } builder.alleles(alleles); + // create the PL ordering to use based on the allele ordering. + final int[] PLordering = new int[numLikelihoods]; + for ( int i = 0; i <= numAltAlleles; i++ ) { + for ( int j = i; j <= numAltAlleles; j++ ) { + // As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j. + // In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc." + PLordering[(j * (j+1) / 2) + i] = DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal(); + } + } + // create the genotypes; no-call everyone for now final GenotypesContext genotypes = GenotypesContext.create(); final List noCall = new ArrayList(); @@ -142,12 +148,8 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final double[] allLikelihoods = sampleData.GL.getLikelihoods(); final double[] myLikelihoods = new double[numLikelihoods]; - int myLikelihoodsIndex = 0; - for ( int i = 0; i <= numAltAlleles; i++ ) { - for ( int j = i; j <= numAltAlleles; j++ ) { - myLikelihoods[myLikelihoodsIndex++] = allLikelihoods[DiploidGenotype.createDiploidGenotype(alleleOrdering[i], alleleOrdering[j]).ordinal()]; - } - } + for ( int i = 0; i < numLikelihoods; i++ ) + myLikelihoods[i] = allLikelihoods[PLordering[i]]; // normalize in log space so that max element is zero. final GenotypeLikelihoods likelihoods = GenotypeLikelihoods.fromLog10Likelihoods(MathUtils.normalizeFromLog10(myLikelihoods, false, true)); @@ -174,12 +176,12 @@ public class SNPGenotypeLikelihoodsCalculationModel extends GenotypeLikelihoodsC final double[] likelihoods = sampleData.GL.getLikelihoods(); final int PLindexOfBestGL = MathUtils.maxElementIndex(likelihoods); if ( PLindexOfBestGL != PLindexOfRef ) { - int[] alleles = UnifiedGenotyperEngine.PLIndexToAlleleIndex[3][PLindexOfBestGL]; - if ( alleles[0] != baseIndexOfRef ) - likelihoodSums[alleles[0]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(3, PLindexOfBestGL); + if ( alleles.alleleIndex1 != baseIndexOfRef ) + likelihoodSums[alleles.alleleIndex1] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; // don't double-count it - if ( alleles[1] != baseIndexOfRef && alleles[1] != alleles[0] ) - likelihoodSums[alleles[1]] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; + if ( alleles.alleleIndex2 != baseIndexOfRef && alleles.alleleIndex2 != alleles.alleleIndex1 ) + likelihoodSums[alleles.alleleIndex2] += likelihoods[PLindexOfBestGL] - likelihoods[PLindexOfRef]; } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java index 9f07e97d9..7edcf61a2 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyperEngine.java @@ -104,10 +104,6 @@ public class UnifiedGenotyperEngine { private final GenomeLocParser genomeLocParser; private final boolean BAQEnabledOnCMDLine; - // a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles - // the representation is int[number of alternate alleles][PL index][pair of allele indexes (where reference = 0)] - protected static int[][][] PLIndexToAlleleIndex; - // --------------------------------------------------------------------------------------------------------- // @@ -140,27 +136,6 @@ public class UnifiedGenotyperEngine { genotypePriorsIndels = createGenotypePriors(GenotypeLikelihoodsCalculationModel.Model.INDEL); filter.add(LOW_QUAL_FILTER_NAME); - calculatePLcache(UAC.MAX_ALTERNATE_ALLELES); - } - - protected static void calculatePLcache(int maxAltAlleles) { - PLIndexToAlleleIndex = new int[maxAltAlleles+1][][]; - PLIndexToAlleleIndex[0] = new int[][]{ new int[]{0, 0} }; - int numLikelihoods = 1; - - // for each count of alternate alleles - for ( int altAlleles = 1; altAlleles <= maxAltAlleles; altAlleles++ ) { - numLikelihoods += altAlleles + 1; - PLIndexToAlleleIndex[altAlleles] = new int[numLikelihoods][]; - int PLindex = 0; - - // for all possible combinations of the 2 alt alleles - for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) { - for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) { - PLIndexToAlleleIndex[altAlleles][PLindex++] = new int[]{ allele1, allele2 }; - } - } - } } /** @@ -794,21 +769,17 @@ public class UnifiedGenotyperEngine { if ( numNewAltAlleles != numOriginalAltAlleles && numNewAltAlleles > 0 ) { likelihoodIndexesToUse = new ArrayList(30); - // make sure that we've cached enough data - if ( numOriginalAltAlleles > PLIndexToAlleleIndex.length - 1 ) - calculatePLcache(numOriginalAltAlleles); - final int[][] PLcache = PLIndexToAlleleIndex[numOriginalAltAlleles]; - final boolean[] altAlleleIndexToUse = new boolean[numOriginalAltAlleles]; for ( int i = 0; i < numOriginalAltAlleles; i++ ) { if ( allelesToUse.contains(vc.getAlternateAllele(i)) ) altAlleleIndexToUse[i] = true; } - for ( int PLindex = 0; PLindex < PLcache.length; PLindex++ ) { - final int[] alleles = PLcache[PLindex]; + final int numLikelihoods = GenotypeLikelihoods.calculateNumLikelihoods(numOriginalAltAlleles); + for ( int PLindex = 0; PLindex < numLikelihoods; PLindex++ ) { + final GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); // consider this entry only if both of the alleles are good - if ( (alleles[0] == 0 || altAlleleIndexToUse[alleles[0] - 1]) && (alleles[1] == 0 || altAlleleIndexToUse[alleles[1] - 1]) ) + if ( (alleles.alleleIndex1 == 0 || altAlleleIndexToUse[alleles.alleleIndex1 - 1]) && (alleles.alleleIndex2 == 0 || altAlleleIndexToUse[alleles.alleleIndex2 - 1]) ) likelihoodIndexesToUse.add(PLindex); } } @@ -861,11 +832,11 @@ public class UnifiedGenotyperEngine { protected static Genotype assignGenotype(final Genotype originalGT, final double[] newLikelihoods, final List allelesToUse, final int numNewAltAlleles, final Map attrs) { // find the genotype with maximum likelihoods int PLindex = numNewAltAlleles == 0 ? 0 : MathUtils.maxElementIndex(newLikelihoods); - int[] alleles = PLIndexToAlleleIndex[numNewAltAlleles][PLindex]; + GenotypeLikelihoods.GenotypeLikelihoodsAllelePair alleles = GenotypeLikelihoods.getAllelePair(PLindex); ArrayList myAlleles = new ArrayList(); - myAlleles.add(allelesToUse.get(alleles[0])); - myAlleles.add(allelesToUse.get(alleles[1])); + myAlleles.add(allelesToUse.get(alleles.alleleIndex1)); + myAlleles.add(allelesToUse.get(alleles.alleleIndex2)); final double qual = numNewAltAlleles == 0 ? Genotype.NO_LOG10_PERROR : GenotypeLikelihoods.getQualFromLikelihoods(PLindex, newLikelihoods); return new Genotype(originalGT.getSampleName(), myAlleles, qual, null, attrs, false); diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java index a5e4e5774..54b4cf230 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoods.java @@ -25,13 +25,10 @@ package org.broadinstitute.sting.utils.variantcontext; import org.broad.tribble.TribbleException; -import org.broadinstitute.sting.gatk.io.DirectOutputTracker; import org.broadinstitute.sting.utils.MathUtils; import org.broadinstitute.sting.utils.codecs.vcf.VCFConstants; -import org.jgrapht.util.MathUtil; import java.util.EnumMap; -import java.util.Map; public class GenotypeLikelihoods { public static final boolean CAP_PLS = false; @@ -201,4 +198,93 @@ public class GenotypeLikelihoods { return s.toString(); } + + + // ------------------------------------------------------------------------------------- + // + // Static conversion utilities, going from GL/PL index to allele index and vice versa. + // + // ------------------------------------------------------------------------------------- + + /* + * Class representing the 2 alleles (or rather their indexes into VariantContext.getAllele()) corresponding to a specific PL index. + * Note that the reference allele is always index=0. + */ + public static class GenotypeLikelihoodsAllelePair { + public final int alleleIndex1, alleleIndex2; + + public GenotypeLikelihoodsAllelePair(final int alleleIndex1, final int alleleIndex2) { + this.alleleIndex1 = alleleIndex1; + this.alleleIndex2 = alleleIndex2; + } + } + + /* + * a cache of the PL index to the 2 alleles it represents over all possible numbers of alternate alleles + */ + private static GenotypeLikelihoodsAllelePair[] PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[]{ new GenotypeLikelihoodsAllelePair(0, 0) }; + + private static void calculatePLcache(final int minIndex) { + // how many alternate alleles do we need to calculate for? + int altAlleles = 0; + int numLikelihoods = 1; + while ( numLikelihoods <= minIndex ) { + altAlleles++; + numLikelihoods += altAlleles + 1; + } + + PLIndexToAlleleIndex = new GenotypeLikelihoodsAllelePair[numLikelihoods]; + + // for all possible combinations of 2 alleles + for ( int allele1 = 0; allele1 <= altAlleles; allele1++ ) { + for ( int allele2 = allele1; allele2 <= altAlleles; allele2++ ) { + PLIndexToAlleleIndex[calculatePLindex(allele1, allele2)] = new GenotypeLikelihoodsAllelePair(allele1, allele2); + } + } + } + + // how many likelihoods are associated with the given number of alternate alleles? + public static int calculateNumLikelihoods(int numAltAlleles) { + int numLikelihoods = 1; + for ( int i = 1; i <= numAltAlleles; i++ ) + numLikelihoods += i + 1; + return numLikelihoods; + } + + // As per the VCF spec: "the ordering of genotypes for the likelihoods is given by: F(j/k) = (k*(k+1)/2)+j. + // In other words, for biallelic sites the ordering is: AA,AB,BB; for triallelic sites the ordering is: AA,AB,BB,AC,BC,CC, etc." + // Assumes that allele1Index < allele2Index + public static int calculatePLindex(final int allele1Index, final int allele2Index) { + return (allele2Index * (allele2Index+1) / 2) + allele1Index; + } + + /** + * get the allele index pair for the given PL + * + * @param PLindex the PL index + * @return the allele index pair + */ + public static GenotypeLikelihoodsAllelePair getAllelePair(final int PLindex) { + // make sure that we've cached enough data + if ( PLindex >= PLIndexToAlleleIndex.length ) + calculatePLcache(PLindex); + + return PLIndexToAlleleIndex[PLindex]; + } + + /** + * get the PL indexes (AA, AB, BB) for the given allele pair; assumes allele1Index <= allele2Index. + * + * @param allele1Index the index in VariantContext.getAllele() of the first allele + * @param allele2Index the index in VariantContext.getAllele() of the second allele + * @return the PL indexes + */ + public static int[] getPLIndecesOfAlleles(final int allele1Index, final int allele2Index) { + + final int[] indexes = new int[3]; + indexes[0] = calculatePLindex(allele1Index, allele1Index); + indexes[1] = calculatePLindex(allele1Index, allele2Index); + indexes[2] = calculatePLindex(allele2Index, allele2Index); + return indexes; + } } diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java index f5c57ca44..e2e46cbe9 100755 --- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java +++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java @@ -1246,40 +1246,15 @@ public class VariantContext implements Feature { // to enable tribble intergrati return best; } - public int[] getGLIndecesOfAllele(Allele inputAllele) { + public int[] getGLIndecesOfAlternateAllele(Allele targetAllele) { - // TODO -- this information is cached statically by the UnifiedGenotyperEngine; pull it out into a common utils class for all to use - - int[] idxVector = new int[3]; - int numAlleles = this.getAlleles().size(); - - int idxDiag = numAlleles; - int incr = numAlleles - 1; - int k=1; - for (Allele a: getAlternateAlleles()) { - // multi-allelic approximation, part 1: Ideally - // for each alt allele compute marginal (suboptimal) posteriors - - // compute indices for AA,AB,BB for current allele - genotype likelihoods are a linear vector that can be thought of - // as a row-wise upper triangular matrix of likelihoods. - // So, for example, with 2 alt alleles, likelihoods have AA,AB,AC,BB,BC,CC. - // 3 alt alleles: AA,AB,AC,AD BB BC BD CC CD DD - - int idxAA = 0; - int idxAB = k++; - // yy is always element on the diagonal. - // 2 alleles: BBelement 2 - // 3 alleles: BB element 3. CC element 5 - // 4 alleles: - int idxBB = idxDiag; - - if (a.equals(inputAllele)) { - idxVector[0] = idxAA; - idxVector[1] = idxAB; - idxVector[2] = idxBB; + int index = 1; + for ( Allele allele : getAlternateAlleles() ) { + if ( allele.equals(targetAllele) ) break; - } - idxDiag += incr--; + index++; } - return idxVector; + + return GenotypeLikelihoods.getPLIndecesOfAlleles(0, index); } } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java index 6f259f699..c7d196b53 100644 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/genotyper/ExactAFCalculationModelUnitTest.java @@ -27,8 +27,8 @@ public class ExactAFCalculationModelUnitTest extends BaseTest { BB1 = new double[]{-20.0, -20.0, 0.0}; AA2 = new double[]{0.0, -20.0, -20.0, -20.0, -20.0, -20.0}; AB2 = new double[]{-20.0, 0.0, -20.0, -20.0, -20.0, -20.0}; - AC2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; - BB2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; + AC2 = new double[]{-20.0, -20.0, -20.0, 0.0, -20.0, -20.0}; + BB2 = new double[]{-20.0, -20.0, 0.0, -20.0, -20.0, -20.0}; BC2 = new double[]{-20.0, -20.0, -20.0, -20.0, 0.0, -20.0}; CC2 = new double[]{-20.0, -20.0, -20.0, -20.0, -20.0, 0.0}; } diff --git a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java index a66c78f3c..6320a140d 100755 --- a/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java +++ b/public/java/test/org/broadinstitute/sting/utils/variantcontext/GenotypeLikelihoodsUnitTest.java @@ -117,7 +117,7 @@ public class GenotypeLikelihoodsUnitTest { } @Test - public void testgetQualFromLikelihoods(){ + public void testgetQualFromLikelihoods() { double[] likelihoods = new double[]{-1, 0, -2}; // qual values we expect for each possible "best" genotype double[] expectedQuals = new double[]{-0.04100161, -1, -0.003930294}; @@ -134,4 +134,36 @@ public class GenotypeLikelihoodsUnitTest { Assert.assertEquals(v1[i], v2[i], 1e-6); } } + + @Test + public void testCalculatePLindex(){ + // AA,AB,BB,AC,BC,CC,AD,BD,CD,DD called in the order of AA,AB,AC,AD,BB,BC,BD,CC,CD,DD + int[] indexes = new int[]{0, 1, 3, 6, 2, 4, 7, 5, 8, 9}; + + int counter = 0; + for ( int i = 0; i <= 3; i++ ) { + for ( int j = i; j <= 3; j++ ) { + Assert.assertEquals(GenotypeLikelihoods.calculatePLindex(i, j), indexes[counter++], "PL index of alleles " + i + "," + j + " was not calculated correctly"); + } + } + } + + @Test + public void testGetAllelePair(){ + allelePairTest(0, 0, 0); + allelePairTest(1, 0, 1); + allelePairTest(2, 1, 1); + allelePairTest(3, 0, 2); + allelePairTest(4, 1, 2); + allelePairTest(5, 2, 2); + allelePairTest(6, 0, 3); + allelePairTest(7, 1, 3); + allelePairTest(8, 2, 3); + allelePairTest(9, 3, 3); + } + + private void allelePairTest(int PLindex, int allele1, int allele2) { + Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex1, allele1, "allele index " + allele1 + " from PL index " + PLindex + " was not calculated correctly"); + Assert.assertEquals(GenotypeLikelihoods.getAllelePair(PLindex).alleleIndex2, allele2, "allele index " + allele2 + " from PL index " + PLindex + " was not calculated correctly"); + } } \ No newline at end of file