Fixing edge case bug in the Exact model (both standard and generalized) where we could abort prematurely in the special case of multiple polymorphic alleles and samples with widely different depths of coverage (e.g. exome and low-pass). In these cases it was possible to call the site bi-allelic when in fact it was multi-allelic (but it wouldn't cause it to create a monomorphic call).

This commit is contained in:
Eric Banks 2012-09-20 10:59:42 -04:00
parent ccb65a03e8
commit 4b7edc72d1
3 changed files with 42 additions and 13 deletions

View File

@ -224,12 +224,16 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset ACset = ACqueue.remove();
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLog10L, ACqueue, indexesToACset);
maxLog10L = Math.max(maxLog10L, log10LofKs);
final double log10LofKs = calculateACConformationAndUpdateQueue(ACset, newPool, originalPool, newGL, log10AlleleFrequencyPriors, originalPloidy, newGLPloidy, result, maxLikelihoodSeen, ACqueue, indexesToACset);
// adjust max likelihood seen if needed
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, ACset.ACcounts);
// clean up memory
indexesToACset.remove(ACset.ACcounts);
if ( VERBOSE )
@ -250,7 +254,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
* @param originalPloidy Total ploidy of original combined pool
* @param newGLPloidy Ploidy of GL vector
* @param result AFResult object
* @param maxLog10L max likelihood observed so far
* @param maxLikelihoodSeen max likelihood observed so far
* @param ACqueue Queue of conformations to compute
* @param indexesToACset AC indices of objects in queue
* @return max log likelihood
@ -263,7 +267,7 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
final int originalPloidy,
final int newGLPloidy,
final AlleleFrequencyCalculationResult result,
final double maxLog10L,
final MaxLikelihoodSeen maxLikelihoodSeen,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset) {
@ -277,9 +281,9 @@ public class GeneralPloidyExactAFCalculationModel extends AlleleFrequencyCalcula
if (!Double.isInfinite(log10LofK))
newPool.add(set);
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
if ( VERBOSE )
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLikelihoodSeen.maxLog10L);
return log10LofK;
}

View File

@ -204,4 +204,28 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
return (obj instanceof ExactACset) && ACcounts.equals(((ExactACset)obj).ACcounts);
}
}
protected static final class MaxLikelihoodSeen {
double maxLog10L = Double.NEGATIVE_INFINITY;
ExactACcounts ACs = null;
public MaxLikelihoodSeen() {}
public void update(final double maxLog10L, final ExactACcounts ACs) {
this.maxLog10L = maxLog10L;
this.ACs = ACs;
}
// returns true iff all ACs in this object are less than or equal to their corresponding ACs in the provided set
public boolean isLowerAC(final ExactACcounts otherACs) {
final int[] myACcounts = this.ACs.getCounts();
final int[] otherACcounts = otherACs.getCounts();
for ( int i = 0; i < myACcounts.length; i++ ) {
if ( myACcounts[i] > otherACcounts[i] )
return false;
}
return true;
}
}
}

View File

@ -68,7 +68,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static final int PL_INDEX_OF_HOM_REF = 0;
private static final List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
private static List<Allele> chooseMostLikelyAlternateAlleles(VariantContext vc, int numAllelesToChoose) {
final int numOriginalAltAlleles = vc.getAlternateAlleles().size();
final LikelihoodSum[] likelihoodSums = new LikelihoodSum[numOriginalAltAlleles];
for ( int i = 0; i < numOriginalAltAlleles; i++ )
@ -132,14 +132,15 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
indexesToACset.put(zeroSet.ACcounts, zeroSet);
// keep processing while we have AC conformations that need to be calculated
double maxLog10L = Double.NEGATIVE_INFINITY;
MaxLikelihoodSeen maxLikelihoodSeen = new MaxLikelihoodSeen();
while ( !ACqueue.isEmpty() ) {
// compute log10Likelihoods
final ExactACset set = ACqueue.remove();
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLikelihoodSeen, numChr, ACqueue, indexesToACset, log10AlleleFrequencyPriors, result);
// adjust max likelihood seen if needed
maxLog10L = Math.max(maxLog10L, log10LofKs);
if ( log10LofKs > maxLikelihoodSeen.maxLog10L )
maxLikelihoodSeen.update(log10LofKs, set.ACcounts);
// clean up memory
indexesToACset.remove(set.ACcounts);
@ -160,7 +161,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
private static double calculateAlleleCountConformation(final ExactACset set,
final ArrayList<double[]> genotypeLikelihoods,
final double maxLog10L,
final MaxLikelihoodSeen maxLikelihoodSeen,
final int numChr,
final LinkedList<ExactACset> ACqueue,
final HashMap<ExactACcounts, ExactACset> indexesToACset,
@ -176,7 +177,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
// can we abort early because the log10Likelihoods are so small?
if ( log10LofK < maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY ) {
if ( log10LofK < maxLikelihoodSeen.maxLog10L - MAX_LOG10_ERROR_TO_STOP_EARLY && maxLikelihoodSeen.isLowerAC(set.ACcounts) ) {
//if ( DEBUG )
// System.out.printf(" *** breaking early set=%s log10L=%.2f maxLog10L=%.2f%n", set.ACcounts, log10LofK, maxLog10L);
return log10LofK;