Bugfixes to make new AFCalc system pass integrationtests

-- GeneralPloidyExactAFCalc turns -Infinity values into -Double.MAX_VALUE, so our calculations pass unit tests
-- Bugfix for GeneralPloidyGenotypeLikelihoodsCalculationModel, return a null VC when the only allele we get from our final alleles to use method is the reference base
-- Fix calculation of reference posteriors when P(AF == 0) = 0.0 and P(AF == 0) = X for some meaningful value of X.  Added unit test to ensure this behavior is correct
-- Fix horrible sorting bug in IndependentAllelesDiploidExactAFCalc that applied the theta^N priors in the wrong order.  Add contract to ensure this doesn't ever happen again
-- Bugfix in GLBasedSampleSelector, where VCs without any polymorphic alleles were being sent to the exact model
--
This commit is contained in:
Mark DePristo 2012-10-19 14:06:41 -04:00
parent 695cf83675
commit 326f429270
6 changed files with 37 additions and 7 deletions

View File

@ -245,7 +245,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
// find the alternate allele(s) that we should be using
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
if (alleles == null || alleles.isEmpty())
if (alleles == null || alleles.isEmpty() || (alleles.size() == 1 && alleles.get(0).isReference()))
return null;
// start making the VariantContext
final GenomeLoc loc = ref.getLocus();

View File

@ -448,7 +448,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
// update the MLE if necessary
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
getStateTracker().updateMLEifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
// apply the priors over each alternate allele
for (final int ACcount : altCounts ) {
@ -456,7 +456,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
log10LofK += log10AlleleFrequencyPriors[ACcount];
}
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
getStateTracker().updateMAPifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
return log10LofK;
}

View File

@ -56,6 +56,11 @@ public class AFCalcResultUnitTest extends BaseTest {
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
}
// test that a non-ref site gets reasonable posteriors with an ~0.0 value doesn't get lost
for ( final double nonRefL : Arrays.asList(-100.0, -50.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0)) {
tests.add(new Object[]{new MyTest(new double[]{0.0, nonRefL}, new double[]{0.0, nonRefL})});
}
return tests.toArray(new Object[][]{});
}

View File

@ -185,7 +185,7 @@ public class AFCalcUnitTest extends BaseTest {
testResultSimple(cfg);
}
@Test(enabled = true, dataProvider = "badGLs")
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "badGLs")
public void testBadGLs(GetGLsTest cfg) {
testResultSimple(cfg);
}

View File

@ -100,7 +100,7 @@ import java.util.*;
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
@Override
public int compare(AFCalcResult o1, AFCalcResult o2) {
return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
}
}
@ -313,6 +313,7 @@ import java.util.*;
*
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
*/
@Requires("sortedByPosteriorGT(sortedResultsWithThetaNPriors)")
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
int nEvaluations = 0;
@ -321,8 +322,9 @@ import java.util.*;
final double[] log10PriorsOfAC = new double[2];
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
// this value is a sum in log space
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
double log10PosteriorOfACEq0Sum = 0.0;
double log10PosteriorOfACGt0Sum = 0.0;
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
@ -337,6 +339,7 @@ import java.util.*;
// the AF > 0 case requires us to store the normalized likelihood for later summation
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
@ -348,7 +351,16 @@ import java.util.*;
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
//
// note we need to handle the case where the posterior of AF == 0 is 0.0, in which case we
// use the summed log10PosteriorOfACGt0Sum directly. This happens in cases where
// AF > 0 : 0.0 and AF == 0 : -16, and if you use the inverse calculation you get 0.0 and MathUtils.LOG10_P_OF_ZERO
final double log10PosteriorOfACGt0;
if ( log10PosteriorOfACEq0Sum == 0.0 )
log10PosteriorOfACGt0 = log10PosteriorOfACGt0Sum;
else
log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
final double[] log10LikelihoodsOfAC = new double[] {
// L + prior = posterior => L = poster - prior
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
@ -362,4 +374,14 @@ import java.util.*;
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
}
private static boolean sortedByPosteriorGT(final List<AFCalcResult> sortedVCs) {
double lastPosteriorGt0 = sortedVCs.get(0).getLog10PosteriorOfAFGT0();
for ( final AFCalcResult vc : sortedVCs ) {
if ( vc.getLog10PosteriorOfAFGT0() > lastPosteriorGt0 )
return false;
lastPosteriorGt0 = vc.getLog10PosteriorOfAFGT0();
}
return true;
}
}

View File

@ -48,6 +48,9 @@ public class GLBasedSampleSelector extends SampleSelector {
// first subset to the samples
VariantContext subContext = vc.subContextFromSamples(samples);
if ( ! subContext.isPolymorphicInSamples() )
return false;
// now check to see (using EXACT model) whether this should be variant
// do we want to apply a prior? maybe user-spec?
if ( flatPriors == null ) {