Bugfixes to make the new AFCalc system pass integration tests
-- GeneralPloidyExactAFCalc clamps log10 likelihoods to [-Double.MAX_VALUE, 0.0], turning -Infinity values into -Double.MAX_VALUE, so our calculations pass unit tests
-- Bugfix for GeneralPloidyGenotypeLikelihoodsCalculationModel: return a null VC when the only allele we get back from getFinalAllelesToUse is the reference base
-- Fix calculation of reference posteriors when log10 P(AF == 0) = 0.0 and log10 P(AF > 0) = X for some meaningful value of X. Added a unit test to ensure this behavior is correct
-- Fix horrible sorting bug in IndependentAllelesDiploidExactAFCalc that applied the theta^N priors in the wrong order. Added a contract to ensure this doesn't ever happen again
-- Bugfix in GLBasedSampleSelector, where VCs without any polymorphic alleles were being sent to the exact model
This commit is contained in:
parent
695cf83675
commit
326f429270
|
|
@ -245,7 +245,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
||||||
|
|
||||||
// find the alternate allele(s) that we should be using
|
// find the alternate allele(s) that we should be using
|
||||||
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
||||||
if (alleles == null || alleles.isEmpty())
|
if (alleles == null || alleles.isEmpty() || (alleles.size() == 1 && alleles.get(0).isReference()))
|
||||||
return null;
|
return null;
|
||||||
// start making the VariantContext
|
// start making the VariantContext
|
||||||
final GenomeLoc loc = ref.getLocus();
|
final GenomeLoc loc = ref.getLocus();
|
||||||
|
|
|
||||||
|
|
@ -448,7 +448,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
||||||
// update the MLE if necessary
|
// update the MLE if necessary
|
||||||
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
||||||
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
||||||
getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
|
getStateTracker().updateMLEifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
|
||||||
|
|
||||||
// apply the priors over each alternate allele
|
// apply the priors over each alternate allele
|
||||||
for (final int ACcount : altCounts ) {
|
for (final int ACcount : altCounts ) {
|
||||||
|
|
@ -456,7 +456,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
||||||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||||
}
|
}
|
||||||
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
||||||
getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
|
getStateTracker().updateMAPifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
|
||||||
|
|
||||||
return log10LofK;
|
return log10LofK;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -56,6 +56,11 @@ public class AFCalcResultUnitTest extends BaseTest {
|
||||||
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
|
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// test that a non-ref site gets reasonable posteriors, i.e. that its value doesn't get lost next to an ~0.0 value
|
||||||
|
for ( final double nonRefL : Arrays.asList(-100.0, -50.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0)) {
|
||||||
|
tests.add(new Object[]{new MyTest(new double[]{0.0, nonRefL}, new double[]{0.0, nonRefL})});
|
||||||
|
}
|
||||||
|
|
||||||
return tests.toArray(new Object[][]{});
|
return tests.toArray(new Object[][]{});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -185,7 +185,7 @@ public class AFCalcUnitTest extends BaseTest {
|
||||||
testResultSimple(cfg);
|
testResultSimple(cfg);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test(enabled = true, dataProvider = "badGLs")
|
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "badGLs")
|
||||||
public void testBadGLs(GetGLsTest cfg) {
|
public void testBadGLs(GetGLsTest cfg) {
|
||||||
testResultSimple(cfg);
|
testResultSimple(cfg);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -100,7 +100,7 @@ import java.util.*;
|
||||||
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
|
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
|
||||||
@Override
|
@Override
|
||||||
public int compare(AFCalcResult o1, AFCalcResult o2) {
|
public int compare(AFCalcResult o1, AFCalcResult o2) {
|
||||||
return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
|
return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -313,6 +313,7 @@ import java.util.*;
|
||||||
*
|
*
|
||||||
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
||||||
*/
|
*/
|
||||||
|
@Requires("sortedByPosteriorGT(sortedResultsWithThetaNPriors)")
|
||||||
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
||||||
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
|
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
|
||||||
int nEvaluations = 0;
|
int nEvaluations = 0;
|
||||||
|
|
@ -321,8 +322,9 @@ import java.util.*;
|
||||||
final double[] log10PriorsOfAC = new double[2];
|
final double[] log10PriorsOfAC = new double[2];
|
||||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||||
|
|
||||||
// this value is a sum in log space
|
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
|
||||||
double log10PosteriorOfACEq0Sum = 0.0;
|
double log10PosteriorOfACEq0Sum = 0.0;
|
||||||
|
double log10PosteriorOfACGt0Sum = 0.0;
|
||||||
|
|
||||||
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
||||||
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
||||||
|
|
@ -337,6 +339,7 @@ import java.util.*;
|
||||||
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
||||||
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
||||||
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
||||||
|
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||||
|
|
||||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||||
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
||||||
|
|
@ -348,7 +351,16 @@ import java.util.*;
|
||||||
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
|
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
|
||||||
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
|
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
|
||||||
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
|
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
|
||||||
final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
|
//
|
||||||
|
// note we need to handle the case where the posterior of AF == 0 is 0.0, in which case we
|
||||||
|
// use the summed log10PosteriorOfACGt0Sum directly. This happens in cases where
|
||||||
|
// AF == 0 : 0.0 and AF > 0 : -16, and if you use the inverse calculation you get 0.0 and MathUtils.LOG10_P_OF_ZERO
|
||||||
|
final double log10PosteriorOfACGt0;
|
||||||
|
if ( log10PosteriorOfACEq0Sum == 0.0 )
|
||||||
|
log10PosteriorOfACGt0 = log10PosteriorOfACGt0Sum;
|
||||||
|
else
|
||||||
|
log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
|
||||||
|
|
||||||
final double[] log10LikelihoodsOfAC = new double[] {
|
final double[] log10LikelihoodsOfAC = new double[] {
|
||||||
// L + prior = posterior => L = poster - prior
|
// L + prior = posterior => L = poster - prior
|
||||||
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
|
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
|
||||||
|
|
@ -362,4 +374,14 @@ import java.util.*;
|
||||||
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
|
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
|
||||||
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static boolean sortedByPosteriorGT(final List<AFCalcResult> sortedVCs) {
|
||||||
|
double lastPosteriorGt0 = sortedVCs.get(0).getLog10PosteriorOfAFGT0();
|
||||||
|
for ( final AFCalcResult vc : sortedVCs ) {
|
||||||
|
if ( vc.getLog10PosteriorOfAFGT0() > lastPosteriorGt0 )
|
||||||
|
return false;
|
||||||
|
lastPosteriorGt0 = vc.getLog10PosteriorOfAFGT0();
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -48,6 +48,9 @@ public class GLBasedSampleSelector extends SampleSelector {
|
||||||
// first subset to the samples
|
// first subset to the samples
|
||||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||||
|
|
||||||
|
if ( ! subContext.isPolymorphicInSamples() )
|
||||||
|
return false;
|
||||||
|
|
||||||
// now check to see (using EXACT model) whether this should be variant
|
// now check to see (using EXACT model) whether this should be variant
|
||||||
// do we want to apply a prior? maybe user-spec?
|
// do we want to apply a prior? maybe user-spec?
|
||||||
if ( flatPriors == null ) {
|
if ( flatPriors == null ) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue