Bugfixes to make new AFCalc system pass integrationtests
-- GeneralPloidyExactAFCalc turns -Infinity values into -Double.MAX_VALUE, so our calculations pass unit tests -- Bugfix for GeneralPloidyGenotypeLikelihoodsCalculationModel, return a null VC when the only allele we get from our final alleles to use method is the reference base -- Fix calculation of reference posteriors when P(AF == 0) = 0.0 and P(AF == 0) = X for some meaningful value of X. Added unit test to ensure this behavior is correct -- Fix horrible sorting bug in IndependentAllelesDiploidExactAFCalc that applied the theta^N priors in the wrong order. Add contract to ensure this doesn't ever happen again -- Bugfix in GLBasedSampleSelector, where VCs without any polymorphic alleles were being sent to the exact model --
This commit is contained in:
parent
695cf83675
commit
326f429270
|
|
@ -245,7 +245,7 @@ public abstract class GeneralPloidyGenotypeLikelihoodsCalculationModel extends G
|
|||
|
||||
// find the alternate allele(s) that we should be using
|
||||
final List<Allele> alleles = getFinalAllelesToUse(tracker, ref, allAllelesToUse, GLs);
|
||||
if (alleles == null || alleles.isEmpty())
|
||||
if (alleles == null || alleles.isEmpty() || (alleles.size() == 1 && alleles.get(0).isReference()))
|
||||
return null;
|
||||
// start making the VariantContext
|
||||
final GenomeLoc loc = ref.getLocus();
|
||||
|
|
|
|||
|
|
@ -448,7 +448,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
// update the MLE if necessary
|
||||
final int altCounts[] = Arrays.copyOfRange(set.getACcounts().getCounts(),1, set.getACcounts().getCounts().length);
|
||||
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
||||
getStateTracker().updateMLEifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
|
||||
getStateTracker().updateMLEifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
|
||||
|
||||
// apply the priors over each alternate allele
|
||||
for (final int ACcount : altCounts ) {
|
||||
|
|
@ -456,7 +456,7 @@ public class GeneralPloidyExactAFCalc extends ExactAFCalc {
|
|||
log10LofK += log10AlleleFrequencyPriors[ACcount];
|
||||
}
|
||||
// TODO -- GUILLERMO THIS CODE MAY PRODUCE POSITIVE LIKELIHOODS OR -INFINITY
|
||||
getStateTracker().updateMAPifNeeded(MathUtils.goodLog10Probability(log10LofK) ? log10LofK : MathUtils.LOG10_P_OF_ZERO, altCounts);
|
||||
getStateTracker().updateMAPifNeeded(Math.max(Math.min(log10LofK, 0.0), -Double.MAX_VALUE), altCounts);
|
||||
|
||||
return log10LofK;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -56,6 +56,11 @@ public class AFCalcResultUnitTest extends BaseTest {
|
|||
tests.add(new Object[]{new MyTest(new double[]{-1e-9, badL}, new double[]{0.0, badL})});
|
||||
}
|
||||
|
||||
// test that a non-ref site gets reasonable posteriors with an ~0.0 value doesn't get lost
|
||||
for ( final double nonRefL : Arrays.asList(-100.0, -50.0, -10.0, -9.0, -8.0, -7.0, -6.0, -5.0)) {
|
||||
tests.add(new Object[]{new MyTest(new double[]{0.0, nonRefL}, new double[]{0.0, nonRefL})});
|
||||
}
|
||||
|
||||
return tests.toArray(new Object[][]{});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ public class AFCalcUnitTest extends BaseTest {
|
|||
testResultSimple(cfg);
|
||||
}
|
||||
|
||||
@Test(enabled = true, dataProvider = "badGLs")
|
||||
@Test(enabled = true && !DEBUG_ONLY, dataProvider = "badGLs")
|
||||
public void testBadGLs(GetGLsTest cfg) {
|
||||
testResultSimple(cfg);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -100,7 +100,7 @@ import java.util.*;
|
|||
private final static class CompareAFCalcResultsByPNonRef implements Comparator<AFCalcResult> {
|
||||
@Override
|
||||
public int compare(AFCalcResult o1, AFCalcResult o2) {
|
||||
return Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
|
||||
return -1 * Double.compare(o1.getLog10PosteriorOfAFGT0(), o2.getLog10PosteriorOfAFGT0());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -313,6 +313,7 @@ import java.util.*;
|
|||
*
|
||||
* @param sortedResultsWithThetaNPriors the pNonRef result for each allele independently
|
||||
*/
|
||||
@Requires("sortedByPosteriorGT(sortedResultsWithThetaNPriors)")
|
||||
protected AFCalcResult combineIndependentPNonRefs(final VariantContext vc,
|
||||
final List<AFCalcResult> sortedResultsWithThetaNPriors) {
|
||||
int nEvaluations = 0;
|
||||
|
|
@ -321,8 +322,9 @@ import java.util.*;
|
|||
final double[] log10PriorsOfAC = new double[2];
|
||||
final Map<Allele, Double> log10pNonRefByAllele = new HashMap<Allele, Double>(nAltAlleles);
|
||||
|
||||
// this value is a sum in log space
|
||||
// the sum of the log10 posteriors for AF == 0 and AF > 0 to determine joint probs
|
||||
double log10PosteriorOfACEq0Sum = 0.0;
|
||||
double log10PosteriorOfACGt0Sum = 0.0;
|
||||
|
||||
for ( final AFCalcResult sortedResultWithThetaNPriors : sortedResultsWithThetaNPriors ) {
|
||||
final Allele altAllele = sortedResultWithThetaNPriors.getAllelesUsedInGenotyping().get(1);
|
||||
|
|
@ -337,6 +339,7 @@ import java.util.*;
|
|||
// the AF > 0 case requires us to store the normalized likelihood for later summation
|
||||
if ( sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0() > MIN_LOG10_CONFIDENCE_TO_INCLUDE_ALLELE_IN_POSTERIOR )
|
||||
log10PosteriorOfACEq0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFEq0();
|
||||
log10PosteriorOfACGt0Sum += sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0();
|
||||
|
||||
// bind pNonRef for allele to the posterior value of the AF > 0 with the new adjusted prior
|
||||
log10pNonRefByAllele.put(altAllele, sortedResultWithThetaNPriors.getLog10PosteriorOfAFGT0());
|
||||
|
|
@ -348,7 +351,16 @@ import java.util.*;
|
|||
// In principle, if B_p = x and C_p = y are the probabilities of being poly for alleles B and C,
|
||||
// the probability of being poly is (1 - B_p) * (1 - C_p) = (1 - x) * (1 - y). We want to estimate confidently
|
||||
// log10((1 - x) * (1 - y)) which is log10(1 - x) + log10(1 - y). This sum is log10PosteriorOfACEq0
|
||||
final double log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
|
||||
//
|
||||
// note we need to handle the case where the posterior of AF == 0 is 0.0, in which case we
|
||||
// use the summed log10PosteriorOfACGt0Sum directly. This happens in cases where
|
||||
// AF > 0 : 0.0 and AF == 0 : -16, and if you use the inverse calculation you get 0.0 and MathUtils.LOG10_P_OF_ZERO
|
||||
final double log10PosteriorOfACGt0;
|
||||
if ( log10PosteriorOfACEq0Sum == 0.0 )
|
||||
log10PosteriorOfACGt0 = log10PosteriorOfACGt0Sum;
|
||||
else
|
||||
log10PosteriorOfACGt0 = Math.max(Math.log10(1 - Math.pow(10, log10PosteriorOfACEq0Sum)), MathUtils.LOG10_P_OF_ZERO);
|
||||
|
||||
final double[] log10LikelihoodsOfAC = new double[] {
|
||||
// L + prior = posterior => L = poster - prior
|
||||
log10PosteriorOfACEq0Sum - log10PriorsOfAC[0],
|
||||
|
|
@ -362,4 +374,14 @@ import java.util.*;
|
|||
MathUtils.normalizeFromLog10(log10PriorsOfAC, true),
|
||||
log10pNonRefByAllele, sortedResultsWithThetaNPriors);
|
||||
}
|
||||
|
||||
private static boolean sortedByPosteriorGT(final List<AFCalcResult> sortedVCs) {
|
||||
double lastPosteriorGt0 = sortedVCs.get(0).getLog10PosteriorOfAFGT0();
|
||||
for ( final AFCalcResult vc : sortedVCs ) {
|
||||
if ( vc.getLog10PosteriorOfAFGT0() > lastPosteriorGt0 )
|
||||
return false;
|
||||
lastPosteriorGt0 = vc.getLog10PosteriorOfAFGT0();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -48,6 +48,9 @@ public class GLBasedSampleSelector extends SampleSelector {
|
|||
// first subset to the samples
|
||||
VariantContext subContext = vc.subContextFromSamples(samples);
|
||||
|
||||
if ( ! subContext.isPolymorphicInSamples() )
|
||||
return false;
|
||||
|
||||
// now check to see (using EXACT model) whether this should be variant
|
||||
// do we want to apply a prior? maybe user-spec?
|
||||
if ( flatPriors == null ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue