The Exact model now computes both the likelihoods and posteriors (in separate arrays); likelihoods are used for assigning genotypes, not the posteriors.
This commit is contained in:
parent
aa4a8c5303
commit
442ceb6ad9
|
|
@ -62,24 +62,26 @@ public abstract class AlleleFrequencyCalculationModel implements Cloneable {
|
|||
|
||||
/**
|
||||
* Must be overridden by concrete subclasses
|
||||
* @param GLs genotype likelihoods
|
||||
* @param Alleles Alleles corresponding to GLs
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store results
|
||||
* @param GLs genotype likelihoods
|
||||
* @param Alleles Alleles corresponding to GLs
|
||||
* @param log10AlleleFrequencyPriors priors
|
||||
* @param log10AlleleFrequencyLikelihoods array (pre-allocated) to store likelihoods results
|
||||
* @param log10AlleleFrequencyPosteriors array (pre-allocated) to store posteriors results
|
||||
*/
|
||||
protected abstract void getLog10PNonRef(GenotypesContext GLs, List<Allele> Alleles,
|
||||
double[][] log10AlleleFrequencyPriors,
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
double[][] log10AlleleFrequencyPosteriors);
|
||||
|
||||
/**
|
||||
* Can be overridden by concrete subclasses
|
||||
* @param vc variant context with genotype likelihoods
|
||||
* @param log10AlleleFrequencyPosteriors allele frequency results
|
||||
* @param log10AlleleFrequencyLikelihoods allele frequency results
|
||||
* @param AFofMaxLikelihood allele frequency of max likelihood
|
||||
*
|
||||
* @return calls
|
||||
*/
|
||||
protected abstract GenotypesContext assignGenotypes(VariantContext vc,
|
||||
double[][] log10AlleleFrequencyPosteriors,
|
||||
int AFofMaxLikelihood);
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
int AFofMaxLikelihood);
|
||||
}
|
||||
|
|
@ -52,15 +52,17 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
USE_MULTI_ALLELIC_CALCULATION = UAC.MULTI_ALLELIC;
|
||||
}
|
||||
|
||||
public void getLog10PNonRef(GenotypesContext GLs, List<Allele> alleles,
|
||||
double[][] log10AlleleFrequencyPriors,
|
||||
double[][] log10AlleleFrequencyPosteriors) {
|
||||
public void getLog10PNonRef(final GenotypesContext GLs,
|
||||
final List<Allele> alleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
final int numAlleles = alleles.size();
|
||||
|
||||
if ( USE_MULTI_ALLELIC_CALCULATION )
|
||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors, false);
|
||||
linearExactMultiAllelic(GLs, numAlleles - 1, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||
else
|
||||
linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyPosteriors);
|
||||
linearExact(GLs, log10AlleleFrequencyPriors[0], log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
}
|
||||
|
||||
private static final ArrayList<double[]> getGLs(GenotypesContext GLs) {
|
||||
|
|
@ -125,6 +127,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
public int linearExact(GenotypesContext GLs,
|
||||
double[] log10AlleleFrequencyPriors,
|
||||
double[][] log10AlleleFrequencyLikelihoods,
|
||||
double[][] log10AlleleFrequencyPosteriors) {
|
||||
final ArrayList<double[]> genotypeLikelihoods = getGLs(GLs);
|
||||
final int numSamples = genotypeLikelihoods.size()-1;
|
||||
|
|
@ -176,6 +179,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
|
||||
// update the posteriors vector
|
||||
final double log10LofK = kMinus0[numSamples];
|
||||
log10AlleleFrequencyLikelihoods[0][k] = log10LofK;
|
||||
log10AlleleFrequencyPosteriors[0][k] = log10LofK + log10AlleleFrequencyPriors[k];
|
||||
|
||||
// can we abort early?
|
||||
|
|
@ -320,6 +324,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
public static void linearExactMultiAllelic(final GenotypesContext GLs,
|
||||
final int numAlternateAlleles,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors,
|
||||
final boolean preserveData) {
|
||||
|
||||
|
|
@ -344,7 +349,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
while ( !ACqueue.isEmpty() ) {
|
||||
// compute log10Likelihoods
|
||||
final ExactACset set = ACqueue.remove();
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
||||
final double log10LofKs = calculateAlleleCountConformation(set, genotypeLikelihoods, maxLog10L, numChr, preserveData, ACqueue, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
|
||||
// adjust max likelihood seen if needed
|
||||
maxLog10L = Math.max(maxLog10L, log10LofKs);
|
||||
|
|
@ -359,13 +364,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final Queue<ExactACset> ACqueue,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
|
||||
if ( DEBUG )
|
||||
System.out.printf(" *** computing LofK for set=%s%n", set.ACcounts);
|
||||
|
||||
// compute the log10Likelihoods
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors);
|
||||
computeLofK(set, genotypeLikelihoods, indexesToACset, log10AlleleFrequencyPriors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors);
|
||||
|
||||
// clean up memory
|
||||
if ( !preserveData ) {
|
||||
|
|
@ -471,6 +477,7 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
final ArrayList<double[]> genotypeLikelihoods,
|
||||
final HashMap<ExactACcounts, ExactACset> indexesToACset,
|
||||
final double[][] log10AlleleFrequencyPriors,
|
||||
final double[][] log10AlleleFrequencyLikelihoods,
|
||||
final double[][] log10AlleleFrequencyPosteriors) {
|
||||
|
||||
set.log10Likelihoods[0] = 0.0; // the zero case
|
||||
|
|
@ -517,7 +524,6 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
}
|
||||
}
|
||||
|
||||
// update the posteriors vector
|
||||
final double log10LofK = set.log10Likelihoods[set.log10Likelihoods.length-1];
|
||||
|
||||
// determine the power of theta to use
|
||||
|
|
@ -527,11 +533,14 @@ public class ExactAFCalculationModel extends AlleleFrequencyCalculationModel {
|
|||
nonRefAlleles++;
|
||||
}
|
||||
|
||||
// update the posteriors vector which is a collapsed view of each of the various ACs
|
||||
// update the likelihoods/posteriors vectors which are collapsed views of each of the various ACs
|
||||
for ( int i = 0; i < set.ACcounts.getCounts().length; i++ ) {
|
||||
int AC = set.ACcounts.getCounts()[i];
|
||||
log10AlleleFrequencyLikelihoods[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyLikelihoods[i][AC], log10LofK);
|
||||
|
||||
// for k=0 we still want to use theta
|
||||
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][set.ACcounts.getCounts()[i]];
|
||||
log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][set.ACcounts.getCounts()[i]], log10LofK + prior);
|
||||
final double prior = (nonRefAlleles == 0) ? log10AlleleFrequencyPriors[0][0] : log10AlleleFrequencyPriors[nonRefAlleles-1][AC];
|
||||
log10AlleleFrequencyPosteriors[i][AC] = approximateLog10SumLog10(log10AlleleFrequencyPosteriors[i][AC], log10LofK + prior);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ public class UnifiedGenotyperEngine {
|
|||
private final double[][] log10AlleleFrequencyPriorsIndels;
|
||||
|
||||
// the allele frequency likelihoods (allocated once as an optimization)
|
||||
private ThreadLocal<double[][]> log10AlleleFrequencyLikelihoods = new ThreadLocal<double[][]>();
|
||||
private ThreadLocal<double[][]> log10AlleleFrequencyPosteriors = new ThreadLocal<double[][]>();
|
||||
|
||||
// the priors object
|
||||
|
|
@ -294,6 +295,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
}
|
||||
|
|
@ -311,8 +313,9 @@ public class UnifiedGenotyperEngine {
|
|||
generateEmptyContext(tracker, refContext, stratifiedContexts, rawContext));
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]);
|
||||
|
|
@ -350,7 +353,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// create the genotypes
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess);
|
||||
|
||||
// print out stats if we have a writer
|
||||
if ( verboseWriter != null )
|
||||
|
|
@ -369,16 +372,18 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the overall lod
|
||||
VariantContext vcOverall = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.COMPLETE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcOverall.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//double overallLog10PofNull = log10AlleleFrequencyPosteriors.get()[0];
|
||||
double overallLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
//if ( DEBUG_SLOD ) System.out.println("overallLog10PofF=" + overallLog10PofF);
|
||||
|
||||
// the forward lod
|
||||
VariantContext vcForward = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.FORWARD, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcForward.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//double[] normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double forwardLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||
double forwardLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
|
|
@ -386,8 +391,9 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// the reverse lod
|
||||
VariantContext vcReverse = calculateLikelihoods(tracker, refContext, stratifiedContexts, AlignmentContextUtils.ReadOrientation.REVERSE, vc.getAlternateAllele(0), false, model);
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vcReverse.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
//normalizedLog10Posteriors = MathUtils.normalizeFromLog10(log10AlleleFrequencyPosteriors.get(), true);
|
||||
double reverseLog10PofNull = log10AlleleFrequencyPosteriors.get()[0][0];
|
||||
double reverseLog10PofF = MathUtils.log10sumLog10(log10AlleleFrequencyPosteriors.get()[0], 1);
|
||||
|
|
@ -445,6 +451,7 @@ public class UnifiedGenotyperEngine {
|
|||
|
||||
// initialize the data for this thread if that hasn't been done yet
|
||||
if ( afcm.get() == null ) {
|
||||
log10AlleleFrequencyLikelihoods.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
log10AlleleFrequencyPosteriors.set(new double[UAC.MAX_ALTERNATE_ALLELES][N+1]);
|
||||
afcm.set(getAlleleFrequencyCalculationObject(N, logger, verboseWriter, UAC));
|
||||
}
|
||||
|
|
@ -460,8 +467,9 @@ public class UnifiedGenotyperEngine {
|
|||
return null;
|
||||
|
||||
// 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
|
||||
clearAFarray(log10AlleleFrequencyLikelihoods.get());
|
||||
clearAFarray(log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyPosteriors.get());
|
||||
afcm.get().getLog10PNonRef(vc.getGenotypes(), vc.getAlleles(), getAlleleFrequencyPriors(model), log10AlleleFrequencyLikelihoods.get(), log10AlleleFrequencyPosteriors.get());
|
||||
|
||||
// find the most likely frequency
|
||||
int bestAFguess = MathUtils.maxElementIndex(log10AlleleFrequencyPosteriors.get()[0]);
|
||||
|
|
@ -499,7 +507,7 @@ public class UnifiedGenotyperEngine {
|
|||
}
|
||||
|
||||
// create the genotypes
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyPosteriors.get(), bestAFguess);
|
||||
GenotypesContext genotypes = afcm.get().assignGenotypes(vc, log10AlleleFrequencyLikelihoods.get(), bestAFguess);
|
||||
|
||||
// *** note that calculating strand bias involves overwriting data structures, so we do that last
|
||||
HashMap<String, Object> attributes = new HashMap<String, Object>();
|
||||
|
|
|
|||
|
|
@ -83,14 +83,16 @@ public class ExactAFCalculationModelUnitTest extends BaseTest {
|
|||
@Test(dataProvider = "getGLs")
|
||||
public void testGLs(GetGLsTest cfg) {
|
||||
|
||||
final double[][] log10AlleleFrequencyLikelihoods = new double[2][2*numSamples+1];
|
||||
final double[][] log10AlleleFrequencyPosteriors = new double[2][2*numSamples+1];
|
||||
for ( int i = 0; i < 2; i++ ) {
|
||||
for ( int j = 0; j < 2*numSamples+1; j++ ) {
|
||||
log10AlleleFrequencyLikelihoods[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
log10AlleleFrequencyPosteriors[i][j] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
|
||||
}
|
||||
}
|
||||
|
||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyPosteriors, false);
|
||||
ExactAFCalculationModel.linearExactMultiAllelic(cfg.GLs, cfg.numAltAlleles, priors, log10AlleleFrequencyLikelihoods, log10AlleleFrequencyPosteriors, false);
|
||||
|
||||
int nameIndex = 1;
|
||||
for ( int allele = 0; allele < cfg.numAltAlleles; allele++, nameIndex+=2 ) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue