Merge pull request #1098 from broadinstitute/vrr_rcm_is_active_calculateGenotypes
Faster implementation of the active state profile value calculation w…
This commit is contained in:
commit
f73871c865
|
|
@ -504,7 +504,7 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
* @return never {@code null}, an array with exactly <code>total-ploidy(vc) + 1</code> positions.
|
||||
*/
|
||||
protected final double[] getAlleleFrequencyPriors( final VariantContext vc, final int defaultPloidy, final GenotypeLikelihoodsCalculationModel.Model model ) {
|
||||
final int totalPloidy = GATKVariantContextUtils.totalPloidy(vc,defaultPloidy);
|
||||
final int totalPloidy = GATKVariantContextUtils.totalPloidy(vc, defaultPloidy);
|
||||
switch (model) {
|
||||
case SNP:
|
||||
case GENERALPLOIDYSNP:
|
||||
|
|
@ -668,4 +668,50 @@ public abstract class GenotypingEngine<Config extends StandardCallerArgumentColl
|
|||
return MLEfrequencies;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculates the active state profile value for a single sample.
|
||||
*
|
||||
* @param log10GenotypeLikelihoods the single sample genotype likelihoods.
|
||||
* @return log10 probability from 0 to -Infinity.
|
||||
*/
|
||||
public double calculateSingleSampleRefVsAnyActiveStateProfileValue(final double[] log10GenotypeLikelihoods) {
|
||||
if (log10GenotypeLikelihoods == null) {
|
||||
throw new IllegalArgumentException("the input likelihoods cannot be null");
|
||||
} else if (log10GenotypeLikelihoods.length != this.configuration.genotypeArgs.samplePloidy + 1) {
|
||||
throw new IllegalArgumentException("wrong likelihoods dimensions");
|
||||
} else {
|
||||
final double[] log10Priors = log10AlleleFrequencyPriorsSNPs.forTotalPloidy(this.configuration.genotypeArgs.samplePloidy);
|
||||
final double log10ACeq0Likelihood = log10GenotypeLikelihoods[0];
|
||||
final double log10ACeq0Prior = log10Priors[0];
|
||||
final double log10ACeq0Posterior = log10ACeq0Likelihood + log10ACeq0Prior;
|
||||
|
||||
// If the Maximum a-posteriori AC is 0 then the profile value must be 0.0 as per existing code; it does
|
||||
// not matter whether a AC > 0 is at all plausible.
|
||||
boolean mapACeq0 = true;
|
||||
for (int AC = 1; AC < log10Priors.length; AC++)
|
||||
if (log10Priors[AC] + log10GenotypeLikelihoods[AC] > log10ACeq0Posterior) {
|
||||
mapACeq0 = false;
|
||||
break;
|
||||
}
|
||||
if (mapACeq0)
|
||||
return 0.0;
|
||||
|
||||
//TODO bad way to calculate AC > 0 posterior that follows the current behaviour of ExactAFCalculator (StateTracker)
|
||||
//TODO this is the lousy part... this code just adds up lks and priors of AC != 0 before as if
|
||||
//TODO Sum(a_i * b_i) is equivalent to Sum(a_i) * Sum(b_i)
|
||||
//TODO This has to be changed not just here but also in the AFCalculators (StateTracker).
|
||||
final double log10ACgt0Likelihood = MathUtils.approximateLog10SumLog10(log10GenotypeLikelihoods, 1, log10GenotypeLikelihoods.length);
|
||||
final double log10ACgt0Prior = MathUtils.approximateLog10SumLog10(log10Priors, 1, log10Priors.length);
|
||||
final double log10ACgt0Posterior = log10ACgt0Likelihood + log10ACgt0Prior;
|
||||
final double log10PosteriorNormalizationConstant = MathUtils.approximateLog10SumLog10(log10ACeq0Posterior, log10ACgt0Posterior);
|
||||
//TODO End of lousy part.
|
||||
|
||||
final double normalizedLog10ACeq0Posterior = log10ACeq0Posterior - log10PosteriorNormalizationConstant;
|
||||
// This is another condition to return a 0.0 also present in AFCalculator code as well.
|
||||
if (normalizedLog10ACeq0Posterior >= QualityUtils.qualToErrorProbLog10(configuration.genotypeArgs.STANDARD_CONFIDENCE_FOR_EMITTING))
|
||||
return 0.0;
|
||||
|
||||
return 1.0 - Math.pow(10.0, normalizedLog10ACeq0Posterior);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -222,6 +222,7 @@ final class StateTracker {
|
|||
@Requires("allelesUsedInGenotyping != null")
|
||||
protected AFCalculationResult toAFCalculationResult(final double[] log10PriorsByAC) {
|
||||
final int [] subACOfMLE = Arrays.copyOf(alleleCountsOfMLE, allelesUsedInGenotyping.size() - 1);
|
||||
//TODO bad calculation of normalized log10 ACeq0 and ACgt0 likelihoods, priors and consequently posteriors calculated in AFCalculationResult constructor.
|
||||
final double[] log10Likelihoods = MathUtils.normalizeFromLog10(new double[]{getLog10LikelihoodOfAFzero(), getLog10LikelihoodOfAFNotZero()}, true);
|
||||
final double[] log10Priors = MathUtils.normalizeFromLog10(new double[]{log10PriorsByAC[0], MathUtils.log10sumLog10(log10PriorsByAC, 1)}, true);
|
||||
|
||||
|
|
|
|||
|
|
@ -811,9 +811,16 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
}
|
||||
|
||||
final List<Allele> alleles = Arrays.asList(FAKE_REF_ALLELE , FAKE_ALT_ALLELE);
|
||||
final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
final double isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb( vcOut.getPhredScaledQual() );
|
||||
final double isActiveProb;
|
||||
|
||||
if (genotypes.size() == 1) {
|
||||
// Faster implementation avoiding the costly and over complicated Exact AFCalculator machinery:
|
||||
// This is the case when doing GVCF output.
|
||||
isActiveProb = activeRegionEvaluationGenotyperEngine.calculateSingleSampleRefVsAnyActiveStateProfileValue(genotypes.get(0).getLikelihoods().getAsVector());
|
||||
} else {
|
||||
final VariantCallContext vcOut = activeRegionEvaluationGenotyperEngine.calculateGenotypes(new VariantContextBuilder("HCisActive!", context.getContig(), context.getLocation().getStart(), context.getLocation().getStop(), alleles).genotypes(genotypes).make(), GenotypeLikelihoodsCalculationModel.Model.SNP);
|
||||
isActiveProb = vcOut == null ? 0.0 : QualityUtils.qualToProb(vcOut.getPhredScaledQual());
|
||||
}
|
||||
return new ActivityProfileState( ref.getLocus(), isActiveProb, averageHQSoftClips.mean() > AVERAGE_HQ_SOFTCLIPS_HQ_BASES_THRESHOLD ? ActivityProfileState.Type.HIGH_QUALITY_SOFT_CLIPS : ActivityProfileState.Type.NONE, averageHQSoftClips.mean() );
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue