From f726d8130a076202cf2246a9fdb5123e0e91af30 Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 20 Jun 2013 15:15:17 -0400 Subject: [PATCH] VariantRecalibrator bugfix for bad log10sumlog10 values -- The VR, when the model is bad, may evaluate log10sumlog10 where some of the values in the vector are NaN. This case is now trapped in VR and handled as previously -- indicating that the model has failed and evaluation continues. --- .../GaussianMixtureModel.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 92b0d4df2..efc24d5f9 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -225,6 +225,20 @@ public class GaussianMixtureModel { isModelReadyForEvaluation = true; } + /** + * A version of Log10SumLog10 that tolerates NaN values in the array + * + * In the case where one or more of the values are NaN, this function returns NaN + * + * @param values a non-null vector of doubles + * @return log10 of the sum of the log10 values, or NaN + */ + private double nanTolerantLog10SumLog10(final double[] values) { + for ( final double value : values ) + if ( Double.isNaN(value) ) return Double.NaN; + return MathUtils.log10sumLog10(values); + } + public double evaluateDatum( final VariantDatum datum ) { for( final boolean isNull : datum.isNull ) { if( isNull ) { return evaluateDatumMarginalized( datum ); } @@ -235,7 +249,7 @@ public class GaussianMixtureModel { for( final MultivariateGaussian gaussian : gaussians ) { pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum ); } - return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) + return nanTolerantLog10SumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) } // Used only to decide which covariate dimension is most divergent in order to report in the culprit info field annotation @@ -247,7 +261,7 @@ public class GaussianMixtureModel { for( final MultivariateGaussian gaussian : gaussians ) { pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + MathUtils.normalDistributionLog10(gaussian.mu[iii], gaussian.sigma.get(iii, iii), datum.annotations[iii]); } - return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) + return nanTolerantLog10SumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k)) } public double evaluateDatumMarginalized( final VariantDatum datum ) {