diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java index 3fa9c3883..82776ca2e 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/GaussianMixtureModel.java @@ -52,6 +52,7 @@ public class GaussianMixtureModel { private final double[] empiricalMu; private final Matrix empiricalSigma; public boolean isModelReadyForEvaluation; + public boolean failedToConverge = false; public GaussianMixtureModel( final int numGaussians, final int numAnnotations, final double shrinkage, final double dirichletParameter, final double priorCounts ) { diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index f60a94a22..20b000978 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -309,8 +309,23 @@ public class VariantRecalibrator extends RodWalker negativeTrainingData = dataManager.selectWorstVariants( VRAC.PERCENT_BAD_VARIANTS, VRAC.MIN_NUM_BAD_VARIANTS ); + GaussianMixtureModel badModel = engine.generateModel( negativeTrainingData ); engine.evaluateData( dataManager.getData(), badModel, true ); + + // Detect if the negative model failed to converge because of too few points and/or too many Gaussians and try again + while( badModel.failedToConverge && VRAC.MAX_GAUSSIANS > 4 ) { + logger.info("Negative model failed to converge. Retrying..."); + VRAC.MAX_GAUSSIANS--; + badModel = engine.generateModel( negativeTrainingData ); + engine.evaluateData( dataManager.getData(), goodModel, false ); + engine.evaluateData( dataManager.getData(), badModel, true ); + } + + if( badModel.failedToConverge || goodModel.failedToConverge ) { + throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)"); + } + engine.calculateWorstPerformingAnnotation( dataManager.getData(), goodModel, badModel ); // Find the VQSLOD cutoff values which correspond to the various tranches of calls requested by the user diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index adfb38a25..6d2ac643b 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -67,14 +67,20 @@ public class VariantRecalibratorEngine { public void evaluateData( final List data, final GaussianMixtureModel model, final boolean evaluateContrastively ) { if( !model.isModelReadyForEvaluation ) { - model.precomputeDenominatorForEvaluation(); + try { + model.precomputeDenominatorForEvaluation(); + } catch( Exception e ) { + model.failedToConverge = true; + return; + } } logger.info("Evaluating full set of " + data.size() + " variants..."); for( final VariantDatum datum : data ) { final double thisLod = evaluateDatum( datum, model ); if( Double.isNaN(thisLod) ) { - throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)"); + model.failedToConverge = true; + return; } datum.lod = ( evaluateContrastively ?