diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 0854b452c..268b94fba 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -27,9 +27,10 @@ public class VariantDataManager { public final ArrayList annotationKeys; private final ExpandingArrayList trainingSets; private final VariantRecalibratorArgumentCollection VRAC; - + private static boolean warnedUserMissingValue = false; protected final static Logger logger = Logger.getLogger(VariantDataManager.class); + public VariantDataManager( final List annotationKeys, final VariantRecalibratorArgumentCollection VRAC ) { this.data = null; this.annotationKeys = new ArrayList( annotationKeys ); @@ -214,6 +215,10 @@ public class VariantDataManager { if(annotationKey.equals("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } catch( final Exception e ) { value = Double.NaN; // The VQSR works with missing data now by marginalizing over the missing dimension when evaluating clusters. + if( !warnedUserMissingValue ) { + logger.warn("WARNING: Missing value detected for " + annotationKey + ". The VQSR will work with missing data by marginalizing over this dimension for this variant. This warning message is only shown once but there may be other annotations missing as well."); + warnedUserMissingValue = true; + } } } return value; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java index da92ab98b..a0fbc572d 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -1,6 +1,7 @@ package org.broadinstitute.sting.gatk.walkers.variantrecalibration; import org.apache.log4j.Logger; +import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.List; @@ -45,6 +46,13 @@ public class VariantRecalibratorEngine { logger.info("Evaluating full set of " + data.size() + " variants..."); for( final VariantDatum datum : data ) { final double thisLod = evaluateDatum( datum, model ); + if( Double.isNaN(thisLod) ) { + if( evaluateContrastively ) { + throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)"); + } else { + throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe."); + } + } datum.lod = ( evaluateContrastively ? (datum.prior + datum.lod - thisLod) : thisLod ); } } @@ -62,7 +70,7 @@ public class VariantRecalibratorEngine { model.expectationStep( data ); double currentChangeInMixtureCoefficients; int iteration = 0; - logger.info("Finished iteration " + iteration ); + logger.info("Finished iteration " + iteration + "."); while( iteration < VRAC.MAX_ITERATIONS ) { iteration++; model.maximizationStep( data );