From 17e17d3c3cf8cfa6fdaff338c557875b6fd14708 Mon Sep 17 00:00:00 2001 From: rpoplin Date: Thu, 9 Jun 2011 18:37:37 +0000 Subject: [PATCH] Misc cleanup in VQSR. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5972 348d0f76-0448-11de-a6fe-93d51630548a --- .../ApplyRecalibration.java | 2 +- .../MultivariateGaussian.java | 2 +- .../VariantDataManager.java | 10 ++++---- .../VariantRecalibrator.java | 23 ++++++++++++++----- ...ntRecalibrationWalkersIntegrationTest.java | 2 +- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java index 3e4637b7f..6b7c8ed43 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/ApplyRecalibration.java @@ -123,7 +123,7 @@ public class ApplyRecalibration extends RodWalker { // setup the header fields final Set hInfo = new HashSet(); hInfo.addAll(VCFUtils.getHeaderFields(getToolkit(), inputNames)); - hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "log odds ratio of being a true variant versus being false under the trained gaussian mixture model")); + hInfo.add(new VCFInfoHeaderLine(VariantRecalibrator.VQS_LOD_KEY, 1, VCFHeaderLineType.Float, "Log odds ratio of being a true variant versus being false under the trained gaussian mixture model")); final TreeSet samples = new TreeSet(); samples.addAll(SampleUtils.getUniqueSamplesFromRods(getToolkit(), inputNames)); diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java index 87c614bda..0b2edfd10 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/MultivariateGaussian.java @@ -93,7 +93,7 @@ public class MultivariateGaussian { try { cachedSigmaInverse = sigma.inverse(); } catch( Exception e ) { - throw new UserException("Error during clustering. Most likely there are too few variants used during Gaussian mixture modeling."); + throw new UserException("Error during clustering. Most likely there are too few variants used during Gaussian mixture modeling. Please consider raising the number of variants used to train the negative model (via --percentBadVariants 0.05, for example) or lowering the maximum number of Gaussians to use in the model (via --maxGaussians 4, for example)."); } } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index 202742a5d..a3bc4d3df 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -144,7 +144,7 @@ public class VariantDataManager { int numAdded = 0; while( numAdded < numToAdd ) { final VariantDatum datum = data.get(index++); - if( !datum.failingSTDThreshold ) { + if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { trainingData.add( datum ); datum.usedForTraining = -1; numAdded++; @@ -209,15 +209,16 @@ public class VariantDataManager { double value; try { - if( annotationKey.equals("QUAL") ) { + if( annotationKey.equalsIgnoreCase("QUAL") ) { value = vc.getPhredScaledQual(); } else { value = Double.parseDouble( (String)vc.getAttribute( annotationKey ) ); if( Double.isInfinite(value) ) { value = Double.NaN; } - if( jitter && ( annotationKey.equalsIgnoreCase("HRUN") ) ) { // Integer valued annotations must be jittered a bit to work in this GMM + if( annotationKey.equalsIgnoreCase("InbreedingCoeff") && value > 0.01 ) { value = Double.NaN; } + if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } - if( annotationKey.equals("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } + if( annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } } } catch( final Exception e ) { @@ -226,7 +227,6 @@ public class VariantDataManager { logger.warn("WARNING: Missing value detected for " + annotationKey + ". The VQSR will work with missing data by marginalizing over this dimension for this variant. This warning message is only shown once so there may be other annotations missing as well."); warnedUserMissingValue = true; } - } return value; diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index 85336afa4..3c76b48f5 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -97,6 +97,9 @@ public class VariantRecalibrator extends RodWalker