From 5a74a3190c1a3637b5a9b21e55330affd8f73d4a Mon Sep 17 00:00:00 2001 From: Mark DePristo Date: Thu, 11 Apr 2013 10:52:59 -0400 Subject: [PATCH] Improvements to the VariantRecalibrator R plots -- VariantRecalibrator now emits plots with denormlized values (original values) instead of their normalized (x - mu / sigma) which helps to understand the distribution of values that are good and bad --- .../VariantDataManager.java | 62 ++++++++++++------- .../VariantRecalibrator.java | 10 ++- 2 files changed, 47 insertions(+), 25 deletions(-) diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index a830a801e..40032a886 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -127,30 +127,46 @@ public class VariantDataManager { } } - public void addTrainingSet( final TrainingSet trainingSet ) { - trainingSets.add( trainingSet ); - } + /** + * Convert a normalized point to it's original annotation value + * + * norm = (orig - mu) / sigma + * orig = norm * sigma + mu + * + * @param normalizedValue the normalized value of the ith annotation + * @param annI the index of the annotation value + * @return the denormalized value for the annotation + */ + public double denormalizeDatum(final double normalizedValue, final int annI) { + final double mu = meanVector[annI]; + final double sigma = varianceVector[annI]; + return normalizedValue * sigma + mu; + } - public boolean checkHasTrainingSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isTraining ) { return true; } - } - return false; - } + public void addTrainingSet( final TrainingSet trainingSet ) { + trainingSets.add( trainingSet ); + } - public boolean checkHasTruthSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isTruth ) { return true; } - } - return false; - } + public boolean checkHasTrainingSet() { + for( final TrainingSet trainingSet : trainingSets ) { + if( trainingSet.isTraining ) { return true; } + } + return false; + } - public boolean checkHasKnownSet() { - for( final TrainingSet trainingSet : trainingSets ) { - if( trainingSet.isKnown ) { return true; } - } - return false; - } + public boolean checkHasTruthSet() { + for( final TrainingSet trainingSet : trainingSets ) { + if( trainingSet.isTruth ) { return true; } + } + return false; + } + + public boolean checkHasKnownSet() { + for( final TrainingSet trainingSet : trainingSets ) { + if( trainingSet.isKnown ) { return true; } + } + return false; + } public ExpandingArrayList getTrainingData() { final ExpandingArrayList trainingData = new ExpandingArrayList(); @@ -260,7 +276,7 @@ public class VariantDataManager { value = vc.getAttributeAsDouble( annotationKey, Double.NaN ); if( Double.isInfinite(value) ) { value = Double.NaN; } if( jitter && annotationKey.equalsIgnoreCase("HRUN") ) { // Integer valued annotations must be jittered a bit to work in this GMM - value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); + value += -0.25 + 0.5 * GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } if( jitter && annotationKey.equalsIgnoreCase("HaplotypeScore") && MathUtils.compareDoubles(value, 0.0, 0.0001) == 0 ) { value = -0.2 + 0.4*GenomeAnalysisEngine.getRandomGenerator().nextDouble(); } @@ -297,7 +313,7 @@ public class VariantDataManager { private boolean isValidVariant( final VariantContext evalVC, final VariantContext trainVC, final boolean TRUST_ALL_POLYMORPHIC) { return trainVC != null && trainVC.isNotFiltered() && trainVC.isVariant() && checkVariationClass( evalVC, trainVC ) && - (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples()); + (TRUST_ALL_POLYMORPHIC || !trainVC.hasGenotypes() || trainVC.isPolymorphicInSamples()); } protected static boolean checkVariationClass( final VariantContext evalVC, final VariantContext trainVC ) { diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java index b0970cce2..bee695e2a 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java @@ -435,14 +435,20 @@ public class VariantRecalibrator extends RodWalker