Improvements to the VariantRecalibrator R plots

-- VariantRecalibrator now emits plots with denormalized (original) values instead of their normalized values ((x - mu) / sigma), which helps in understanding the distribution of values that are good and bad
2013-04-11 10:52:59 -04:00 · 2013-04-11 10:52:59 -04:00 · 5a74a3190c
parent 564fe36d22
commit 5a74a3190c
2 changed files with 47 additions and 25 deletions
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java
@ -127,6 +127,22 @@ public class VariantDataManager {
        }
    }
    /**
     * Maps a normalized annotation value back onto its original scale.
     *
     * Normalization is defined as norm = (orig - mu) / sigma, so the inverse
     * applied here is orig = norm * sigma + mu.
     *
     * @param normalizedValue the normalized value of the annotation at index annI
     * @param annI the index of the annotation, used to look up its mu and sigma
     * @return the denormalized value for the annotation
     */
    public double denormalizeDatum(final double normalizedValue, final int annI) {
        final double offset = meanVector[annI];
        // NOTE(review): varianceVector[annI] is used as sigma here -- confirm it
        // holds the standard deviation rather than the variance.
        final double scale = varianceVector[annI];
        return offset + scale * normalizedValue;
    }
    /**
     * Appends the given training set to this manager's collection of training sets.
     *
     * @param trainingSet the training set to add
     */
    public void addTrainingSet(final TrainingSet trainingSet) {
        this.trainingSets.add(trainingSet);
    }
--- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
+++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantRecalibrator.java
@ -435,14 +435,20 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
                stream.print("surface <- c(");
                for( final VariantDatum datum : fakeData ) {
-                    stream.print(String.format("%.3f, %.3f, %.3f, ", datum.annotations[iii], datum.annotations[jjj], Math.min(4.0, Math.max(-4.0, datum.lod))));
+                    stream.print(String.format("%.3f, %.3f, %.3f, ",
                            dataManager.denormalizeDatum(datum.annotations[iii], iii),
                            dataManager.denormalizeDatum(datum.annotations[jjj], jjj),
                            Math.min(4.0, Math.max(-4.0, datum.lod))));
                }
                stream.println("NA,NA,NA)");
                stream.println("s <- matrix(surface,ncol=3,byrow=T)");
                stream.print("data <- c(");
                for( final VariantDatum datum : randomData ) {
-                    stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,", datum.annotations[iii], datum.annotations[jjj], (datum.lod < lodCutoff ? -1.0 : 1.0),
+                    stream.print(String.format("%.3f, %.3f, %.3f, %d, %d,",
                            dataManager.denormalizeDatum(datum.annotations[iii], iii),
                            dataManager.denormalizeDatum(datum.annotations[jjj], jjj),
                            (datum.lod < lodCutoff ? -1.0 : 1.0),
                            (datum.atAntiTrainingSite ? -1 : (datum.atTrainingSite ? 1 : 0)), (datum.isKnown ? 1 : -1)));
                }
                stream.println("NA,NA,NA,NA,1)");