From f5d133df8796f7c199596fc94c608814248d953f Mon Sep 17 00:00:00 2001 From: Samuel Friedman Date: Wed, 3 May 2017 18:37:52 -0400 Subject: [PATCH] scientific notation in model report and basic test --- .../GaussianMixtureModel.java | 5 +++ .../VariantDataManager.java | 2 +- .../VariantRecalibrator.java | 14 +++------ .../VariantRecalibratorEngine.java | 4 ++- ...ariantRecalibratorModelOutputUnitTest.java | 31 +++++++++++++++++++ 5 files changed, 45 insertions(+), 11 deletions(-) diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java index 2a9b2b5cc..37dc1ae43 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/GaussianMixtureModel.java @@ -110,6 +110,11 @@ public class GaussianMixtureModel { this.shrinkage = shrinkage; this.dirichletParameter = dirichletParameter; this.priorCounts = priorCounts; + for( final MultivariateGaussian gaussian : gaussians ) { + gaussian.hyperParameter_a = priorCounts; + gaussian.hyperParameter_b = shrinkage; + gaussian.hyperParameter_lambda = dirichletParameter; + } empiricalMu = new double[numAnnotations]; empiricalSigma = new Matrix(numAnnotations, numAnnotations); isModelReadyForEvaluation = false; diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java index b1b19433c..d4304d147 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantDataManager.java @@ -277,7 +277,7 @@ public class VariantDataManager { } } - logger.info( "Training with worst " + trainingData.size() + " scoring variants --> variants with LOD <= " + String.format("%.4f", VRAC.BAD_LOD_CUTOFF) + "." ); + logger.info( "Selected worst " + trainingData.size() + " scoring variants --> variants with LOD <= " + String.format("%.4f", VRAC.BAD_LOD_CUTOFF) + "." ); return trainingData; } diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java index 8e473b5d5..b4792665b 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibrator.java @@ -376,10 +376,7 @@ public class VariantRecalibrator extends RodWalker hInfo = new HashSet<>(); + final Set hInfo = new HashSet<>(); ApplyRecalibration.addVQSRStandardHeaderLines(hInfo); recalWriter.writeHeader( new VCFHeader(hInfo) ); @@ -513,12 +510,11 @@ public class VariantRecalibrator extends RodWalker positiveTrainingData = dataManager.getTrainingData(); final List negativeTrainingData; if (goodModel != null && badModel != null){ // GMMs were loaded from a file - // Keeping this to maintain reproducibility between runs with and without serialized GMMs + logger.info("Using serialized GMMs from file..."); engine.evaluateData(dataManager.getData(), goodModel, false); negativeTrainingData = dataManager.selectWorstVariants(); } else { // Generate the GMMs from scratch @@ -527,12 +523,12 @@ public class VariantRecalibrator extends RodWalker gaussianList = new ArrayList<>(); int curAnnotation = 0; @@ -665,7 +661,7 @@ public class VariantRecalibrator extends RodWalker annotationList) { - final String formatString = "%.8f"; + final String formatString = "%.8E"; final GATKReport report = new GATKReport(); if (dataManager != null) { //for unit test diff --git a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorEngine.java b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorEngine.java index f86099113..d8a8bb653 100644 --- a/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorEngine.java +++ b/protected/gatk-tools-protected/src/main/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorEngine.java @@ -98,6 +98,7 @@ public class VariantRecalibratorEngine { try { model.precomputeDenominatorForEvaluation(); } catch( Exception e ) { + logger.warn("Model could not pre-compute denominators."); model.failedToConverge = true; return; } @@ -107,6 +108,7 @@ public class VariantRecalibratorEngine { for( final VariantDatum datum : data ) { final double thisLod = evaluateDatum( datum, model ); if( Double.isNaN(thisLod) ) { + logger.warn("Evaluate datum returned a NaN."); model.failedToConverge = true; return; } @@ -142,7 +144,7 @@ public class VariantRecalibratorEngine { // Private Methods used for generating a GaussianMixtureModel ///////////////////////////// - private void variationalBayesExpectationMaximization( final GaussianMixtureModel model, final List data ) { + protected void variationalBayesExpectationMaximization(final GaussianMixtureModel model, final List data) { model.initializeRandomModel( data, VRAC.NUM_KMEANS_ITERATIONS ); diff --git a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorModelOutputUnitTest.java b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorModelOutputUnitTest.java index 56e515029..7b6f708a9 100644 --- a/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorModelOutputUnitTest.java +++ b/protected/gatk-tools-protected/src/test/java/org/broadinstitute/gatk/tools/walkers/variantrecalibration/VariantRecalibratorModelOutputUnitTest.java @@ -170,8 +170,20 @@ public class VariantRecalibratorModelOutputUnitTest { Assert.assertEquals(badGaussian1.sigma.get(i,j), (Double)badSigma.get(i,annotationList.get(j)), epsilon); } } + + // Now test model report reading + // Read the gaussian weighting tables + final GATKReportTable nPMixTable = report.getTable("BadGaussianPMix"); + final GATKReportTable pPMixTable = report.getTable("GoodGaussianPMix"); + + GaussianMixtureModel goodModelFromFile = vqsr.GMMFromTables(goodMus, goodSigma, pPMixTable, annotationList.size()); + GaussianMixtureModel badModelFromFile = vqsr.GMMFromTables(badMus, badSigma, nPMixTable, annotationList.size()); + + testGMMsForEquality(goodModel, goodModelFromFile, epsilon); + testGMMsForEquality(badModel, badModelFromFile, epsilon); } + @Test //This is tested separately to avoid setting up a VariantDataManager and populating it with fake data public void testAnnotationNormalizationOutput() { @@ -211,4 +223,23 @@ public class VariantRecalibratorModelOutputUnitTest { return returnString; } + private void testGMMsForEquality(GaussianMixtureModel gmm1, GaussianMixtureModel gmm2, double epsilon){ + Assert.assertEquals(gmm1.getModelGaussians().size(), gmm2.getModelGaussians().size(), 0); + + for(int k = 0; k < gmm1.getModelGaussians().size(); k++) { + final MultivariateGaussian g = gmm1.getModelGaussians().get(k); + final MultivariateGaussian gFile = gmm2.getModelGaussians().get(k); + + for(int i = 0; i < g.mu.length; i++){ + Assert.assertEquals(g.mu[i], gFile.mu[i], epsilon); + } + + for(int i = 0; i < g.sigma.getRowDimension(); i++) { + for (int j = 0; j < g.sigma.getColumnDimension(); j++) { + Assert.assertEquals(g.sigma.get(i, j), gFile.sigma.get(i, j), epsilon); + } + } + } + } + } \ No newline at end of file