From 781c0c33a42a4df39624e33da7eb300a580c61b3 Mon Sep 17 00:00:00 2001 From: Ryan Poplin Date: Sat, 2 Jul 2011 17:55:10 -0400 Subject: [PATCH] Use the worst X% of calls in addition to the bad training sites list. Don't include the already added calls in the calculation of X% --- .../walkers/variantrecalibration/VariantDataManager.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java index cd739f9fc..309d1b8fa 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/variantrecalibration/VariantDataManager.java @@ -151,13 +151,13 @@ public class VariantDataManager { int index = 0, numAdded = 0; while( numAdded < numToAdd ) { final VariantDatum datum = data.get(index++); - if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { + if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { datum.atAntiTrainingSite = true; trainingData.add( datum ); numAdded++; } } - logger.info( "Additionally training with worst " + (float) bottomPercentage * 100.0f + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." ); + logger.info( "Additionally training with worst " + String.format("%.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." ); return trainingData; } @@ -243,6 +243,7 @@ public class VariantDataManager { datum.isKnown = false; datum.atTruthSite = false; datum.atTrainingSite = false; + datum.atAntiTrainingSite = false; datum.prior = 2.0; datum.consensusCount = 0;