Use the worst X% of calls in addition to the bad training sites list. Don't include the already-added calls in the calculation of X%.

This commit is contained in:
Ryan Poplin 2011-07-02 17:55:10 -04:00
parent fdc2ebb321
commit 781c0c33a4
1 changed file with 3 additions and 2 deletions

View File

@ -151,13 +151,13 @@ public class VariantDataManager {
int index = 0, numAdded = 0;
while( numAdded < numToAdd ) {
final VariantDatum datum = data.get(index++);
if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) {
if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) {
datum.atAntiTrainingSite = true;
trainingData.add( datum );
numAdded++;
}
}
logger.info( "Additionally training with worst " + (float) bottomPercentage * 100.0f + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." );
logger.info( "Additionally training with worst " + String.format("%0.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." );
return trainingData;
}
@ -243,6 +243,7 @@ public class VariantDataManager {
datum.isKnown = false;
datum.atTruthSite = false;
datum.atTrainingSite = false;
datum.atAntiTrainingSite = false;
datum.prior = 2.0;
datum.consensusCount = 0;