Use the worst X% of calls in addition to the bad training sites list. Don't include the already-added calls in the calculation of X%.

This commit is contained in:
Ryan Poplin 2011-07-02 17:55:10 -04:00
parent fdc2ebb321
commit 781c0c33a4
1 changed file with 3 additions and 2 deletions

View File

@ -151,13 +151,13 @@ public class VariantDataManager {
int index = 0, numAdded = 0; int index = 0, numAdded = 0;
while( numAdded < numToAdd ) { while( numAdded < numToAdd ) {
final VariantDatum datum = data.get(index++); final VariantDatum datum = data.get(index++);
if( !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) { if( !datum.atAntiTrainingSite && !datum.failingSTDThreshold && !Double.isInfinite(datum.lod) ) {
datum.atAntiTrainingSite = true; datum.atAntiTrainingSite = true;
trainingData.add( datum ); trainingData.add( datum );
numAdded++; numAdded++;
} }
} }
logger.info( "Additionally training with worst " + (float) bottomPercentage * 100.0f + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." ); logger.info( "Additionally training with worst " + String.format("%0.3f", (float) bottomPercentage * 100.0f) + "% of passing data --> " + (trainingData.size() - numBadSitesAdded) + " variants with LOD <= " + String.format("%.4f", data.get(index).lod) + "." );
return trainingData; return trainingData;
} }
@ -243,6 +243,7 @@ public class VariantDataManager {
datum.isKnown = false; datum.isKnown = false;
datum.atTruthSite = false; datum.atTruthSite = false;
datum.atTrainingSite = false; datum.atTrainingSite = false;
datum.atAntiTrainingSite = false;
datum.prior = 2.0; datum.prior = 2.0;
datum.consensusCount = 0; datum.consensusCount = 0;