Bug fix in the variant optimizer for the case where the callset contains more novel variants than known variants. Also changes the magic numbers for the starting sigma values of the Gaussian clusters.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2952 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2010-03-08 13:02:08 +00:00
parent e4360bac6a
commit b42e0a398e
1 changed file with 24 additions and 15 deletions

View File

@ -74,21 +74,28 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
numSubset++; numSubset++;
} }
} }
final VariantDatum[] data = new VariantDatum[numSubset*2]; VariantDatum[] data;
int iii = 0;
for( final VariantDatum datum : dataManager.data ) { if( numSubset * 2 * 1.3 < dataManager.numVariants ) {
if( !datum.isKnown ) { data = new VariantDatum[numSubset*2];
data[iii++] = datum; int iii = 0;
for( final VariantDatum datum : dataManager.data ) {
if( !datum.isKnown ) {
data[iii++] = datum;
}
} }
} while( iii < numSubset*2 ) { // grab an equal number of known variants at random
while( iii < numSubset*2 ) { // grab an equal number of known variants at random final VariantDatum datum = dataManager.data[rand.nextInt(dataManager.numVariants)];
final VariantDatum datum = dataManager.data[rand.nextInt(dataManager.numVariants)]; if( datum.isKnown ) {
if( datum.isKnown ) { data[iii++] = datum;
data[iii++] = datum; }
} }
} else {
data = dataManager.data;
} }
System.out.println("Clustering with " + numSubset*2 + " variants..."); System.out.println("Clustering with " + data.length + " variants...");
if( data.length == dataManager.numVariants ) { System.out.println(" (used all variants since 2*numNovel is so large compared to the full set) "); }
createClusters( data ); // Using a subset of the data createClusters( data ); // Using a subset of the data
System.out.println("Printing out cluster parameters..."); System.out.println("Printing out cluster parameters...");
printClusters( outputPrefix ); printClusters( outputPrefix );
@ -124,7 +131,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
final double[] randSigma = new double[numAnnotations]; final double[] randSigma = new double[numAnnotations];
if( dataManager.isNormalized ) { if( dataManager.isNormalized ) {
for( int jjj = 0; jjj < numAnnotations; jjj++ ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
randSigma[jjj] = 0.9 + 0.2 * rand.nextDouble(); randSigma[jjj] = 0.75 + 0.4 * rand.nextDouble();
} }
} else { // BUGBUG: if not normalized then the varianceVector hasn't been calculated --> null pointer } else { // BUGBUG: if not normalized then the varianceVector hasn't been calculated --> null pointer
for( int jjj = 0; jjj < numAnnotations; jjj++ ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
@ -414,7 +421,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
// Find a place to put the example variant // Find a place to put the example variant
if( rrr == 0 ) { // Replace the big cluster that kicked this process off if( rrr == 0 ) { // Replace the big cluster that kicked this process off
mu[kkk] = data[randVarIndex].annotations; mu[kkk] = data[randVarIndex].annotations;
//sigma[kkk] = savedSigma;
pCluster[kkk] = 1.0 / ((double) numGaussians); pCluster[kkk] = 1.0 / ((double) numGaussians);
} else { // Replace the cluster with the minimum prob } else { // Replace the cluster with the minimum prob
double minProb = pCluster[0]; double minProb = pCluster[0];
@ -428,7 +434,10 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
mu[minClusterIndex] = data[randVarIndex].annotations; mu[minClusterIndex] = data[randVarIndex].annotations;
sigma[minClusterIndex] = savedSigma; sigma[minClusterIndex] = savedSigma;
for( int jjj = 0; jjj < numAnnotations; jjj++ ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
sigma[minClusterIndex][jjj] += -0.02 + 0.04 * rand.nextDouble(); sigma[minClusterIndex][jjj] += -0.06 + 0.12 * rand.nextDouble();
if( sigma[minClusterIndex][jjj] < MIN_SUM_PROB ) {
sigma[minClusterIndex][jjj] = MIN_SUM_PROB;
}
} }
pCluster[minClusterIndex] = 1.0 / ((double) numGaussians); pCluster[minClusterIndex] = 1.0 / ((double) numGaussians);
} }
@ -444,7 +453,7 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
final double[] randSigma = new double[numAnnotations]; final double[] randSigma = new double[numAnnotations];
if( dataManager.isNormalized ) { if( dataManager.isNormalized ) {
for( int jjj = 0; jjj < numAnnotations; jjj++ ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
randSigma[jjj] = 0.9 + 0.2 * rand.nextDouble(); randSigma[jjj] = 0.6 + 0.4 * rand.nextDouble(); // Explore a wider range
} }
} else { // BUGBUG: if not normalized then the varianceVector hasn't been calculated --> null pointer } else { // BUGBUG: if not normalized then the varianceVector hasn't been calculated --> null pointer
for( int jjj = 0; jjj < numAnnotations; jjj++ ) { for( int jjj = 0; jjj < numAnnotations; jjj++ ) {