Removing untrue comments in the GaussianMixtureModel

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5369 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
rpoplin 2011-03-03 18:18:47 +00:00
parent 9e12cd1312
commit f7ef35b8f5
1 changed files with 79 additions and 75 deletions

View File

@ -445,41 +445,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
return value;
}
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
final double[] log10pVarInCluster = new double[maxGaussians];
final double[] annotations = new double[dataManager.numAnnotations];
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false );
double z = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
// switch ( dataManager.getAnnotationType(jjj) ) {
// case TWO_TAILED: // z is already fine
// break;
// case SMALL_IS_GOOD:
// if ( z < 0.0 ) {
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
// z = 0.0;
// }
// break;
// case BIG_IS_GOOD:
// if ( z > 0.0 ) {
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
// z = 0.0;
// }
// break;
// default:
// throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
// }
annotations[jjj] = z;
}
return evaluateGaussiansForSingleVariant( annotations, log10pVarInCluster );
}
// ---------------------------------------------------------------------------------------------------------
//
// Code to determine FDR tranches for VariantDatum[]
@ -713,6 +678,85 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
// Code to evaluate vectors given an already trained model
//
// ---------------------------------------------------------------------------------------------------------
public final double evaluateVariant( GenomeLocParser genomeLocParser, final VariantContext vc ) {
final double[] log10pVarInCluster = new double[maxGaussians];
final double[] annotations = new double[dataManager.numAnnotations];
for( int jjj = 0; jjj < dataManager.numAnnotations; jjj++ ) {
final double value = decodeAnnotation( genomeLocParser, dataManager.annotationKeys.get(jjj), vc, false );
double z = (value - dataManager.meanVector[jjj]) / dataManager.varianceVector[jjj];
// switch ( dataManager.getAnnotationType(jjj) ) {
// case TWO_TAILED: // z is already fine
// break;
// case SMALL_IS_GOOD:
// if ( z < 0.0 ) {
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
// z = 0.0;
// }
// break;
// case BIG_IS_GOOD:
// if ( z > 0.0 ) {
//// logger.info(String.format("One-tailed test correction: %s %.2f mu=%.2f sigma=%.2f z=%.2f, newZ = 0",
//// dataManager.annotationKeys.get(jjj), value, dataManager.meanVector[jjj], dataManager.varianceVector[jjj], z));
// z = 0.0;
// }
// break;
// default:
// throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
// }
annotations[jjj] = z;
}
return evaluateGaussiansForSingleVariant( annotations, log10pVarInCluster );
}
private double evaluateGaussiansForSingleVariant( final double[] annotations, final double[] log10pVarInCluster ) {
final int numAnnotations = annotations.length;
for( int kkk = 0; kkk < maxGaussians; kkk++ ) {
final double sigmaVals[][] = sigmaInverse[kkk];
double sum = 0.0;
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
double value = 0.0;
for( int ppp = 0; ppp < numAnnotations; ppp++ ) {
final double myMu = mu[kkk][ppp];
double myAnn = annotations[ppp];
final double mySigma = sigmaVals[ppp][jjj];
double z = (myAnn - myMu) * mySigma;
switch ( dataManager.getAnnotationType(jjj) ) {
case TWO_TAILED: break;
case SMALL_IS_GOOD: if( myAnn < myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
case BIG_IS_GOOD: if( myAnn > myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
default: throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
}
value += z;
}
final double jNorm = annotations[jjj] - mu[kkk][jjj];
final double prod = value * jNorm;
sum += prod;
}
final double denomLog10 = CONSTANT_GAUSSIAN_DENOM_LOG10 + sqrtDeterminantLog10[kkk];
final double evalGaussianPDFLog10 = (( -0.5 * sum ) / Math.log(10.0)) - denomLog10;
//final double pVar1 = Math.pow(10.0, pClusterLog10[kkk] + evalGaussianPDFLog10);
final double pVar1 = pClusterLog10[kkk] + evalGaussianPDFLog10;
log10pVarInCluster[kkk] = pVar1;
}
return MathUtils.log10sumLog10(log10pVarInCluster);
}
// ---------------------------------------------------------------------------------------------------------
//
// Code to implement the steps of the VBEM algorithm
//
// ---------------------------------------------------------------------------------------------------------
private double evaluateGaussians( final VariantDatum[] data, final double[][] pVarInCluster, final int startCluster, final int stopCluster ) {
final int numAnnotations = data[0].annotations.length;
@ -797,46 +841,6 @@ public final class VariantGaussianMixtureModel extends VariantOptimizationModel
return likelihood / sumWeight;
}
private double evaluateGaussiansForSingleVariant( final double[] annotations, final double[] log10pVarInCluster ) {
final int numAnnotations = annotations.length;
for( int kkk = 0; kkk < maxGaussians; kkk++ ) {
final double sigmaVals[][] = sigmaInverse[kkk];
double sum = 0.0;
for( int jjj = 0; jjj < numAnnotations; jjj++ ) {
double value = 0.0;
for( int ppp = 0; ppp < numAnnotations; ppp++ ) {
final double myMu = mu[kkk][ppp];
double myAnn = annotations[ppp];
final double mySigma = sigmaVals[ppp][jjj];
double z = (myAnn - myMu) * mySigma;
switch ( dataManager.getAnnotationType(jjj) ) {
case TWO_TAILED: break;
case SMALL_IS_GOOD: if( myAnn < myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
case BIG_IS_GOOD: if( myAnn > myMu && Math.abs(z) < MAX_SIGMA_TO_BE_IN_GAUSSIAN ) { z = 0.0; } break;
default: throw new ReviewedStingException("Unexpected annotation tail type: " + dataManager.getAnnotationType(jjj));
}
value += z;
}
final double jNorm = annotations[jjj] - mu[kkk][jjj];
final double prod = value * jNorm;
sum += prod;
}
final double denomLog10 = CONSTANT_GAUSSIAN_DENOM_LOG10 + sqrtDeterminantLog10[kkk];
final double evalGaussianPDFLog10 = (( -0.5 * sum ) / Math.log(10.0)) - denomLog10;
//final double pVar1 = Math.pow(10.0, pClusterLog10[kkk] + evalGaussianPDFLog10);
final double pVar1 = pClusterLog10[kkk] + evalGaussianPDFLog10;
log10pVarInCluster[kkk] = pVar1;
}
return MathUtils.log10sumLog10(log10pVarInCluster);
}
private void maximizeGaussians( final VariantDatum[] data, final double[][] pVarInCluster, final int startCluster, final int stopCluster ) {
final int numVariants = data.length;