fixing the previous fix

This commit is contained in:
Ryan Poplin 2011-08-20 21:21:55 -04:00
parent 539e157ecd
commit b008676878
4 changed files with 13 additions and 27 deletions

View File

@ -44,7 +44,6 @@ import java.util.List;
public class GaussianMixtureModel {
protected final static Logger logger = Logger.getLogger(GaussianMixtureModel.class);
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
private final ArrayList<MultivariateGaussian> gaussians;
private final double shrinkage;
@ -214,14 +213,7 @@ public class GaussianMixtureModel {
for( final MultivariateGaussian gaussian : gaussians ) {
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
}
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
// Cap the values at an extremely negative value and spread them out randomly
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
}
return lod;
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
}
// Used only to decide which covariate dimension is most divergent in order to report in the culprit info field annotation
@ -235,14 +227,7 @@ public class GaussianMixtureModel {
normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) );
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) );
}
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
// Cap the values at an extremely negative value and spread them out randomly
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
}
return lod;
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
}
public double evaluateDatumMarginalized( final VariantDatum datum ) {
@ -269,13 +254,6 @@ public class GaussianMixtureModel {
}
}
}
double lod = Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
// Cap the values at an extremely negative value and spread them out randomly
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
}
return lod;
return Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
}
}

View File

@ -272,7 +272,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
double priorFactor = QualityUtils.qualToProb( datum.prior );
//if( PERFORM_PROJECT_CONSENSUS ) {
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
//}

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.exceptions.UserException;
import java.util.List;
@ -43,6 +44,7 @@ public class VariantRecalibratorEngine {
/////////////////////////////
protected final static Logger logger = Logger.getLogger(VariantRecalibratorEngine.class);
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
// the unified argument collection
final private VariantRecalibratorArgumentCollection VRAC;
@ -78,7 +80,12 @@ public class VariantRecalibratorEngine {
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe.");
}
}
datum.lod = ( evaluateContrastively ? (datum.prior + datum.lod - thisLod) : thisLod );
datum.lod = ( evaluateContrastively ?
( Double.isInfinite(datum.lod) ? // positive model said negative infinity
( MIN_ACCEPTABLE_LOD_SCORE + GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE ) // Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
: datum.prior + datum.lod - thisLod) // contrastive evaluation: (prior + positive model - negative model)
: thisLod ); // positive model only so set the lod and return
}
}

View File

@ -236,6 +236,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
// 3.) Variant Quality Score Recalibration - Generate Recalibration table
class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
this.memoryLimit = 4
this.nt = 2
this.reference_sequence = t.reference
this.intervalsString ++= List(t.intervals)
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )