fixing the previous fix
This commit is contained in:
parent
539e157ecd
commit
b008676878
|
|
@ -44,7 +44,6 @@ import java.util.List;
|
|||
public class GaussianMixtureModel {
|
||||
|
||||
protected final static Logger logger = Logger.getLogger(GaussianMixtureModel.class);
|
||||
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
|
||||
|
||||
private final ArrayList<MultivariateGaussian> gaussians;
|
||||
private final double shrinkage;
|
||||
|
|
@ -214,14 +213,7 @@ public class GaussianMixtureModel {
|
|||
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
|
||||
}
|
||||
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||
|
||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
||||
// Cap the values at an extremely negative value and spread them out randomly
|
||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
||||
}
|
||||
return lod;
|
||||
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||
}
|
||||
|
||||
// Used only to decide which covariate dimension is most divergent in order to report in the culprit info field annotation
|
||||
|
|
@ -235,14 +227,7 @@ public class GaussianMixtureModel {
|
|||
normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) );
|
||||
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) );
|
||||
}
|
||||
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||
|
||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
||||
// Cap the values at an extremely negative value and spread them out randomly
|
||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
||||
}
|
||||
return lod;
|
||||
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||
}
|
||||
|
||||
public double evaluateDatumMarginalized( final VariantDatum datum ) {
|
||||
|
|
@ -269,13 +254,6 @@ public class GaussianMixtureModel {
|
|||
}
|
||||
}
|
||||
}
|
||||
double lod = Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
|
||||
|
||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
||||
// Cap the values at an extremely negative value and spread them out randomly
|
||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
||||
}
|
||||
return lod;
|
||||
return Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
|
||||
}
|
||||
}
|
||||
|
|
@ -272,7 +272,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
|||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
|
||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||
//if( PERFORM_PROJECT_CONSENSUS ) {
|
||||
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||
//}
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -43,6 +44,7 @@ public class VariantRecalibratorEngine {
|
|||
/////////////////////////////
|
||||
|
||||
protected final static Logger logger = Logger.getLogger(VariantRecalibratorEngine.class);
|
||||
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
|
||||
|
||||
// the unified argument collection
|
||||
final private VariantRecalibratorArgumentCollection VRAC;
|
||||
|
|
@ -78,7 +80,12 @@ public class VariantRecalibratorEngine {
|
|||
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe.");
|
||||
}
|
||||
}
|
||||
datum.lod = ( evaluateContrastively ? (datum.prior + datum.lod - thisLod) : thisLod );
|
||||
|
||||
datum.lod = ( evaluateContrastively ?
|
||||
( Double.isInfinite(datum.lod) ? // positive model said negative infinity
|
||||
( MIN_ACCEPTABLE_LOD_SCORE + GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE ) // Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
||||
: datum.prior + datum.lod - thisLod) // contrastive evaluation: (prior + positive model - negative model)
|
||||
: thisLod ); // positive model only so set the lod and return
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -236,6 +236,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
|||
// 3.) Variant Quality Score Recalibration - Generate Recalibration table
|
||||
class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||
this.memoryLimit = 4
|
||||
this.nt = 2
|
||||
this.reference_sequence = t.reference
|
||||
this.intervalsString ++= List(t.intervals)
|
||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
||||
|
|
|
|||
Loading…
Reference in New Issue