fixing the previous fix
This commit is contained in:
parent
539e157ecd
commit
b008676878
|
|
@ -44,7 +44,6 @@ import java.util.List;
|
||||||
public class GaussianMixtureModel {
|
public class GaussianMixtureModel {
|
||||||
|
|
||||||
protected final static Logger logger = Logger.getLogger(GaussianMixtureModel.class);
|
protected final static Logger logger = Logger.getLogger(GaussianMixtureModel.class);
|
||||||
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
|
|
||||||
|
|
||||||
private final ArrayList<MultivariateGaussian> gaussians;
|
private final ArrayList<MultivariateGaussian> gaussians;
|
||||||
private final double shrinkage;
|
private final double shrinkage;
|
||||||
|
|
@ -214,14 +213,7 @@ public class GaussianMixtureModel {
|
||||||
for( final MultivariateGaussian gaussian : gaussians ) {
|
for( final MultivariateGaussian gaussian : gaussians ) {
|
||||||
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
|
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + gaussian.evaluateDatumLog10( datum );
|
||||||
}
|
}
|
||||||
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||||
|
|
||||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
|
||||||
// Cap the values at an extremely negative value and spread them out randomly
|
|
||||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
|
||||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
|
||||||
}
|
|
||||||
return lod;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Used only to decide which covariate dimension is most divergent in order to report in the culprit info field annotation
|
// Used only to decide which covariate dimension is most divergent in order to report in the culprit info field annotation
|
||||||
|
|
@ -235,14 +227,7 @@ public class GaussianMixtureModel {
|
||||||
normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) );
|
normal.setState( gaussian.mu[iii], gaussian.sigma.get(iii, iii) );
|
||||||
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) );
|
pVarInGaussianLog10[gaussianIndex++] = gaussian.pMixtureLog10 + Math.log10( normal.pdf( datum.annotations[iii] ) );
|
||||||
}
|
}
|
||||||
double lod = MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
return MathUtils.log10sumLog10(pVarInGaussianLog10); // Sum(pi_k * p(v|n,k))
|
||||||
|
|
||||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
|
||||||
// Cap the values at an extremely negative value and spread them out randomly
|
|
||||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
|
||||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
|
||||||
}
|
|
||||||
return lod;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public double evaluateDatumMarginalized( final VariantDatum datum ) {
|
public double evaluateDatumMarginalized( final VariantDatum datum ) {
|
||||||
|
|
@ -269,13 +254,6 @@ public class GaussianMixtureModel {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
double lod = Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
|
return Math.log10( sumPVarInGaussian / ((double) numRandomDraws) );
|
||||||
|
|
||||||
// Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
|
||||||
// Cap the values at an extremely negative value and spread them out randomly
|
|
||||||
if( lod < MIN_ACCEPTABLE_LOD_SCORE ) {
|
|
||||||
lod = MIN_ACCEPTABLE_LOD_SCORE - GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE;
|
|
||||||
}
|
|
||||||
return lod;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -272,7 +272,7 @@ public class VariantRecalibrator extends RodWalker<ExpandingArrayList<VariantDat
|
||||||
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
// Loop through the training data sets and if they overlap this loci then update the prior and training status appropriately
|
||||||
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
|
dataManager.parseTrainingSets( tracker, context.getLocation(), vc, datum, TRUST_ALL_POLYMORPHIC, rodToPriorMap, training, truth, known, badSites, resource ); // BUGBUG: need to clean this up to be a class, not a list of all the rod bindings
|
||||||
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
double priorFactor = QualityUtils.qualToProb( datum.prior );
|
||||||
//if( PERFORM_PROJECT_CONSENSUS ) {
|
//if( PERFORM_PROJECT_CONSENSUS ) { // BUGBUG: need to resurrect this functionality?
|
||||||
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
// final double consensusPrior = QualityUtils.qualToProb( 1.0 + 5.0 * datum.consensusCount );
|
||||||
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
// priorFactor = 1.0 - ((1.0 - priorFactor) * (1.0 - consensusPrior));
|
||||||
//}
|
//}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
package org.broadinstitute.sting.gatk.walkers.variantrecalibration;
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
@ -43,6 +44,7 @@ public class VariantRecalibratorEngine {
|
||||||
/////////////////////////////
|
/////////////////////////////
|
||||||
|
|
||||||
protected final static Logger logger = Logger.getLogger(VariantRecalibratorEngine.class);
|
protected final static Logger logger = Logger.getLogger(VariantRecalibratorEngine.class);
|
||||||
|
public final static double MIN_ACCEPTABLE_LOD_SCORE = -20000.0;
|
||||||
|
|
||||||
// the unified argument collection
|
// the unified argument collection
|
||||||
final private VariantRecalibratorArgumentCollection VRAC;
|
final private VariantRecalibratorArgumentCollection VRAC;
|
||||||
|
|
@ -78,7 +80,12 @@ public class VariantRecalibratorEngine {
|
||||||
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe.");
|
throw new UserException("NaN LOD value assigned. Clustering with this few variants and these annotations is unsafe.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
datum.lod = ( evaluateContrastively ? (datum.prior + datum.lod - thisLod) : thisLod );
|
|
||||||
|
datum.lod = ( evaluateContrastively ?
|
||||||
|
( Double.isInfinite(datum.lod) ? // positive model said negative infinity
|
||||||
|
( MIN_ACCEPTABLE_LOD_SCORE + GenomeAnalysisEngine.getRandomGenerator().nextDouble() * MIN_ACCEPTABLE_LOD_SCORE ) // Negative infinity lod values are possible when covariates are extremely far away from their tight Gaussians
|
||||||
|
: datum.prior + datum.lod - thisLod) // contrastive evaluation: (prior + positive model - negative model)
|
||||||
|
: thisLod ); // positive model only so set the lod and return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -236,6 +236,7 @@ class MethodsDevelopmentCallingPipeline extends QScript {
|
||||||
// 3.) Variant Quality Score Recalibration - Generate Recalibration table
|
// 3.) Variant Quality Score Recalibration - Generate Recalibration table
|
||||||
class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
class VQSR(t: Target, goldStandard: Boolean) extends VariantRecalibrator with UNIVERSAL_GATK_ARGS {
|
||||||
this.memoryLimit = 4
|
this.memoryLimit = 4
|
||||||
|
this.nt = 2
|
||||||
this.reference_sequence = t.reference
|
this.reference_sequence = t.reference
|
||||||
this.intervalsString ++= List(t.intervals)
|
this.intervalsString ++= List(t.intervals)
|
||||||
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
this.input :+= ( if ( goldStandard ) { t.goldStandard_VCF } else { t.rawVCF } )
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue