Fixed strand bias calculation because of -Infinity issues.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@2980 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-03-11 16:05:51 +00:00
parent dde9fd8a15
commit 4a05757a2a
3 changed files with 15 additions and 2 deletions

View File

@ -66,6 +66,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// first, calculate for AF=0 (no change to matrix) // first, calculate for AF=0 (no change to matrix)
log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency(); log10PofDgivenAFi[baseIndex][0] = matrix.getLikelihoodsOfFrequency();
double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0]; double maxLikelihoodSeen = log10PofDgivenAFi[baseIndex][0];
int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest();
// for each minor allele frequency, calculate log10PofDgivenAFi // for each minor allele frequency, calculate log10PofDgivenAFi
for (int i = 1; i <= numFrequencies; i++) { for (int i = 1; i <= numFrequencies; i++) {
@ -77,7 +78,7 @@ public class DiploidGenotypeCalculationModel extends JointEstimateGenotypeCalcul
// an optimization to speed up the calculation: if we are beyond the local maximum such // an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit // that subsequent likelihoods won't factor into the confidence score, just quit
if ( maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) { if ( i >= minAlleleFrequencyToTest && maxLikelihoodSeen - log10PofDgivenAFi[baseIndex][i] > LOG10_OPTIMIZATION_EPSILON ) {
ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex); ignoreAlleleFrequenciesAboveI(i, numFrequencies, baseIndex);
return; return;
} }

View File

@ -16,6 +16,7 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
// how much off from the max likelihoods do we need to be before we can quit calculating? // how much off from the max likelihoods do we need to be before we can quit calculating?
protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0; protected static final Double LOG10_OPTIMIZATION_EPSILON = 8.0;
protected static final Double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE; protected static final Double VALUE_NOT_CALCULATED = -1.0 * Double.MAX_VALUE;
private int minAlleleFrequencyToTest;
// because the null allele frequencies are constant for a given N, // because the null allele frequencies are constant for a given N,
// we cache the results to avoid having to recompute everything // we cache the results to avoid having to recompute everything
@ -47,6 +48,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
int numSamples = getNSamples(contexts); int numSamples = getNSamples(contexts);
int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero) int frequencyEstimationPoints = (2 * numSamples) + 1; // (add 1 for allele frequency of zero)
// reset the optimization value
minAlleleFrequencyToTest = 0;
// find the alternate allele with the largest sum of quality scores // find the alternate allele with the largest sum of quality scores
initializeBestAlternateAllele(ref, contexts); initializeBestAlternateAllele(ref, contexts);
@ -82,6 +86,10 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
return vcc; return vcc;
} }
protected int getMinAlleleFrequencyToTest() {
return minAlleleFrequencyToTest;
}
protected int getNSamples(Map<String, StratifiedAlignmentContext> contexts) { protected int getNSamples(Map<String, StratifiedAlignmentContext> contexts) {
return contexts.size(); return contexts.size();
} }
@ -376,6 +384,9 @@ public abstract class JointEstimateGenotypeCalculationModel extends GenotypeCalc
double overallLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess]; double overallLog10PofF = log10AlleleFrequencyPriors[bestAFguess] + log10PofDgivenAFi[indexOfMax][bestAFguess];
double lod = overallLog10PofF - overallLog10PofNull; double lod = overallLog10PofF - overallLog10PofNull;
// set the optimization value for the subsequent strand calculations
minAlleleFrequencyToTest = bestAFguess;
// the forward lod // the forward lod
initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); initialize(ref, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);
calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD); calculateAlleleFrequencyPosteriors(ref, frequencyEstimationPoints, contexts, StratifiedAlignmentContext.StratifiedContextType.FORWARD);

View File

@ -54,6 +54,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
int altIndex = BaseUtils.simpleBaseToBaseIndex(alt); int altIndex = BaseUtils.simpleBaseToBaseIndex(alt);
double maxLikelihoodSeen = -1.0 * Double.MAX_VALUE; double maxLikelihoodSeen = -1.0 * Double.MAX_VALUE;
int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest();
// for each minor allele frequency, calculate log10PofDgivenAFi // for each minor allele frequency, calculate log10PofDgivenAFi
for (int frequency = 0; frequency <= nChromosomes; frequency++) { for (int frequency = 0; frequency <= nChromosomes; frequency++) {
@ -72,7 +73,7 @@ public class PooledCalculationModel extends JointEstimateGenotypeCalculationMode
// an optimization to speed up the calculation: if we are beyond the local maximum such // an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit // that subsequent likelihoods won't factor into the confidence score, just quit
if ( frequency > 0 && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) { if ( frequency > minAlleleFrequencyToTest && maxLikelihoodSeen - log10PofDgivenAFi[altIndex][frequency] > LOG10_OPTIMIZATION_EPSILON ) {
ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex); ignoreAlleleFrequenciesAboveI(frequency, nChromosomes, altIndex);
return; return;
} }