Small bug fix to the new UG (need to initialize the entire posteriors array) means that we also get identical results as old UG when calling with 60 samples in the pilot1 data. Now that I'm happier with UGv2, I've transitioned it to use the correct AF priors instead of the busted ones still in the old UG.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4379 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-09-29 14:24:50 +00:00
parent eee134baf2
commit 0d71dff928
2 changed files with 11 additions and 35 deletions

View File

@ -60,6 +60,7 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest(); int minAlleleFrequencyToTest = getMinAlleleFrequencyToTest();
int maxAlleleFrequencyToTest = AFMatrix.getSamples().size() * 2; int maxAlleleFrequencyToTest = AFMatrix.getSamples().size() * 2;
// for each minor allele frequency, calculate log10PofDgivenAFi // for each minor allele frequency, calculate log10PofDgivenAFi
for (int i = 1; i <= maxAlleleFrequencyToTest; i++) { for (int i = 1; i <= maxAlleleFrequencyToTest; i++) {
// add one more alternate allele // add one more alternate allele
@ -70,10 +71,8 @@ public class GridSearchAFEstimation extends AlleleFrequencyCalculationModel {
// an optimization to speed up the calculation: if we are beyond the local maximum such // an optimization to speed up the calculation: if we are beyond the local maximum such
// that subsequent likelihoods won't factor into the confidence score, just quit // that subsequent likelihoods won't factor into the confidence score, just quit
if ( i >= minAlleleFrequencyToTest && maxLikelihoodSeen - log10AlleleFrequencyPosteriors[i] > LOG10_OPTIMIZATION_EPSILON ) { if ( i >= minAlleleFrequencyToTest && maxLikelihoodSeen - log10AlleleFrequencyPosteriors[i] > LOG10_OPTIMIZATION_EPSILON )
UnifiedGenotyperEngine.ignoreAlleleFrequenciesAboveI(log10AlleleFrequencyPosteriors, i);
return; return;
}
if ( log10AlleleFrequencyPosteriors[i] > maxLikelihoodSeen ) if ( log10AlleleFrequencyPosteriors[i] > maxLikelihoodSeen )
maxLikelihoodSeen = log10AlleleFrequencyPosteriors[i]; maxLikelihoodSeen = log10AlleleFrequencyPosteriors[i];

View File

@ -146,8 +146,8 @@ public class UnifiedGenotyperEngine {
// TODO: get rid of this optimization, it is wrong! // TODO: get rid of this optimization, it is wrong!
afcm.get().setMinAlleleFrequencyToTest(0); afcm.get().setMinAlleleFrequencyToTest(0);
// zero out the AFs above the N for this position // 'zero' out the AFs (so that we don't have to worry if not all samples have reads at this position)
ignoreAlleleFrequenciesAboveI(log10AlleleFrequencyPosteriors.get(), 2 * GLs.size()); clearAFarray(log10AlleleFrequencyPosteriors.get());
afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get()); afcm.get().getLog10PNonRef(tracker, refContext, GLs, log10AlleleFrequencyPriors, log10AlleleFrequencyPosteriors.get());
// find the most likely frequency // find the most likely frequency
@ -297,13 +297,9 @@ public class UnifiedGenotyperEngine {
return stratifiedContexts; return stratifiedContexts;
} }
/** protected static void clearAFarray(double[] AFs) {
* @param AFs AF array for ( int i = 0; i < AFs.length; i++ )
* @param freqI allele frequency I AFs[i] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
*/
protected static void ignoreAlleleFrequenciesAboveI(double[] AFs, int freqI) {
while ( ++freqI < AFs.length )
AFs[freqI] = AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED;
} }
private VariantCallContext estimateReferenceConfidence(Map<String, StratifiedAlignmentContext> contexts, double theta, boolean ignoreCoveredSamples) { private VariantCallContext estimateReferenceConfidence(Map<String, StratifiedAlignmentContext> contexts, double theta, boolean ignoreCoveredSamples) {
@ -342,7 +338,10 @@ public class UnifiedGenotyperEngine {
AFline.append(i + "/" + N + "\t"); AFline.append(i + "/" + N + "\t");
AFline.append(String.format("%.2f\t", ((float)i)/N)); AFline.append(String.format("%.2f\t", ((float)i)/N));
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPriors[i])); AFline.append(String.format("%.8f\t", log10AlleleFrequencyPriors[i]));
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i])); if ( log10AlleleFrequencyPosteriors.get()[i] == AlleleFrequencyCalculationModel.VALUE_NOT_CALCULATED)
AFline.append("0.0\t");
else
AFline.append(String.format("%.8f\t", log10AlleleFrequencyPosteriors.get()[i]));
AFline.append(String.format("%.8f\t", normalizedPosteriors[i])); AFline.append(String.format("%.8f\t", normalizedPosteriors[i]));
verboseWriter.println(AFline.toString()); verboseWriter.println(AFline.toString());
} }
@ -394,28 +393,6 @@ public class UnifiedGenotyperEngine {
} }
protected void computeAlleleFrequencyPriors(int N) { protected void computeAlleleFrequencyPriors(int N) {
// calculate sum(1/i)
double sigma_1_over_I = 0.0;
for (int i = 1; i <= N; i++)
sigma_1_over_I += 1.0 / (double)i;
// delta = theta / sum(1/i)
double delta = UAC.heterozygosity / sigma_1_over_I;
// calculate the null allele frequencies for 1-N
double sum = 0.0;
for (int i = 1; i <= N; i++) {
double value = delta / (double)i;
log10AlleleFrequencyPriors[i] = Math.log10(value);
sum += value;
}
// null frequency for AF=0 is (1 - sum(all other frequencies))
log10AlleleFrequencyPriors[0] = Math.log10(1.0 - sum);
}
// TODO: enable me
protected void computeAlleleFrequencyPriorsCorrect(int N) {
// calculate the allele frequency priors for 1-N // calculate the allele frequency priors for 1-N
double sum = 0.0; double sum = 0.0;
for (int i = 1; i <= N; i++) { for (int i = 1; i <= N; i++) {