search over q's for finding argmax[q] p(D|q)

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@412 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-04-14 19:15:45 +00:00
parent 96248cdec4
commit d28e9f9b98
3 changed files with 38 additions and 17 deletions

View File

@ -304,9 +304,9 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
{
double q = ML_q(bases, quals, refnum, altnum);
double q = ML_q_byEM(bases, quals, refnum, altnum);
double pDq = P_D_q(bases, quals, q, refnum, altnum);
long q_R = Math.round(q*bases.length);
@ -399,6 +399,22 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return alt_count / (alt_count + ref_count);
}
double ML_q_byEM(byte[] bases, double[][]quals, int refnum, int altnum)
{
double best_q = 0;
double best_likelihood = -1000000;
for (double q = 0.0; q <= 1.0; q += 0.001)
{
double likelihood = P_D_q(bases, quals, q, refnum, altnum);
if (likelihood >= best_likelihood)
{
best_likelihood = likelihood;
best_q = q;
}
}
return best_q;
}
double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum)
{
double p = 0.0;

View File

@ -24,11 +24,10 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
AlleleFrequencyWalker pooled_caller = null;
List<String> sample_names = null;
//@Argument public int DOWNSAMPLE;
public int DOWNSAMPLE = 4;
public int DOWNSAMPLE_NOISE = 3;
public boolean LOG_METRICS = true;
@Argument(required=false, shortName="downsample", defaultValue="4") public int DOWNSAMPLE;
@Argument(required=false, shortName="downsample_noise", defaultValue="3") public int DOWNSAMPLE_NOISE;
@Argument(required=false, shortName="log_metrics", defaultValue="true") public boolean LOG_METRICS;
private Random random;
public void initialize()
@ -70,9 +69,6 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
pooled_caller.N = sample_names.size() * 2;
pooled_caller.DOWNSAMPLE = 0;
pooled_caller.GFF_OUTPUT_FILE = "/dev/null";
//pooled_caller.LOG_METRICS = LOG_METRICS;
//pooled_caller.METRICS_OUTPUT_FILE = "metrics.out";
//pooled_caller.METRICS_INTERVAL = 1;
pooled_caller.initalize();
pooled_caller.reduceInit();
}
@ -135,10 +131,10 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
System.out.print("POOLED_CALL " + pooled_call.asTabularString());
// (this loop is the EM cycle)
EM_alt_freq = pooled_call.qhat;
EM_alt_freq = pooled_call.qstar; //pooled_call.qhat;
int num_iterations = 10;
double[] trajectory = new double[num_iterations + 1]; trajectory[0] = EM_alt_freq;
double[] likelihood_trajectory = new double[num_iterations + 1];
double[] likelihood_trajectory = new double[num_iterations + 1]; likelihood_trajectory[0] = pooled_call.pBest;
for (int iterations = 0; iterations < num_iterations; iterations++)
{
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
@ -157,9 +153,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
if ((deep_calls[i].lodVsNextBest >= 5.0) || (deep_calls[i].lodVsRef <= -5.0))
{
shallow_callers.get(i).setAlleleFrequencyPrior(EM_alt_freq);
shallow_calls[i] = shallow_callers.get(i).map(tracker, ref, filterLocusContext(context, sample_names.get(i), DOWNSAMPLE + (int)(random.nextGaussian() * DOWNSAMPLE_NOISE)));
shallow_calls[i] = shallow_callers.get(i).map(tracker, ref, downsampled_contexts[i]);
String deep_genotype = deep_calls[i].genotype();
String shallow_genotype = shallow_calls[i].genotype();
likelihood += shallow_calls[i].lodVsNextBest;
/*
System.out.printf("DBG: %f %f %f %f\n",
@ -179,6 +177,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
EM_alt_freq = EM_sum / EM_N;
shallow_calls_fraction_correct = correct_shallow_calls / total_shallow_calls;
trajectory[iterations+1] = EM_alt_freq;
likelihood_trajectory[iterations+1] = likelihood/(double)total_shallow_calls;
}
// 7. Compare to estimation from the pool.
@ -196,12 +195,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
correct_shallow_calls,
shallow_calls_fraction_correct,
EM_alt_freq);
System.out.print("TRAJECTORY ");
for (int i = 0; i < trajectory.length; i++)
for (int i = 0; i < likelihood_trajectory.length; i++)
{
System.out.printf("%f ", trajectory[i]);
System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
}
System.out.print("\n");
System.out.print("\n\n");
return null;
}

View File

@ -109,8 +109,15 @@ public class AlleleMetrics {
//String called_genotype = alleleFreq.asString();
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString());
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
System.out.printf("DBG %f %f %f %f\n",
hapmap_q,
alleleFreq.qhat,
alleleFreq.qstar,
alleleFreq.lodVsNextBest);
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
//if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {