search over q's for finding argmax[q] p(D|q)
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@412 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
96248cdec4
commit
d28e9f9b98
|
|
@ -304,9 +304,9 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
|
||||
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
|
||||
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
|
||||
|
||||
|
||||
{
|
||||
double q = ML_q(bases, quals, refnum, altnum);
|
||||
double q = ML_q_byEM(bases, quals, refnum, altnum);
|
||||
double pDq = P_D_q(bases, quals, q, refnum, altnum);
|
||||
long q_R = Math.round(q*bases.length);
|
||||
|
||||
|
|
@ -399,6 +399,22 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
|
|||
return alt_count / (alt_count + ref_count);
|
||||
}
|
||||
|
||||
double ML_q_byEM(byte[] bases, double[][]quals, int refnum, int altnum)
|
||||
{
|
||||
double best_q = 0;
|
||||
double best_likelihood = -1000000;
|
||||
for (double q = 0.0; q <= 1.0; q += 0.001)
|
||||
{
|
||||
double likelihood = P_D_q(bases, quals, q, refnum, altnum);
|
||||
if (likelihood >= best_likelihood)
|
||||
{
|
||||
best_likelihood = likelihood;
|
||||
best_q = q;
|
||||
}
|
||||
}
|
||||
return best_q;
|
||||
}
|
||||
|
||||
double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum)
|
||||
{
|
||||
double p = 0.0;
|
||||
|
|
|
|||
|
|
@ -24,11 +24,10 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
AlleleFrequencyWalker pooled_caller = null;
|
||||
List<String> sample_names = null;
|
||||
|
||||
//@Argument public int DOWNSAMPLE;
|
||||
public int DOWNSAMPLE = 4;
|
||||
public int DOWNSAMPLE_NOISE = 3;
|
||||
public boolean LOG_METRICS = true;
|
||||
|
||||
@Argument(required=false, shortName="downsample", defaultValue="4") public int DOWNSAMPLE;
|
||||
@Argument(required=false, shortName="downsample_noise", defaultValue="3") public int DOWNSAMPLE_NOISE;
|
||||
@Argument(required=false, shortName="log_metrics", defaultValue="true") public boolean LOG_METRICS;
|
||||
|
||||
private Random random;
|
||||
|
||||
public void initialize()
|
||||
|
|
@ -70,9 +69,6 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
pooled_caller.N = sample_names.size() * 2;
|
||||
pooled_caller.DOWNSAMPLE = 0;
|
||||
pooled_caller.GFF_OUTPUT_FILE = "/dev/null";
|
||||
//pooled_caller.LOG_METRICS = LOG_METRICS;
|
||||
//pooled_caller.METRICS_OUTPUT_FILE = "metrics.out";
|
||||
//pooled_caller.METRICS_INTERVAL = 1;
|
||||
pooled_caller.initalize();
|
||||
pooled_caller.reduceInit();
|
||||
}
|
||||
|
|
@ -135,10 +131,10 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
System.out.print("POOLED_CALL " + pooled_call.asTabularString());
|
||||
|
||||
// (this loop is the EM cycle)
|
||||
EM_alt_freq = pooled_call.qhat;
|
||||
EM_alt_freq = pooled_call.qstar; //pooled_call.qhat;
|
||||
int num_iterations = 10;
|
||||
double[] trajectory = new double[num_iterations + 1]; trajectory[0] = EM_alt_freq;
|
||||
double[] likelihood_trajectory = new double[num_iterations + 1];
|
||||
double[] likelihood_trajectory = new double[num_iterations + 1]; likelihood_trajectory[0] = pooled_call.pBest;
|
||||
for (int iterations = 0; iterations < num_iterations; iterations++)
|
||||
{
|
||||
// 6. Re-call from shallow coverage using the estimated frequency as a prior,
|
||||
|
|
@ -157,9 +153,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
if ((deep_calls[i].lodVsNextBest >= 5.0) || (deep_calls[i].lodVsRef <= -5.0))
|
||||
{
|
||||
shallow_callers.get(i).setAlleleFrequencyPrior(EM_alt_freq);
|
||||
shallow_calls[i] = shallow_callers.get(i).map(tracker, ref, filterLocusContext(context, sample_names.get(i), DOWNSAMPLE + (int)(random.nextGaussian() * DOWNSAMPLE_NOISE)));
|
||||
shallow_calls[i] = shallow_callers.get(i).map(tracker, ref, downsampled_contexts[i]);
|
||||
String deep_genotype = deep_calls[i].genotype();
|
||||
String shallow_genotype = shallow_calls[i].genotype();
|
||||
|
||||
likelihood += shallow_calls[i].lodVsNextBest;
|
||||
|
||||
/*
|
||||
System.out.printf("DBG: %f %f %f %f\n",
|
||||
|
|
@ -179,6 +177,7 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
EM_alt_freq = EM_sum / EM_N;
|
||||
shallow_calls_fraction_correct = correct_shallow_calls / total_shallow_calls;
|
||||
trajectory[iterations+1] = EM_alt_freq;
|
||||
likelihood_trajectory[iterations+1] = likelihood/(double)total_shallow_calls;
|
||||
}
|
||||
|
||||
// 7. Compare to estimation from the pool.
|
||||
|
|
@ -196,12 +195,11 @@ public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate,
|
|||
correct_shallow_calls,
|
||||
shallow_calls_fraction_correct,
|
||||
EM_alt_freq);
|
||||
System.out.print("TRAJECTORY ");
|
||||
for (int i = 0; i < trajectory.length; i++)
|
||||
for (int i = 0; i < likelihood_trajectory.length; i++)
|
||||
{
|
||||
System.out.printf("%f ", trajectory[i]);
|
||||
System.out.printf("TRAJECTORY %f %f\n", trajectory[i], likelihood_trajectory[i]);
|
||||
}
|
||||
System.out.print("\n");
|
||||
System.out.print("\n\n");
|
||||
|
||||
return null;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -109,8 +109,15 @@ public class AlleleMetrics {
|
|||
//String called_genotype = alleleFreq.asString();
|
||||
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
|
||||
|
||||
System.out.printf("DBG %f %s\n", LOD_cutoff, alleleFreq.asTabularString());
|
||||
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
|
||||
|
||||
System.out.printf("DBG %f %f %f %f\n",
|
||||
hapmap_q,
|
||||
alleleFreq.qhat,
|
||||
alleleFreq.qstar,
|
||||
alleleFreq.lodVsNextBest);
|
||||
|
||||
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
|
||||
//if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
|
||||
if (Math.abs(hapmap_q - -1.0) > dbl_cmp_precision && Math.abs(hapmap_q - alleleFreq.qstar) <= dbl_cmp_precision) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue