Updated AlleleFrequency* classes to calculate separate lods for VarVsRef and BestVsNextBest mixture (qstar) theories; AFWMetrics now reports single sample performance w.r.t. Hapmap chip using the appropriate lod for gentoyping (BestVsNextBest) or variant / reference calling (VarVsRef).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-03-26 02:10:18 +00:00
parent c88a17dfee
commit 0331cd8e95
3 changed files with 75 additions and 85 deletions

View File

@ -61,9 +61,9 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
} }
} }
if (Math.abs(alleleFreq.LOD) >= LOD_cutoff) { num_loci_confident += 1; } if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
if (alleleFreq.qstar > 0.0 && alleleFreq.LOD >= LOD_cutoff) if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
{ {
// Confident variant. // Confident variant.
@ -75,20 +75,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
} }
} }
// Convert qstar and LOD to give best qstar and a positive LOD for that qstar if (has_hapmap_chip_genotype) {
double qstar;
double LOD;
if (alleleFreq.LOD < 0) {
qstar = 0.0;
LOD = -1 * alleleFreq.LOD;
}else{
qstar = alleleFreq.qstar;
LOD = alleleFreq.LOD;
}
//long confident_calls_at_hapmap_chip_sites = 0;
if (LOD >= LOD_cutoff && has_hapmap_chip_genotype) {
//confident_calls_at_hapmap_chip_sites++;
// convert hapmap call to mixture of ref/nonref // convert hapmap call to mixture of ref/nonref
String hapmap_genotype = hapmap_chip_genotype.getFeature(); String hapmap_genotype = hapmap_chip_genotype.getFeature();
@ -107,43 +94,39 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
} }
// Hapmap debug info // Hapmap debug info
System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, qstar, LOD); System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
String called_genotype = alleleFreq.asString(); String called_genotype = alleleFreq.asString();
System.out.format("%s %s %c %c %.2f", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt, alleleFreq.LOD); System.out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices? if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
if (hapmap_q != -1 && hapmap_q == qstar) {
hapmap_genotype_correct++; // Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
System.out.print("\n"); if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
}else{ hapmap_genotype_correct++;
hapmap_genotype_incorrect++; }else{
System.out.printf(" INCORRECT GENOTYPE Bases: %s\n", AlleleFrequencyWalker.getBases(context)); hapmap_genotype_incorrect++;
AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context)); System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
} }
// Now calculate ref / var performance - did we correctly classify the site as if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) {
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
boolean hapmap_var = hapmap_q != 0.0; // Now calculate ref / var performance - did we correctly classify the site as
boolean called_var = qstar != 0.0; // reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
if (hapmap_var != called_var) { boolean hapmap_var = hapmap_q != 0.0;
hapmap_refvar_incorrect++; boolean called_var = alleleFreq.qstar != 0.0;
}else{ if (hapmap_q != -1 && hapmap_var != called_var) {
hapmap_refvar_correct++; hapmap_refvar_incorrect++;
}else{
hapmap_refvar_correct++;
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
}
} }
System.out.print("\n");
} }
/*String hapmap_genotype = hapmap_chip_genotype.getFeature();
String called_genotype = alleleFreq.asString();
System.out.format("HAPMAP %s %s %.2f\n", hapmap_genotype, called_genotype, alleleFreq.LOD);
// There is a hapmap site here, is it correct?
if (hapmap_genotype.equals(called_genotype))
{
hapmap_tp++;
} else {
hapmap_fp++;
} */
return alleleFreq; return alleleFreq;
} }
@ -190,7 +173,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
{ {
// Print RESULT data for confident calls // Print RESULT data for confident calls
//if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); } //if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
System.out.print(alleleFreq.asTabularString()); System.out.print(alleleFreq.asTabularString());
if (this.num_loci_total % 1000 == 0) { printMetrics(); } if (this.num_loci_total % 1000 == 0) { printMetrics(); }

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import net.sf.samtools.SAMRecord; import net.sf.samtools.SAMRecord;
import java.util.List; import java.util.List;
import java.util.Arrays;
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> { public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> {
@ -159,13 +160,14 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
double qstep = 0.001; double qstep = 0.001;
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
double best_qstar = Math.log10(0); // Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
double best_qhat = Math.log10(0); MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
double best_posterior = Math.log10(0); // Fill 1..N ML positions with 0 valued MLs
for (int i=1; i<=N; i++) bestMixtures[i] = new MixtureLikelihood();
double best_pDq = Math.log10(0); // Calculate null hypothesis for qstar = 0, qhat = 0
double best_pqG = Math.log10(0); double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
double best_pG = Math.log10(0); // Put null hypothesis into ML[0] because that is our prob of that theory and we don't loop over it below
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
// hypothetic allele balance that we sample over // hypothetic allele balance that we sample over
for (double q=0.0; q <= qend; q += qstep) for (double q=0.0; q <= qend; q += qstep)
@ -180,30 +182,36 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
double pG = P_G(N, qstar_N); double pG = P_G(N, qstar_N);
double posterior = pDq + pqG + pG; double posterior = pDq + pqG + pG;
if (posterior > best_posterior) if (posterior > bestMixtures[qstar_N].posterior)
{ bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
best_qstar = qstar;
best_qhat = q;
best_posterior = posterior;
best_pDq = pDq;
best_pqG = pqG;
best_pG = pG;
}
//System.out.format("%.2f %.2f %5.2f %5.2f %5.2f %5.2f\n", q, qstar, pDq, pqG, pG, posterior); //System.out.format("%.2f %.2f %5.2f %5.2f %5.2f %5.2f\n", q, qstar, pDq, pqG, pG, posterior);
} }
} }
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0); // First reverset sort NONREF mixtures according to highest posterior probabililty
double LOD = best_posterior - posterior_null_hyp; Arrays.sort(bestMixtures, 1, N+1);
// Calculate Lod of any variant call versus the reference call
// Answers how confident are we in the best variant (nonref) mixture versus the null hypothesis
// reference mixture - qhat = qstar = 0.0
double lodVarVsRef = bestMixtures[1].posterior - posterior_null_hyp;
// Now reverse sort ALL mixtures according to highest posterior probability
Arrays.sort(bestMixtures);
// Calculate Lod of the mixture versus other possible
// Answers how confident are we in the best mixture versus the next best mixture
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location, AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
num2nuc[refnum], num2nuc[refnum],
num2nuc[altnum], num2nuc[altnum],
N, N,
best_qhat, bestMixtures[0].qhat,
best_qstar, bestMixtures[0].qstar,
LOD, lodVarVsRef,
lodBestVsNextBest,
depth); depth);
return alleleFreq; return alleleFreq;
} }
@ -213,6 +221,12 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
public double qstar; public double qstar;
public double qhat; public double qhat;
MixtureLikelihood() {
this.posterior = Math.log10(0);
this.qstar = Math.log10(0);
this.qhat = Math.log10(0);
}
MixtureLikelihood(double posterior, double qstar, double qhat) { MixtureLikelihood(double posterior, double qstar, double qhat) {
this.posterior = posterior; this.posterior = posterior;
this.qstar = qstar; this.qstar = qstar;
@ -317,7 +331,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
public Integer reduce(AlleleFrequencyEstimate alleleFreq, Integer sum) public Integer reduce(AlleleFrequencyEstimate alleleFreq, Integer sum)
{ {
// Print RESULT data for confident calls // Print RESULT data for confident calls
if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); } if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
return 0; return 0;
} }
@ -393,7 +407,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
AlleleFrequencyWalker w = new AlleleFrequencyWalker(); AlleleFrequencyWalker w = new AlleleFrequencyWalker();
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20); AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n", System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null")); "null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
} }
{ {
@ -510,7 +524,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
w.N = 10; w.N = 10;
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20); AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n", System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null")); "null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
} }
} }

View File

@ -10,21 +10,12 @@ public class AlleleFrequencyEstimate {
public int N; public int N;
public double qhat; public double qhat;
public double qstar; public double qstar;
public double LOD; public double lodVsRef;
public double lodVsNextBest;
public int depth; public int depth;
public String notes; public String notes;
public double getQstar() public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
{
return qstar;
}
public double getLOD()
{
return LOD;
}
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double LOD, int depth)
{ {
this.location = location; this.location = location;
this.ref = ref; this.ref = ref;
@ -32,19 +23,21 @@ public class AlleleFrequencyEstimate {
this.N = N; this.N = N;
this.qhat = qhat; this.qhat = qhat;
this.qstar = qstar; this.qstar = qstar;
this.LOD = LOD; this.lodVsRef = lodVsRef;
this.lodVsNextBest = lodVsNextBest;
this.depth = depth; this.depth = depth;
this.notes = ""; this.notes = "";
} }
public String asTabularString() { public String asTabularString() {
return String.format("RESULT %s %c %c %f %f %f %d %s\n", return String.format("RESULT %s %c %c %f %f %f %f %d %s\n",
location, location,
ref, ref,
alt, alt,
qhat, qhat,
qstar, qstar,
LOD, lodVsRef,
lodVsNextBest,
depth, depth,
notes); notes);
} }