Updated AlleleFrequency* classes to calculate separate lods for VarVsRef and BestVsNextBest mixture (qstar) theories; AFWMetrics now reports single sample performance w.r.t. Hapmap chip using the appropriate lod for gentoyping (BestVsNextBest) or variant / reference calling (VarVsRef).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-03-26 02:10:18 +00:00
parent c88a17dfee
commit 0331cd8e95
3 changed files with 75 additions and 85 deletions

View File

@ -61,9 +61,9 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
}
}
if (Math.abs(alleleFreq.LOD) >= LOD_cutoff) { num_loci_confident += 1; }
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
if (alleleFreq.qstar > 0.0 && alleleFreq.LOD >= LOD_cutoff)
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
{
// Confident variant.
@ -75,20 +75,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
}
}
// Convert qstar and LOD to give best qstar and a positive LOD for that qstar
double qstar;
double LOD;
if (alleleFreq.LOD < 0) {
qstar = 0.0;
LOD = -1 * alleleFreq.LOD;
}else{
qstar = alleleFreq.qstar;
LOD = alleleFreq.LOD;
}
//long confident_calls_at_hapmap_chip_sites = 0;
if (LOD >= LOD_cutoff && has_hapmap_chip_genotype) {
//confident_calls_at_hapmap_chip_sites++;
if (has_hapmap_chip_genotype) {
// convert hapmap call to mixture of ref/nonref
String hapmap_genotype = hapmap_chip_genotype.getFeature();
@ -107,43 +94,39 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
}
// Hapmap debug info
System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, qstar, LOD);
System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
String called_genotype = alleleFreq.asString();
System.out.format("%s %s %c %c %.2f", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt, alleleFreq.LOD);
System.out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
if (hapmap_q != -1 && hapmap_q == qstar) {
hapmap_genotype_correct++;
System.out.print("\n");
}else{
hapmap_genotype_incorrect++;
System.out.printf(" INCORRECT GENOTYPE Bases: %s\n", AlleleFrequencyWalker.getBases(context));
AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
hapmap_genotype_correct++;
}else{
hapmap_genotype_incorrect++;
System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
}
// Now calculate ref / var performance - did we correctly classify the site as
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
boolean hapmap_var = hapmap_q != 0.0;
boolean called_var = qstar != 0.0;
if (hapmap_var != called_var) {
hapmap_refvar_incorrect++;
}else{
hapmap_refvar_correct++;
if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) {
// Now calculate ref / var performance - did we correctly classify the site as
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
boolean hapmap_var = hapmap_q != 0.0;
boolean called_var = alleleFreq.qstar != 0.0;
if (hapmap_q != -1 && hapmap_var != called_var) {
hapmap_refvar_incorrect++;
}else{
hapmap_refvar_correct++;
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
}
}
System.out.print("\n");
}
/*String hapmap_genotype = hapmap_chip_genotype.getFeature();
String called_genotype = alleleFreq.asString();
System.out.format("HAPMAP %s %s %.2f\n", hapmap_genotype, called_genotype, alleleFreq.LOD);
// There is a hapmap site here, is it correct?
if (hapmap_genotype.equals(called_genotype))
{
hapmap_tp++;
} else {
hapmap_fp++;
} */
return alleleFreq;
}
@ -190,7 +173,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
{
// Print RESULT data for confident calls
//if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); }
//if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
System.out.print(alleleFreq.asTabularString());
if (this.num_loci_total % 1000 == 0) { printMetrics(); }

View File

@ -8,6 +8,7 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Arrays;
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> {
@ -159,13 +160,14 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
double qstep = 0.001;
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
double best_qstar = Math.log10(0);
double best_qhat = Math.log10(0);
double best_posterior = Math.log10(0);
double best_pDq = Math.log10(0);
double best_pqG = Math.log10(0);
double best_pG = Math.log10(0);
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
// Fill 1..N ML positions with 0 valued MLs
for (int i=1; i<=N; i++) bestMixtures[i] = new MixtureLikelihood();
// Calculate null hypothesis for qstar = 0, qhat = 0
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
// Put null hypothesis into ML[0] because that is our prob of that theory and we don't loop over it below
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
// hypothetic allele balance that we sample over
for (double q=0.0; q <= qend; q += qstep)
@ -180,30 +182,36 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
double pG = P_G(N, qstar_N);
double posterior = pDq + pqG + pG;
if (posterior > best_posterior)
{
best_qstar = qstar;
best_qhat = q;
best_posterior = posterior;
if (posterior > bestMixtures[qstar_N].posterior)
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
best_pDq = pDq;
best_pqG = pqG;
best_pG = pG;
}
//System.out.format("%.2f %.2f %5.2f %5.2f %5.2f %5.2f\n", q, qstar, pDq, pqG, pG, posterior);
}
}
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
double LOD = best_posterior - posterior_null_hyp;
// First reverset sort NONREF mixtures according to highest posterior probabililty
Arrays.sort(bestMixtures, 1, N+1);
// Calculate Lod of any variant call versus the reference call
// Answers how confident are we in the best variant (nonref) mixture versus the null hypothesis
// reference mixture - qhat = qstar = 0.0
double lodVarVsRef = bestMixtures[1].posterior - posterior_null_hyp;
// Now reverse sort ALL mixtures according to highest posterior probability
Arrays.sort(bestMixtures);
// Calculate Lod of the mixture versus other possible
// Answers how confident are we in the best mixture versus the next best mixture
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
num2nuc[refnum],
num2nuc[altnum],
N,
best_qhat,
best_qstar,
LOD,
bestMixtures[0].qhat,
bestMixtures[0].qstar,
lodVarVsRef,
lodBestVsNextBest,
depth);
return alleleFreq;
}
@ -213,6 +221,12 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
public double qstar;
public double qhat;
MixtureLikelihood() {
this.posterior = Math.log10(0);
this.qstar = Math.log10(0);
this.qhat = Math.log10(0);
}
MixtureLikelihood(double posterior, double qstar, double qhat) {
this.posterior = posterior;
this.qstar = qstar;
@ -317,7 +331,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
public Integer reduce(AlleleFrequencyEstimate alleleFreq, Integer sum)
{
// Print RESULT data for confident calls
if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); }
if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
return 0;
}
@ -393,7 +407,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null"));
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
}
{
@ -510,7 +524,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
w.N = 10;
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null"));
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
}
}

View File

@ -10,21 +10,12 @@ public class AlleleFrequencyEstimate {
public int N;
public double qhat;
public double qstar;
public double LOD;
public double lodVsRef;
public double lodVsNextBest;
public int depth;
public String notes;
public double getQstar()
{
return qstar;
}
public double getLOD()
{
return LOD;
}
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double LOD, int depth)
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
{
this.location = location;
this.ref = ref;
@ -32,19 +23,21 @@ public class AlleleFrequencyEstimate {
this.N = N;
this.qhat = qhat;
this.qstar = qstar;
this.LOD = LOD;
this.lodVsRef = lodVsRef;
this.lodVsNextBest = lodVsNextBest;
this.depth = depth;
this.notes = "";
}
public String asTabularString() {
return String.format("RESULT %s %c %c %f %f %f %d %s\n",
return String.format("RESULT %s %c %c %f %f %f %f %d %s\n",
location,
ref,
alt,
qhat,
qstar,
LOD,
lodVsRef,
lodVsNextBest,
depth,
notes);
}