Updated AlleleFrequency* classes to calculate separate lods for VarVsRef and BestVsNextBest mixture (qstar) theories; AFWMetrics now reports single sample performance w.r.t. Hapmap chip using the appropriate lod for gentoyping (BestVsNextBest) or variant / reference calling (VarVsRef).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@196 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
c88a17dfee
commit
0331cd8e95
|
|
@ -61,9 +61,9 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
|
|||
}
|
||||
}
|
||||
|
||||
if (Math.abs(alleleFreq.LOD) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) { num_loci_confident += 1; }
|
||||
|
||||
if (alleleFreq.qstar > 0.0 && alleleFreq.LOD >= LOD_cutoff)
|
||||
if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff)
|
||||
{
|
||||
// Confident variant.
|
||||
|
||||
|
|
@ -75,20 +75,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
|
|||
}
|
||||
}
|
||||
|
||||
// Convert qstar and LOD to give best qstar and a positive LOD for that qstar
|
||||
double qstar;
|
||||
double LOD;
|
||||
if (alleleFreq.LOD < 0) {
|
||||
qstar = 0.0;
|
||||
LOD = -1 * alleleFreq.LOD;
|
||||
}else{
|
||||
qstar = alleleFreq.qstar;
|
||||
LOD = alleleFreq.LOD;
|
||||
}
|
||||
|
||||
//long confident_calls_at_hapmap_chip_sites = 0;
|
||||
if (LOD >= LOD_cutoff && has_hapmap_chip_genotype) {
|
||||
//confident_calls_at_hapmap_chip_sites++;
|
||||
if (has_hapmap_chip_genotype) {
|
||||
|
||||
// convert hapmap call to mixture of ref/nonref
|
||||
String hapmap_genotype = hapmap_chip_genotype.getFeature();
|
||||
|
|
@ -107,43 +94,39 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
|
|||
}
|
||||
|
||||
// Hapmap debug info
|
||||
System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, qstar, LOD);
|
||||
System.out.format("HAPMAP %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
|
||||
String called_genotype = alleleFreq.asString();
|
||||
System.out.format("%s %s %c %c %.2f", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt, alleleFreq.LOD);
|
||||
System.out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
|
||||
|
||||
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
|
||||
if (hapmap_q != -1 && hapmap_q == qstar) {
|
||||
hapmap_genotype_correct++;
|
||||
System.out.print("\n");
|
||||
}else{
|
||||
hapmap_genotype_incorrect++;
|
||||
System.out.printf(" INCORRECT GENOTYPE Bases: %s\n", AlleleFrequencyWalker.getBases(context));
|
||||
AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
||||
if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
|
||||
|
||||
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
|
||||
if (hapmap_q != -1 && hapmap_q == alleleFreq.qstar) {
|
||||
hapmap_genotype_correct++;
|
||||
}else{
|
||||
hapmap_genotype_incorrect++;
|
||||
System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
|
||||
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
|
||||
}
|
||||
}
|
||||
|
||||
// Now calculate ref / var performance - did we correctly classify the site as
|
||||
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
|
||||
boolean hapmap_var = hapmap_q != 0.0;
|
||||
boolean called_var = qstar != 0.0;
|
||||
if (hapmap_var != called_var) {
|
||||
hapmap_refvar_incorrect++;
|
||||
}else{
|
||||
hapmap_refvar_correct++;
|
||||
if (alleleFreq.lodVsRef >= LOD_cutoff || -1 * alleleFreq.lodVsRef >= LOD_cutoff) {
|
||||
|
||||
// Now calculate ref / var performance - did we correctly classify the site as
|
||||
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
|
||||
boolean hapmap_var = hapmap_q != 0.0;
|
||||
boolean called_var = alleleFreq.qstar != 0.0;
|
||||
if (hapmap_q != -1 && hapmap_var != called_var) {
|
||||
hapmap_refvar_incorrect++;
|
||||
}else{
|
||||
hapmap_refvar_correct++;
|
||||
System.out.printf(" INCORRECT REFVAR CALL Bases: %s\n", AlleleFrequencyWalker.getBases(context));
|
||||
}
|
||||
}
|
||||
|
||||
System.out.print("\n");
|
||||
}
|
||||
|
||||
/*String hapmap_genotype = hapmap_chip_genotype.getFeature();
|
||||
String called_genotype = alleleFreq.asString();
|
||||
System.out.format("HAPMAP %s %s %.2f\n", hapmap_genotype, called_genotype, alleleFreq.LOD);
|
||||
// There is a hapmap site here, is it correct?
|
||||
if (hapmap_genotype.equals(called_genotype))
|
||||
{
|
||||
hapmap_tp++;
|
||||
} else {
|
||||
hapmap_fp++;
|
||||
} */
|
||||
|
||||
return alleleFreq;
|
||||
}
|
||||
|
||||
|
|
@ -190,7 +173,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker<AlleleFrequenc
|
|||
public String reduce(AlleleFrequencyEstimate alleleFreq, String sum)
|
||||
{
|
||||
// Print RESULT data for confident calls
|
||||
//if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); }
|
||||
//if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
|
||||
System.out.print(alleleFreq.asTabularString());
|
||||
|
||||
if (this.num_loci_total % 1000 == 0) { printMetrics(); }
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
|
|||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> {
|
||||
|
||||
|
|
@ -159,13 +160,14 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
double qstep = 0.001;
|
||||
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
|
||||
|
||||
double best_qstar = Math.log10(0);
|
||||
double best_qhat = Math.log10(0);
|
||||
double best_posterior = Math.log10(0);
|
||||
|
||||
double best_pDq = Math.log10(0);
|
||||
double best_pqG = Math.log10(0);
|
||||
double best_pG = Math.log10(0);
|
||||
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
|
||||
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
|
||||
// Fill 1..N ML positions with 0 valued MLs
|
||||
for (int i=1; i<=N; i++) bestMixtures[i] = new MixtureLikelihood();
|
||||
// Calculate null hypothesis for qstar = 0, qhat = 0
|
||||
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
|
||||
// Put null hypothesis into ML[0] because that is our prob of that theory and we don't loop over it below
|
||||
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
|
||||
|
||||
// hypothetic allele balance that we sample over
|
||||
for (double q=0.0; q <= qend; q += qstep)
|
||||
|
|
@ -180,30 +182,36 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
double pG = P_G(N, qstar_N);
|
||||
double posterior = pDq + pqG + pG;
|
||||
|
||||
if (posterior > best_posterior)
|
||||
{
|
||||
best_qstar = qstar;
|
||||
best_qhat = q;
|
||||
best_posterior = posterior;
|
||||
if (posterior > bestMixtures[qstar_N].posterior)
|
||||
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
|
||||
|
||||
best_pDq = pDq;
|
||||
best_pqG = pqG;
|
||||
best_pG = pG;
|
||||
}
|
||||
//System.out.format("%.2f %.2f %5.2f %5.2f %5.2f %5.2f\n", q, qstar, pDq, pqG, pG, posterior);
|
||||
}
|
||||
}
|
||||
|
||||
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
|
||||
double LOD = best_posterior - posterior_null_hyp;
|
||||
// First reverset sort NONREF mixtures according to highest posterior probabililty
|
||||
Arrays.sort(bestMixtures, 1, N+1);
|
||||
|
||||
// Calculate Lod of any variant call versus the reference call
|
||||
// Answers how confident are we in the best variant (nonref) mixture versus the null hypothesis
|
||||
// reference mixture - qhat = qstar = 0.0
|
||||
double lodVarVsRef = bestMixtures[1].posterior - posterior_null_hyp;
|
||||
|
||||
// Now reverse sort ALL mixtures according to highest posterior probability
|
||||
Arrays.sort(bestMixtures);
|
||||
|
||||
// Calculate Lod of the mixture versus other possible
|
||||
// Answers how confident are we in the best mixture versus the next best mixture
|
||||
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
|
||||
|
||||
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
|
||||
num2nuc[refnum],
|
||||
num2nuc[altnum],
|
||||
N,
|
||||
best_qhat,
|
||||
best_qstar,
|
||||
LOD,
|
||||
bestMixtures[0].qhat,
|
||||
bestMixtures[0].qstar,
|
||||
lodVarVsRef,
|
||||
lodBestVsNextBest,
|
||||
depth);
|
||||
return alleleFreq;
|
||||
}
|
||||
|
|
@ -213,6 +221,12 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
public double qstar;
|
||||
public double qhat;
|
||||
|
||||
MixtureLikelihood() {
|
||||
this.posterior = Math.log10(0);
|
||||
this.qstar = Math.log10(0);
|
||||
this.qhat = Math.log10(0);
|
||||
}
|
||||
|
||||
MixtureLikelihood(double posterior, double qstar, double qhat) {
|
||||
this.posterior = posterior;
|
||||
this.qstar = qstar;
|
||||
|
|
@ -317,7 +331,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
public Integer reduce(AlleleFrequencyEstimate alleleFreq, Integer sum)
|
||||
{
|
||||
// Print RESULT data for confident calls
|
||||
if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) { System.out.print(alleleFreq.asTabularString()); }
|
||||
if ((alleleFreq.lodVsRef >= 5) || (alleleFreq.lodVsRef <= -5)) { System.out.print(alleleFreq.asTabularString()); }
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
|
@ -393,7 +407,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
AlleleFrequencyWalker w = new AlleleFrequencyWalker();
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
||||
System.out.print(String.format("50%% Het : %s %c %c %f %f %f %d %s\n",
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null"));
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||
}
|
||||
|
||||
{
|
||||
|
|
@ -510,7 +524,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstima
|
|||
w.N = 10;
|
||||
AlleleFrequencyEstimate estimate = w.AlleleFrequencyEstimator("null", N, het_bases, het_quals, 0, 1, 20);
|
||||
System.out.print(String.format("10%% Het : %s %c %c %f %f %f %d %s\n",
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.LOD, 20, "null"));
|
||||
"null", estimate.ref, estimate.alt, estimate.qhat, estimate.qstar, estimate.lodVsRef, 20, "null"));
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,21 +10,12 @@ public class AlleleFrequencyEstimate {
|
|||
public int N;
|
||||
public double qhat;
|
||||
public double qstar;
|
||||
public double LOD;
|
||||
public double lodVsRef;
|
||||
public double lodVsNextBest;
|
||||
public int depth;
|
||||
public String notes;
|
||||
|
||||
public double getQstar()
|
||||
{
|
||||
return qstar;
|
||||
}
|
||||
|
||||
public double getLOD()
|
||||
{
|
||||
return LOD;
|
||||
}
|
||||
|
||||
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double LOD, int depth)
|
||||
public AlleleFrequencyEstimate(String location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
|
||||
{
|
||||
this.location = location;
|
||||
this.ref = ref;
|
||||
|
|
@ -32,19 +23,21 @@ public class AlleleFrequencyEstimate {
|
|||
this.N = N;
|
||||
this.qhat = qhat;
|
||||
this.qstar = qstar;
|
||||
this.LOD = LOD;
|
||||
this.lodVsRef = lodVsRef;
|
||||
this.lodVsNextBest = lodVsNextBest;
|
||||
this.depth = depth;
|
||||
this.notes = "";
|
||||
}
|
||||
|
||||
public String asTabularString() {
|
||||
return String.format("RESULT %s %c %c %f %f %f %d %s\n",
|
||||
return String.format("RESULT %s %c %c %f %f %f %f %d %s\n",
|
||||
location,
|
||||
ref,
|
||||
alt,
|
||||
qhat,
|
||||
qstar,
|
||||
LOD,
|
||||
lodVsRef,
|
||||
lodVsNextBest,
|
||||
depth,
|
||||
notes);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue