diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java index 9439b4d99..5de700eab 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java @@ -41,7 +41,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) { // we confidently called it a SNP! + if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLOD() >= LOD_cutoff) { // we confidently called it a SNP! if (is_dbSNP_SNP) { dbsnp_tp += 1; }else{ @@ -49,7 +49,7 @@ public class AlleleFrequencyMetricsWalker extends BasicLociWalker 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) { + if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLOD() >= LOD_cutoff) { //System.out.println(alleleFreq.getLogOddsVarRef()); num_snps++; } diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java index b9d382094..0443fac87 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java @@ -18,7 +18,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker 0); + assert(altnum != -1); - if (debug >= 1) System.out.format("Pos: %s ref: %c alt: %c ", context.getLocation(), num2nuc[refnum], num2nuc[altnum]); - //System.out.format("%2dA %2dC %2dT %2dG %2d total", base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length); - - if (debug >= 2) print_base_qual_matrix(quals, numReads); - - AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(N, bases, quals, refnum, altnum, debug); + AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(N, bases, quals, refnum, altnum); // Print dbSNP data if its there - if (debug >= 1) { + if (false) { for ( ReferenceOrderedDatum datum : rodData ) { if ( datum != null && datum instanceof rodDbSNP) { rodDbSNP dbsnp = (rodDbSNP)datum; @@ -80,13 +74,21 @@ public class AlleleFrequencyWalker extends BasicLociWalker= 1) System.out.format(" %s\n", base_string); - if (debug >= 2) System.out.println(); + + System.out.print(String.format("RESULT %s %c %c %f %f %f %d\n", + context.getLocation(), + alleleFreq.ref, + alleleFreq.alt, + alleleFreq.qhat, + alleleFreq.qstar, + alleleFreq.LOD, + base_string.length())); return alleleFreq; } - AlleleFrequencyEstimate AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum, int debug) { + public AlleleFrequencyEstimate AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum) + { // q = hypothetical %nonref // qstar = true %nonref @@ -101,61 +103,43 @@ public class AlleleFrequencyWalker extends BasicLociWalker= 2) { - System.out.format("%4s ", "q "); - for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) - System.out.format("%5s%10s %10s %10s ", "qstar", "p(D|q) ", "p(q|G) ", "posterior "); - System.out.println(); - } - - MixtureLikelihood[] bestMixtures = { new MixtureLikelihood(-1,-1,-1), new MixtureLikelihood(-1,-1,-1), new MixtureLikelihood(-1,-1,-1) }; - /*double[] highest_posterior = new double[] {-1.0, -1.0, -1.0}; - double[] highest_qstar = new double[] {-1.0, -1.0, -1.0}; - double[] highest_q = new double[] {-1.0, -1.0, -1.0};*/ double qstep = 0.01; double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating - for (double q=0.0; q <= qend; q += qstep) { // hypothetic allele balance that we sample over - if (debug >= 2) System.out.format("%.2f ", q); + double best_qstar = Math.log10(0); + double best_qhat = Math.log10(0); + double best_posterior = Math.log10(0); + + for (double q=0.0; q <= qend; q += qstep) // hypothetic allele balance that we sample over + { long q_R = Math.round(q*bases.length); - for (qstar = epsilon, qstar_N = 0; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N, qstar_N++) { // qstar - true allele balances + for (qstar = epsilon, qstar_N = 0; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N, qstar_N++) // qstar - true allele balances + { // for N=2: these are 0.0 + epsilon, 0.5, 1.0 - epsilon corresponding to reference, het-SNP, homo-SNP - //int qstar_N = Math.round((float)qstar*N); double pDq = P_D_q(bases, quals, q, refnum, altnum); double pqG = P_q_G(bases, N, q, qstar, q_R); - double pG = P_G(N, qstar_N); //= P_G(N, qstar); - double posterior = pDq * pqG * pG; - if (debug >= 2) System.out.format("%.2f %10.7f %10.7f %10.7f ", qstar, Math.log10(pDq), Math.log10(pqG), Math.log10(posterior)); - if (posterior > bestMixtures[qstar_N].posterior) - bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q); + double pG = P_G(N, qstar_N); //= P_G(N, qstar); + double posterior = pDq + pqG; // + pG; + + if (posterior > best_posterior) + { + best_qstar = qstar; + best_qhat = q; + best_posterior = posterior; + } } - if (debug >= 2) System.out.println(); } - // Calculate log-odds difference - must happen before mixtures are sorted by posterior prob. - double logOddsVarRef = logOddsNonrefRef(bestMixtures); - if (debug >= 1) System.out.printf("LOD(Var,Ref): %6.2f ", logOddsVarRef); + double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, 0.0, 0) + P_G(N, 0); + double LOD = best_posterior - posterior_null_hyp; - // Print all mixture likelihoods in order of qstar - for (MixtureLikelihood mix : bestMixtures) // outputs: "het 3.26 " - if (debug >= 1) System.out.printf("%3s: %6.2f ", genotypeTypeString(mix.qstar, N), Math.log10(mix.posterior / (1-mix.posterior))); - - // Sort by posterior probability - Arrays.sort(bestMixtures); // We only used best entry, so this could be a generic max function (optimization) - if (debug >= 1) System.out.printf("qhat: %.2f qstar: %.2f ", bestMixtures[0].qhat, bestMixtures[0].qstar); //highest_q, highest_qstar); - if (debug >= 1) System.out.printf("prob: %.2e ", bestMixtures[0].posterior); - if (debug >= 1) System.out.printf("type: %3s ", genotypeTypeString(bestMixtures[0].qstar, N)); - - double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) * P_q_G(bases, N, 0.0, 0.0, 0) * P_G(N, 0); - if (debug >= 1) System.out.printf("P(null): %.2e ", posterior_null_hyp); - //double LOD = Math.log10(bestMixtures[0].posterior) - Math.log10(posterior_null_hyp); - double LOD = logOdds(bestMixtures[0].posterior, posterior_null_hyp); - if (debug >= 1) System.out.printf("LOD: %6.2f ", LOD); - - AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(num2nuc[refnum], num2nuc[refnum], - N, bestMixtures[0].qhat, bestMixtures[0].qstar, logOddsVarRef); - if (debug >= 1) System.out.printf("gen: %s", genotypeTypeString(bestMixtures[0].qstar, N)); + AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(num2nuc[refnum], + num2nuc[altnum], + N, + best_qhat, + best_qstar, + LOD); return alleleFreq; } @@ -179,41 +163,32 @@ public class AlleleFrequencyWalker extends BasicLociWalker mixtures[2].posterior) ? mixtures[1] : mixtures[2]); - return logOdds(bestNonref.posterior, mixtures[0].posterior); - - // Code to calculate LOD using qstar=0 qhat=0 - //double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) * P_q_G(bases, N, 0.0, 0.0) * P_G(N, 0.0); - //double LOD = Math.log10(highest_posterior) - Math.log10(posterior_null_hyp); - } - - static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) { - double p = 1.0; - - for (int i=0; i