diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java index 792e2a61f..a32deb856 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java @@ -1,5 +1,6 @@ package org.broadinstitute.sting.gatk.walkers; +//import org.broadinstitute.sting.gatk.iterators.LocusIterator; import org.broadinstitute.sting.gatk.LocusContext; import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum; import net.sf.samtools.SAMRecord; @@ -9,6 +10,7 @@ import java.util.List; public class AlleleFrequencyWalker extends BasicLociWalker { + public Integer map(List rodData, char ref, LocusContext context) { @@ -16,6 +18,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker { // Set number of chromosomes, N, to 2 for now int N = 2; + boolean debug = false; // Convert bases to CharArray int numReads = context.getReads().size(); //numReads(); @@ -40,16 +43,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker { quals[i][b] = nonref_quals; } - // Print quals for debugging - System.out.println("Base quality matrix"); - for (int b=0; b<4; b++) { - System.out.print(num2nuc[b]); - for (int i=0; i < numReads; i++){ - System.out.format(" %.4f", quals[i][b]); - } - System.out.println(); - } - // Count bases int[] base_counts = new int[4]; for (byte b : bases) @@ -67,23 +60,21 @@ public class AlleleFrequencyWalker extends BasicLociWalker { } assert(altnum > 0); - if (true) { //altcount > 2) { - System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total\n", context.getLocation(), num2nuc[refnum], num2nuc[altnum], - base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length); + if (bases.length > 0) { //altcount > 2) { + System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total | ", context.getLocation(), num2nuc[refnum], + num2nuc[altnum], base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length); + + if (debug) print_base_qual_matrix(quals, numReads); // Check if we really want to do this one - if (bases.length == 0) { - System.out.println("No reads at position; no call being made"); - }else{ - AlleleFrequencyEstimator(N, bases, quals, refnum, altnum); - } + AlleleFrequencyEstimator(N, bases, quals, refnum, altnum, debug); } - System.out.println(); + if (debug) System.out.println(); return 1; } - static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum) { + static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum, boolean debug) { // q = hypothetical %nonref // qstar = true %nonref @@ -94,10 +85,12 @@ public class AlleleFrequencyWalker extends BasicLociWalker { double epsilon = 0; // 1e-2; double qstar; - System.out.format("%4s | ", "q "); - for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) - System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior "); - System.out.println(); + if (debug) { + System.out.format("%4s | ", "q "); + for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) + System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior "); + if (debug) System.out.println(); + } double highest_posterior = -1.0; double highest_qstar = -1.0; @@ -106,30 +99,31 @@ public class AlleleFrequencyWalker extends BasicLociWalker { double qstep = 0.01; double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating for (double q=0.0; q <= qend; q += qstep) { - System.out.format("%.2f | ", q); + if (debug) System.out.format("%.2f | ", q); for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) { double pDq = P_D_q(bases, quals, q, refnum, altnum); double pqG = P_q_G(bases, N, q, qstar); double pG = P_G(N, qstar, altnum); double posterior = pDq * pqG * pG; - System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior); + if (debug) System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior); if (posterior > highest_posterior) { highest_posterior = posterior; highest_qstar = qstar; highest_q = q; } } - System.out.println(); + if (debug) System.out.println(); } - System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior); + /*System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior); System.out.printf("q that maximimizes posterior: %.6f\n", highest_q); - System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar); + System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar);*/ + System.out.printf("qhat %.2f | qstar %.2f | ", highest_q, highest_qstar); // Print out the called bases long numNonrefBases = Math.round(highest_q * N); long numRefBases = N - numNonrefBases; String genotype = repeat(num2nuc[refnum], numRefBases) + repeat(num2nuc[altnum], numNonrefBases); - System.out.printf("Maximal likelihood genotype: %s\n", genotype); + System.out.printf("gen: %s\n", genotype); } static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) { @@ -147,7 +141,11 @@ public class AlleleFrequencyWalker extends BasicLociWalker { } static double P_G(int N, double qstar, int altnum) { - return 1.0; + if (N==2) { + return p_G_N_2[ Math.round((float)(qstar * N)) ]; + }else{ + return 1.0; + } } static double binomialProb(long k, long n, double p) { @@ -187,6 +185,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker { static int nuc2num[]; static char num2nuc[]; + static double p_G_N_2[]; // pop. gen. priors for N=2 public AlleleFrequencyWalker() { nuc2num = new int[128]; nuc2num['A'] = 0; @@ -197,10 +196,45 @@ public class AlleleFrequencyWalker extends BasicLociWalker { nuc2num['c'] = 1; nuc2num['t'] = 2; nuc2num['g'] = 3; + num2nuc = new char[4]; num2nuc[0] = 'A'; num2nuc[1] = 'C'; num2nuc[2] = 'T'; num2nuc[3] = 'G'; + + p_G_N_2 = new double[3]; + p_G_N_2[0] = 0.999; + p_G_N_2[1] = 1e-3; + p_G_N_2[2] = 1e-5; } -} \ No newline at end of file + + void print_base_qual_matrix(double [][]quals, int numReads) { + // Print quals for debugging + System.out.println("Base quality matrix"); + for (int b=0; b<4; b++) { + System.out.print(num2nuc[b]); + for (int i=0; i < numReads; i++){ + System.out.format(" %.4f", quals[i][b]); + } + System.out.println(); + } + } + +} + + + + + + + + + + + + + + + +