Partial implementation of single sample allele calling
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@64 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
4808dff110
commit
685fc8bd61
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.walkers;
|
package org.broadinstitute.sting.gatk.walkers;
|
||||||
|
|
||||||
|
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
@ -9,6 +10,7 @@ import java.util.List;
|
||||||
public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
|
|
||||||
|
|
@ -16,6 +18,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
// Set number of chromosomes, N, to 2 for now
|
// Set number of chromosomes, N, to 2 for now
|
||||||
int N = 2;
|
int N = 2;
|
||||||
|
boolean debug = false;
|
||||||
|
|
||||||
// Convert bases to CharArray
|
// Convert bases to CharArray
|
||||||
int numReads = context.getReads().size(); //numReads();
|
int numReads = context.getReads().size(); //numReads();
|
||||||
|
|
@ -40,16 +43,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
quals[i][b] = nonref_quals;
|
quals[i][b] = nonref_quals;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Print quals for debugging
|
|
||||||
System.out.println("Base quality matrix");
|
|
||||||
for (int b=0; b<4; b++) {
|
|
||||||
System.out.print(num2nuc[b]);
|
|
||||||
for (int i=0; i < numReads; i++){
|
|
||||||
System.out.format(" %.4f", quals[i][b]);
|
|
||||||
}
|
|
||||||
System.out.println();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count bases
|
// Count bases
|
||||||
int[] base_counts = new int[4];
|
int[] base_counts = new int[4];
|
||||||
for (byte b : bases)
|
for (byte b : bases)
|
||||||
|
|
@ -67,23 +60,21 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
assert(altnum > 0);
|
assert(altnum > 0);
|
||||||
|
|
||||||
if (true) { //altcount > 2) {
|
if (bases.length > 0) { //altcount > 2) {
|
||||||
System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total\n", context.getLocation(), num2nuc[refnum], num2nuc[altnum],
|
System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total | ", context.getLocation(), num2nuc[refnum],
|
||||||
base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length);
|
num2nuc[altnum], base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length);
|
||||||
|
|
||||||
|
if (debug) print_base_qual_matrix(quals, numReads);
|
||||||
|
|
||||||
// Check if we really want to do this one
|
// Check if we really want to do this one
|
||||||
if (bases.length == 0) {
|
AlleleFrequencyEstimator(N, bases, quals, refnum, altnum, debug);
|
||||||
System.out.println("No reads at position; no call being made");
|
|
||||||
}else{
|
|
||||||
AlleleFrequencyEstimator(N, bases, quals, refnum, altnum);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
System.out.println();
|
if (debug) System.out.println();
|
||||||
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum) {
|
static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum, boolean debug) {
|
||||||
|
|
||||||
// q = hypothetical %nonref
|
// q = hypothetical %nonref
|
||||||
// qstar = true %nonref
|
// qstar = true %nonref
|
||||||
|
|
@ -94,10 +85,12 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
double epsilon = 0; // 1e-2;
|
double epsilon = 0; // 1e-2;
|
||||||
double qstar;
|
double qstar;
|
||||||
System.out.format("%4s | ", "q ");
|
if (debug) {
|
||||||
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N)
|
System.out.format("%4s | ", "q ");
|
||||||
System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior ");
|
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N)
|
||||||
System.out.println();
|
System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior ");
|
||||||
|
if (debug) System.out.println();
|
||||||
|
}
|
||||||
|
|
||||||
double highest_posterior = -1.0;
|
double highest_posterior = -1.0;
|
||||||
double highest_qstar = -1.0;
|
double highest_qstar = -1.0;
|
||||||
|
|
@ -106,30 +99,31 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
double qstep = 0.01;
|
double qstep = 0.01;
|
||||||
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
|
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
|
||||||
for (double q=0.0; q <= qend; q += qstep) {
|
for (double q=0.0; q <= qend; q += qstep) {
|
||||||
System.out.format("%.2f | ", q);
|
if (debug) System.out.format("%.2f | ", q);
|
||||||
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) {
|
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) {
|
||||||
double pDq = P_D_q(bases, quals, q, refnum, altnum);
|
double pDq = P_D_q(bases, quals, q, refnum, altnum);
|
||||||
double pqG = P_q_G(bases, N, q, qstar);
|
double pqG = P_q_G(bases, N, q, qstar);
|
||||||
double pG = P_G(N, qstar, altnum);
|
double pG = P_G(N, qstar, altnum);
|
||||||
double posterior = pDq * pqG * pG;
|
double posterior = pDq * pqG * pG;
|
||||||
System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior);
|
if (debug) System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior);
|
||||||
if (posterior > highest_posterior) {
|
if (posterior > highest_posterior) {
|
||||||
highest_posterior = posterior;
|
highest_posterior = posterior;
|
||||||
highest_qstar = qstar;
|
highest_qstar = qstar;
|
||||||
highest_q = q;
|
highest_q = q;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
System.out.println();
|
if (debug) System.out.println();
|
||||||
}
|
}
|
||||||
System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior);
|
/*System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior);
|
||||||
System.out.printf("q that maximimizes posterior: %.6f\n", highest_q);
|
System.out.printf("q that maximimizes posterior: %.6f\n", highest_q);
|
||||||
System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar);
|
System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar);*/
|
||||||
|
System.out.printf("qhat %.2f | qstar %.2f | ", highest_q, highest_qstar);
|
||||||
|
|
||||||
// Print out the called bases
|
// Print out the called bases
|
||||||
long numNonrefBases = Math.round(highest_q * N);
|
long numNonrefBases = Math.round(highest_q * N);
|
||||||
long numRefBases = N - numNonrefBases;
|
long numRefBases = N - numNonrefBases;
|
||||||
String genotype = repeat(num2nuc[refnum], numRefBases) + repeat(num2nuc[altnum], numNonrefBases);
|
String genotype = repeat(num2nuc[refnum], numRefBases) + repeat(num2nuc[altnum], numNonrefBases);
|
||||||
System.out.printf("Maximal likelihood genotype: %s\n", genotype);
|
System.out.printf("gen: %s\n", genotype);
|
||||||
}
|
}
|
||||||
|
|
||||||
static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) {
|
static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) {
|
||||||
|
|
@ -147,7 +141,11 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
static double P_G(int N, double qstar, int altnum) {
|
static double P_G(int N, double qstar, int altnum) {
|
||||||
return 1.0;
|
if (N==2) {
|
||||||
|
return p_G_N_2[ Math.round((float)(qstar * N)) ];
|
||||||
|
}else{
|
||||||
|
return 1.0;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static double binomialProb(long k, long n, double p) {
|
static double binomialProb(long k, long n, double p) {
|
||||||
|
|
@ -187,6 +185,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
static int nuc2num[];
|
static int nuc2num[];
|
||||||
static char num2nuc[];
|
static char num2nuc[];
|
||||||
|
static double p_G_N_2[]; // pop. gen. priors for N=2
|
||||||
public AlleleFrequencyWalker() {
|
public AlleleFrequencyWalker() {
|
||||||
nuc2num = new int[128];
|
nuc2num = new int[128];
|
||||||
nuc2num['A'] = 0;
|
nuc2num['A'] = 0;
|
||||||
|
|
@ -197,10 +196,45 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
nuc2num['c'] = 1;
|
nuc2num['c'] = 1;
|
||||||
nuc2num['t'] = 2;
|
nuc2num['t'] = 2;
|
||||||
nuc2num['g'] = 3;
|
nuc2num['g'] = 3;
|
||||||
|
|
||||||
num2nuc = new char[4];
|
num2nuc = new char[4];
|
||||||
num2nuc[0] = 'A';
|
num2nuc[0] = 'A';
|
||||||
num2nuc[1] = 'C';
|
num2nuc[1] = 'C';
|
||||||
num2nuc[2] = 'T';
|
num2nuc[2] = 'T';
|
||||||
num2nuc[3] = 'G';
|
num2nuc[3] = 'G';
|
||||||
|
|
||||||
|
p_G_N_2 = new double[3];
|
||||||
|
p_G_N_2[0] = 0.999;
|
||||||
|
p_G_N_2[1] = 1e-3;
|
||||||
|
p_G_N_2[2] = 1e-5;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
void print_base_qual_matrix(double [][]quals, int numReads) {
|
||||||
|
// Print quals for debugging
|
||||||
|
System.out.println("Base quality matrix");
|
||||||
|
for (int b=0; b<4; b++) {
|
||||||
|
System.out.print(num2nuc[b]);
|
||||||
|
for (int i=0; i < numReads; i++){
|
||||||
|
System.out.format(" %.4f", quals[i][b]);
|
||||||
|
}
|
||||||
|
System.out.println();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue