One line format is useable and two levels of debug output are available (debug = 1: one line format, debug = 2: table of sampled probs for each locus). Class AlleleFrequencyMetrics computes %dbSNP and frequency of SNPs.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@99 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
f1034f3dfd
commit
5fa99f430e
|
|
@ -3,26 +3,27 @@ package org.broadinstitute.sting.gatk.walkers;
|
||||||
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||||
import org.broadinstitute.sting.gatk.LocusContext;
|
import org.broadinstitute.sting.gatk.LocusContext;
|
||||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.utils.AlleleFrequencyMetrics;
|
||||||
import net.sf.samtools.SAMRecord;
|
import net.sf.samtools.SAMRecord;
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker.AlleleFrequencyEstimate, Integer> {
|
||||||
|
|
||||||
|
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||||
|
|
||||||
|
|
||||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
|
||||||
|
|
||||||
// Convert context data into bases and 4-base quals
|
// Convert context data into bases and 4-base quals
|
||||||
|
|
||||||
// Set number of chromosomes, N, to 2 for now
|
// Set number of chromosomes, N, to 2 for now
|
||||||
int N = 2;
|
int N = 2;
|
||||||
boolean debug = false;
|
int debug = 0;
|
||||||
|
|
||||||
// Convert bases to CharArray
|
// Convert bases to CharArray
|
||||||
int numReads = context.getReads().size(); //numReads();
|
int numReads = context.getReads().size(); //numReads();
|
||||||
byte[] bases = new byte[numReads];
|
byte[] bases = new byte[numReads];
|
||||||
|
String base_string = "";
|
||||||
double[][] quals = new double[numReads][4];
|
double[][] quals = new double[numReads][4];
|
||||||
int refnum = nuc2num[ref];
|
int refnum = nuc2num[ref];
|
||||||
|
|
||||||
|
|
@ -33,6 +34,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
int offset = offsets.get(i);
|
int offset = offsets.get(i);
|
||||||
|
|
||||||
bases[i] = read.getReadBases()[offset];
|
bases[i] = read.getReadBases()[offset];
|
||||||
|
base_string += read.getReadString().charAt(offset);
|
||||||
int callednum = nuc2num[bases[i]];
|
int callednum = nuc2num[bases[i]];
|
||||||
quals[i][callednum] = 1 - Math.pow(10.0, (double)read.getBaseQualities()[offset] / -10.0);
|
quals[i][callednum] = 1 - Math.pow(10.0, (double)read.getBaseQualities()[offset] / -10.0);
|
||||||
|
|
||||||
|
|
@ -60,21 +62,33 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
assert(altnum > 0);
|
assert(altnum > 0);
|
||||||
|
|
||||||
if (bases.length > 0) { //altcount > 2) {
|
if (debug >= 1) System.out.format("Pos: %s ref: %c alt: %c ", context.getLocation(), num2nuc[refnum], num2nuc[altnum]);
|
||||||
System.out.format("Pos: %s | ref: %c | alt: %c | %2dA | %2dC | %2dT | %2dG | %2d total | ", context.getLocation(), num2nuc[refnum],
|
//System.out.format("%2dA %2dC %2dT %2dG %2d total", base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length);
|
||||||
num2nuc[altnum], base_counts[0], base_counts[1], base_counts[2], base_counts[3], bases.length);
|
|
||||||
|
|
||||||
if (debug) print_base_qual_matrix(quals, numReads);
|
if (debug >= 2) print_base_qual_matrix(quals, numReads);
|
||||||
|
|
||||||
// Check if we really want to do this one
|
AlleleFrequencyEstimate alleleFreq = AlleleFrequencyEstimator(N, bases, quals, refnum, altnum, debug);
|
||||||
AlleleFrequencyEstimator(N, bases, quals, refnum, altnum, debug);
|
|
||||||
}
|
|
||||||
if (debug) System.out.println();
|
|
||||||
|
|
||||||
return 1;
|
// Print dbSNP data if its there
|
||||||
|
if (debug >= 1) {
|
||||||
|
for ( ReferenceOrderedDatum datum : rodData ) {
|
||||||
|
if ( datum != null && datum instanceof rodDbSNP) {
|
||||||
|
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||||
|
//System.out.printf(" DBSNP %s on %s => %s%n", dbsnp.toSimpleString(), dbsnp.strand, Utils.join("/", dbsnp.getAllelesFWD()));
|
||||||
|
System.out.printf("ROD: %s ",dbsnp.toMediumString());
|
||||||
}
|
}
|
||||||
|
|
||||||
static void AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum, boolean debug) {
|
}
|
||||||
|
}
|
||||||
|
if (debug >= 1) System.out.format(" %s\n", base_string);
|
||||||
|
if (debug >= 2) System.out.println();
|
||||||
|
|
||||||
|
alleleMetrics.calculateMetrics(rodData, alleleFreq);
|
||||||
|
|
||||||
|
return alleleFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate AlleleFrequencyEstimator(int N, byte[] bases, double[][] quals, int refnum, int altnum, int debug) {
|
||||||
|
|
||||||
// q = hypothetical %nonref
|
// q = hypothetical %nonref
|
||||||
// qstar = true %nonref
|
// qstar = true %nonref
|
||||||
|
|
@ -83,47 +97,138 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
// alt = alternate hypothesis base (most frequent nonref base)
|
// alt = alternate hypothesis base (most frequent nonref base)
|
||||||
// ref = reference base
|
// ref = reference base
|
||||||
|
|
||||||
|
// b = number of bases at locus
|
||||||
|
|
||||||
|
|
||||||
double epsilon = 0; // 1e-2;
|
double epsilon = 0; // 1e-2;
|
||||||
double qstar;
|
double qstar;
|
||||||
if (debug) {
|
int qstar_N;
|
||||||
System.out.format("%4s | ", "q ");
|
if (debug >= 2) {
|
||||||
|
System.out.format("%4s ", "q ");
|
||||||
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N)
|
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N)
|
||||||
System.out.format("%5s%12s %12s %12s | ", "qstar", "p(D|q) ", "p(q|G) ", "posterior ");
|
System.out.format("%5s%10s %10s %10s ", "qstar", "p(D|q) ", "p(q|G) ", "posterior ");
|
||||||
if (debug) System.out.println();
|
System.out.println();
|
||||||
}
|
}
|
||||||
|
|
||||||
double highest_posterior = -1.0;
|
MixtureLikelihood[] bestMixtures = { new MixtureLikelihood(-1,-1,-1), new MixtureLikelihood(-1,-1,-1), new MixtureLikelihood(-1,-1,-1) };
|
||||||
double highest_qstar = -1.0;
|
/*double[] highest_posterior = new double[] {-1.0, -1.0, -1.0};
|
||||||
double highest_q = -1.0;
|
double[] highest_qstar = new double[] {-1.0, -1.0, -1.0};
|
||||||
|
double[] highest_q = new double[] {-1.0, -1.0, -1.0};*/
|
||||||
|
|
||||||
double qstep = 0.01;
|
double qstep = 0.01;
|
||||||
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
|
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
|
||||||
for (double q=0.0; q <= qend; q += qstep) {
|
for (double q=0.0; q <= qend; q += qstep) { // hypothetic allele balance that we sample over
|
||||||
if (debug) System.out.format("%.2f | ", q);
|
if (debug >= 2) System.out.format("%.2f ", q);
|
||||||
for (qstar = epsilon; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N) {
|
|
||||||
double pDq = P_D_q(bases, quals, q, refnum, altnum);
|
|
||||||
double pqG = P_q_G(bases, N, q, qstar);
|
|
||||||
double pG = P_G(N, qstar, altnum);
|
|
||||||
double posterior = pDq * pqG * pG;
|
|
||||||
if (debug) System.out.format("%.2f %.10f %.10f %.10f | ", qstar, pDq, pqG, posterior);
|
|
||||||
if (posterior > highest_posterior) {
|
|
||||||
highest_posterior = posterior;
|
|
||||||
highest_qstar = qstar;
|
|
||||||
highest_q = q;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (debug) System.out.println();
|
|
||||||
}
|
|
||||||
/*System.out.printf("Maximal likelihood posterior: %.6f\n", highest_posterior);
|
|
||||||
System.out.printf("q that maximimizes posterior: %.6f\n", highest_q);
|
|
||||||
System.out.printf("qstar used when calculating max q: %.6f\n", highest_qstar);*/
|
|
||||||
System.out.printf("qhat %.2f | qstar %.2f | ", highest_q, highest_qstar);
|
|
||||||
|
|
||||||
|
long q_R = Math.round(q*bases.length);
|
||||||
|
for (qstar = epsilon, qstar_N = 0; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N, qstar_N++) { // qstar - true allele balances
|
||||||
|
// for N=2: these are 0.0 + epsilon, 0.5, 1.0 - epsilon corresponding to reference, het-SNP, homo-SNP
|
||||||
|
//int qstar_N = Math.round((float)qstar*N);
|
||||||
|
double pDq = P_D_q(bases, quals, q, refnum, altnum);
|
||||||
|
double pqG = P_q_G(bases, N, q, qstar, q_R);
|
||||||
|
double pG = P_G(N, qstar_N); //= P_G(N, qstar);
|
||||||
|
double posterior = pDq * pqG * pG;
|
||||||
|
if (debug >= 2) System.out.format("%.2f %10.7f %10.7f %10.7f ", qstar, Math.log10(pDq), Math.log10(pqG), Math.log10(posterior));
|
||||||
|
if (posterior > bestMixtures[qstar_N].posterior)
|
||||||
|
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
|
||||||
|
}
|
||||||
|
if (debug >= 2) System.out.println();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Calculate log-odds difference - must happen before mixtures are sorted by posterior prob.
|
||||||
|
double logOddsVarRef = logOddsNonrefRef(bestMixtures);
|
||||||
|
if (debug >= 1) System.out.printf("LOD(Var,Ref): %6.2f ", logOddsVarRef);
|
||||||
|
|
||||||
|
// Print all mixture likelihoods in order of qstar
|
||||||
|
for (MixtureLikelihood mix : bestMixtures) // outputs: "het 3.26 "
|
||||||
|
if (debug >= 1) System.out.printf("%3s: %6.2f ", genotypeTypeString(mix.qstar, N), Math.log10(mix.posterior / (1-mix.posterior)));
|
||||||
|
|
||||||
|
// Sort by posterior probability
|
||||||
|
Arrays.sort(bestMixtures); // We only used best entry, so this could be a generic max function (optimization)
|
||||||
|
if (debug >= 1) System.out.printf("qhat: %.2f qstar: %.2f ", bestMixtures[0].qhat, bestMixtures[0].qstar); //highest_q, highest_qstar);
|
||||||
|
if (debug >= 1) System.out.printf("prob: %.2e ", bestMixtures[0].posterior);
|
||||||
|
if (debug >= 1) System.out.printf("type: %3s ", genotypeTypeString(bestMixtures[0].qstar, N));
|
||||||
|
|
||||||
|
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) * P_q_G(bases, N, 0.0, 0.0, 0) * P_G(N, 0);
|
||||||
|
if (debug >= 1) System.out.printf("P(null): %.2e ", posterior_null_hyp);
|
||||||
|
//double LOD = Math.log10(bestMixtures[0].posterior) - Math.log10(posterior_null_hyp);
|
||||||
|
double LOD = logOdds(bestMixtures[0].posterior, posterior_null_hyp);
|
||||||
|
if (debug >= 1) System.out.printf("LOD: %6.2f ", LOD);
|
||||||
|
|
||||||
|
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(num2nuc[refnum], num2nuc[refnum],
|
||||||
|
N, bestMixtures[0].qhat, bestMixtures[0].qstar, logOddsVarRef);
|
||||||
|
if (debug >= 1) System.out.printf("gen: %s", genotypeTypeString(bestMixtures[0].qstar, N));
|
||||||
|
|
||||||
|
return alleleFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
public class MixtureLikelihood implements Comparable<MixtureLikelihood> {
|
||||||
|
public double posterior;
|
||||||
|
public double qstar;
|
||||||
|
public double qhat;
|
||||||
|
|
||||||
|
MixtureLikelihood(double posterior, double qstar, double qhat) {
|
||||||
|
this.posterior = posterior;
|
||||||
|
this.qstar = qstar;
|
||||||
|
this.qhat = qhat;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int compareTo (MixtureLikelihood other) {
|
||||||
|
// default sorting is REVERSE sorting on posterior prob; gives array with top post. prob. as first element
|
||||||
|
if (posterior < other.posterior) return 1;
|
||||||
|
else if (posterior > other.posterior) return -1;
|
||||||
|
else return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public class AlleleFrequencyEstimate {
|
||||||
|
//AlleleFrequencyEstimate();
|
||||||
|
char ref;
|
||||||
|
char alt;
|
||||||
|
int N;
|
||||||
|
double qhat;
|
||||||
|
double qstar;
|
||||||
|
double logOddsVarRef;
|
||||||
|
|
||||||
|
public double getQstar() {return qstar;}
|
||||||
|
public double getLogOddsVarRef() {return logOddsVarRef;}
|
||||||
|
|
||||||
|
|
||||||
|
public AlleleFrequencyEstimate (char ref, char alt, int N, double qhat, double qstar, double logOddsVarRef) {
|
||||||
|
this.ref = ref;
|
||||||
|
this.alt = alt;
|
||||||
|
this.N = N;
|
||||||
|
this.qhat = qhat;
|
||||||
|
this.qstar = qstar;
|
||||||
|
this.logOddsVarRef = logOddsVarRef;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String asString () {
|
||||||
// Print out the called bases
|
// Print out the called bases
|
||||||
long numNonrefBases = Math.round(highest_q * N);
|
// Notes: switched from qhat to qstar because qhat doesn't work at n=1 (1 observed base) where having a single non-ref
|
||||||
|
// base has you calculate qstar = 0.0 and qhat = 0.49 and that leads to a genotype predicition of AG according
|
||||||
|
// to qhat, but AA according to qstar. This needs to be further investigated to see whether we really want
|
||||||
|
// to use qstar, but make N (number of chormosomes) switch to n (number of reads at locus) for n=1
|
||||||
|
long numNonrefBases = Math.round(qstar * N);
|
||||||
long numRefBases = N - numNonrefBases;
|
long numRefBases = N - numNonrefBases;
|
||||||
String genotype = repeat(num2nuc[refnum], numRefBases) + repeat(num2nuc[altnum], numNonrefBases);
|
return repeat(ref, numRefBases) + repeat(alt, numNonrefBases);
|
||||||
System.out.printf("gen: %s\n", genotype);
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static double logOdds(double prob1, double prob2) {
|
||||||
|
return Math.log10(prob1 / (1-prob1)) - Math.log10(prob2 / (1-prob2));
|
||||||
|
}
|
||||||
|
|
||||||
|
static double logOddsNonrefRef(MixtureLikelihood[] mixtures) {
|
||||||
|
// takes list of mixtureLikelihoods SORTED BY QSTAR (default ordering)
|
||||||
|
// Works for N == 2 right now!
|
||||||
|
assert (mixtures.length == 2);
|
||||||
|
MixtureLikelihood bestNonref = ((mixtures[1].posterior > mixtures[2].posterior) ? mixtures[1] : mixtures[2]);
|
||||||
|
return logOdds(bestNonref.posterior, mixtures[0].posterior);
|
||||||
|
|
||||||
|
// Code to calculate LOD using qstar=0 qhat=0
|
||||||
|
//double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) * P_q_G(bases, N, 0.0, 0.0) * P_G(N, 0.0);
|
||||||
|
//double LOD = Math.log10(highest_posterior) - Math.log10(posterior_null_hyp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) {
|
static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum) {
|
||||||
|
|
@ -136,18 +241,30 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
return p;
|
return p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static double P_q_G(byte [] bases, int N, double q, double qstar) {
|
static double P_q_G(byte [] bases, int N, double q, double qstar, long q_R) {
|
||||||
return binomialProb(Math.round(q*bases.length), bases.length, qstar);
|
return binomialProb(q_R, bases.length, qstar);
|
||||||
}
|
}
|
||||||
|
|
||||||
static double P_G(int N, double qstar, int altnum) {
|
static double P_G(int N, int qstar_N) {
|
||||||
if (N==2) {
|
if (N==2) {
|
||||||
return p_G_N_2[ Math.round((float)(qstar * N)) ];
|
return p_G_N_2[ qstar_N ];
|
||||||
}else{
|
}else{
|
||||||
return 1.0;
|
return 1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static String genotypeTypeString(double q, int N){
|
||||||
|
switch (Math.round((float)q*N)) {
|
||||||
|
case (0):
|
||||||
|
return "ref";
|
||||||
|
case (1):
|
||||||
|
return "het";
|
||||||
|
case (2):
|
||||||
|
return "hom";
|
||||||
|
}
|
||||||
|
return "ERROR";
|
||||||
|
}
|
||||||
|
|
||||||
static double binomialProb(long k, long n, double p) {
|
static double binomialProb(long k, long n, double p) {
|
||||||
// k - numebr of successes
|
// k - numebr of successes
|
||||||
// n - number of Bernoulli trials
|
// n - number of Bernoulli trials
|
||||||
|
|
@ -168,6 +285,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static String repeat(char letter, long count) {
|
public static String repeat(char letter, long count) {
|
||||||
|
// Repeat a character count times
|
||||||
String result = "";
|
String result = "";
|
||||||
for (int i=0; i<count; i++) {
|
for (int i=0; i<count; i++) {
|
||||||
result = result + letter;
|
result = result + letter;
|
||||||
|
|
@ -178,8 +296,10 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
|
|
||||||
public Integer reduceInit() { return 0; }
|
public Integer reduceInit() { return 0; }
|
||||||
|
|
||||||
public Integer reduce(Integer value, Integer sum) {
|
public Integer reduce(AlleleFrequencyEstimate alleleFreq, Integer sum) {
|
||||||
return value + sum;
|
|
||||||
|
//System.out.printf("%s %.2f\n", alleleFreq.asString(), alleleFreq.logOddsVarRef);
|
||||||
|
return 0;//value + sum;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -207,8 +327,17 @@ public class AlleleFrequencyWalker extends BasicLociWalker<Integer, Integer> {
|
||||||
p_G_N_2[0] = 0.999;
|
p_G_N_2[0] = 0.999;
|
||||||
p_G_N_2[1] = 1e-3;
|
p_G_N_2[1] = 1e-3;
|
||||||
p_G_N_2[2] = 1e-5;
|
p_G_N_2[2] = 1e-5;
|
||||||
|
|
||||||
|
alleleMetrics = new AlleleFrequencyMetrics();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
AlleleFrequencyMetrics alleleMetrics;
|
||||||
|
|
||||||
|
public void onTraversalDone() {
|
||||||
|
alleleMetrics.printMetrics();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void print_base_qual_matrix(double [][]quals, int numReads) {
|
void print_base_qual_matrix(double [][]quals, int numReads) {
|
||||||
// Print quals for debugging
|
// Print quals for debugging
|
||||||
System.out.println("Base quality matrix");
|
System.out.println("Base quality matrix");
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,65 @@
|
||||||
|
package org.broadinstitute.sting.utils;
|
||||||
|
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Created by IntelliJ IDEA.
|
||||||
|
* User: andrewk
|
||||||
|
* Date: Mar 18, 2009
|
||||||
|
* Time: 5:28:58 PM
|
||||||
|
* To change this template use File | Settings | File Templates.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
public class AlleleFrequencyMetrics {
|
||||||
|
|
||||||
|
long dbsnp_tp=0;
|
||||||
|
long dbsnp_fp=0;
|
||||||
|
long num_snps=0;
|
||||||
|
long num_loci=0;
|
||||||
|
|
||||||
|
public void calculateMetrics(List<ReferenceOrderedDatum> rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) {
|
||||||
|
|
||||||
|
boolean is_dbSNP_SNP = false;
|
||||||
|
|
||||||
|
for ( ReferenceOrderedDatum datum : rodData ) {
|
||||||
|
if ( datum != null && datum instanceof rodDbSNP) {
|
||||||
|
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||||
|
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= 5.0) { // we confidently called it a SNP!
|
||||||
|
if (is_dbSNP_SNP) {
|
||||||
|
dbsnp_tp += 1;
|
||||||
|
}else{
|
||||||
|
dbsnp_fp += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (alleleFreq.getQstar() > 0.0) num_snps++;
|
||||||
|
num_loci++;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
public void printMetrics() {
|
||||||
|
System.out.println("\nAllele Frequency Metrics:\n");
|
||||||
|
System.out.printf("Precision of LOD >= 5 SNPs w.r.t dbSNP: %.2f\n", (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100);
|
||||||
|
System.out.printf("\\--TP: %d\n", dbsnp_tp);
|
||||||
|
System.out.printf("\\--FP: %d\n", dbsnp_fp);
|
||||||
|
System.out.println();
|
||||||
|
System.out.printf("SNPs (LOD > 0): %d\n", num_snps);
|
||||||
|
System.out.printf("Total loci: %d\n", num_loci);
|
||||||
|
System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps);
|
||||||
|
System.out.println();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue