Restructured AlleleFrequency classes into 3 classes: AlleleFrequencyWalker, AlleleFrequencyMetricsWalker, AlleleFrequencyEstimate. AlleleFrequencyMetricsWalker class now calls mapper function of AlleleFrequencyWalker and works with the result. AlleleFrequencyEstimate is now a separate class instead of a subclass of AlleleFrequencyWalker.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@102 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
andrewk 2009-03-19 22:06:01 +00:00
parent 41fec1565c
commit 6bcdac5c62
4 changed files with 135 additions and 112 deletions

View File

@ -0,0 +1,89 @@
package org.broadinstitute.sting.gatk.walkers;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
import org.broadinstitute.sting.gatk.walkers.BasicLociWalker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Mar 18, 2009
* Time: 5:28:58 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleFrequencyMetricsWalker extends BasicLociWalker<Integer, Integer> {
long dbsnp_tp=0;
long dbsnp_fp=0;
long num_snps=0;
long num_loci=0;
double LOD_cutoff = 5;
//public void calculateMetrics(List<ReferenceOrderedDatum> rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) {
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
//AlleleFrequencyWalker = AlleleFrequencyWalker();
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyWalker().map(rodData, ref, context);
boolean is_dbSNP_SNP = false;
for ( ReferenceOrderedDatum datum : rodData ) {
if ( datum != null && datum instanceof rodDbSNP) {
rodDbSNP dbsnp = (rodDbSNP)datum;
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
}
}
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) { // we confidently called it a SNP!
if (is_dbSNP_SNP) {
dbsnp_tp += 1;
}else{
dbsnp_fp += 1;
}
}
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) {
//System.out.println(alleleFreq.getLogOddsVarRef());
num_snps++;
}
num_loci++;
return 1;
}
public void printMetrics() {
System.out.println("\nAllele Frequency Metrics:\n");
System.out.printf("Precision of LOD >= %.0f SNPs w.r.t dbSNP: %.2f\n", LOD_cutoff, (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100);
System.out.printf("\\--TP: %d\n", dbsnp_tp);
System.out.printf("\\--FP: %d\n", dbsnp_fp);
System.out.println();
System.out.printf("SNPs (LOD > %.0f): %d\n", LOD_cutoff, num_snps);
System.out.printf("Total loci: %d\n", num_loci);
System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps);
System.out.println();
}
public void onTraversalDone() {
printMetrics();
}
public Integer reduceInit() { return 0; }
public Integer reduce(Integer alleleFreq, Integer sum) {
//System.out.printf("%s %.2f\n", alleleFreq.asString(), alleleFreq.logOddsVarRef);
return 0;//value + sum;
}
}

View File

@ -1,16 +1,16 @@
package org.broadinstitute.sting.gatk.walkers;
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.utils.AlleleFrequencyMetrics;
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
import net.sf.samtools.SAMRecord;
import java.util.List;
import java.util.Arrays;
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker.AlleleFrequencyEstimate, Integer> {
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> {
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
@ -50,6 +50,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
for (byte b : bases)
base_counts[nuc2num[b]]++;
// Find alternate allele - 2nd most frequent non-ref allele
// (maybe we should check for ties and eval both or check most common including quality scores)
int altnum = -1; // start with first base numerical identity (0,1,2,3)
@ -77,14 +78,11 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
//System.out.printf(" DBSNP %s on %s => %s%n", dbsnp.toSimpleString(), dbsnp.strand, Utils.join("/", dbsnp.getAllelesFWD()));
System.out.printf("ROD: %s ",dbsnp.toMediumString());
}
}
}
if (debug >= 1) System.out.format(" %s\n", base_string);
if (debug >= 2) System.out.println();
alleleMetrics.calculateMetrics(rodData, alleleFreq);
return alleleFreq;
}
@ -181,40 +179,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
}
}
public class AlleleFrequencyEstimate {
//AlleleFrequencyEstimate();
char ref;
char alt;
int N;
double qhat;
double qstar;
double logOddsVarRef;
public double getQstar() {return qstar;}
public double getLogOddsVarRef() {return logOddsVarRef;}
public AlleleFrequencyEstimate (char ref, char alt, int N, double qhat, double qstar, double logOddsVarRef) {
this.ref = ref;
this.alt = alt;
this.N = N;
this.qhat = qhat;
this.qstar = qstar;
this.logOddsVarRef = logOddsVarRef;
}
public String asString () {
// Print out the called bases
// Notes: switched from qhat to qstar because qhat doesn't work at n=1 (1 observed base) where having a single non-ref
// base has you calculate qstar = 0.0 and qhat = 0.49 and that leads to a genotype predicition of AG according
// to qhat, but AA according to qstar. This needs to be further investigated to see whether we really want
// to use qstar, but make N (number of chormosomes) switch to n (number of reads at locus) for n=1
long numNonrefBases = Math.round(qstar * N);
long numRefBases = N - numNonrefBases;
return repeat(ref, numRefBases) + repeat(alt, numNonrefBases);
}
}
static double logOdds(double prob1, double prob2) {
return Math.log10(prob1 / (1-prob1)) - Math.log10(prob2 / (1-prob2));
}
@ -327,15 +291,8 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
p_G_N_2[0] = 0.999;
p_G_N_2[1] = 1e-3;
p_G_N_2[2] = 1e-5;
alleleMetrics = new AlleleFrequencyMetrics();
}
AlleleFrequencyMetrics alleleMetrics;
public void onTraversalDone() {
alleleMetrics.printMetrics();
}
void print_base_qual_matrix(double [][]quals, int numReads) {

View File

@ -0,0 +1,42 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
public class AlleleFrequencyEstimate {
//AlleleFrequencyEstimate();
char ref;
char alt;
int N;
double qhat;
double qstar;
double logOddsVarRef;
public double getQstar() {
return qstar;
}
public double getLogOddsVarRef() {
return logOddsVarRef;
}
public AlleleFrequencyEstimate(char ref, char alt, int N, double qhat, double qstar, double logOddsVarRef) {
this.ref = ref;
this.alt = alt;
this.N = N;
this.qhat = qhat;
this.qstar = qstar;
this.logOddsVarRef = logOddsVarRef;
}
public String asString() {
// Print out the called bases
// Notes: switched from qhat to qstar because qhat doesn't work at n=1 (1 observed base) where having a single non-ref
// base has you calculate qstar = 0.0 and qhat = 0.49 and that leads to a genotype predicition of AG according
// to qhat, but AA according to qstar. This needs to be further investigated to see whether we really want
// to use qstar, but make N (number of chormosomes) switch to n (number of reads at locus) for n=1
long numNonrefBases = Math.round(qstar * N);
long numRefBases = N - numNonrefBases;
return AlleleFrequencyWalker.repeat(ref, numRefBases) + AlleleFrequencyWalker.repeat(alt, numNonrefBases);
}
}

View File

@ -1,65 +0,0 @@
package org.broadinstitute.sting.utils;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
import java.util.List;
/**
* Created by IntelliJ IDEA.
* User: andrewk
* Date: Mar 18, 2009
* Time: 5:28:58 PM
* To change this template use File | Settings | File Templates.
*/
public class AlleleFrequencyMetrics {
long dbsnp_tp=0;
long dbsnp_fp=0;
long num_snps=0;
long num_loci=0;
public void calculateMetrics(List<ReferenceOrderedDatum> rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) {
boolean is_dbSNP_SNP = false;
for ( ReferenceOrderedDatum datum : rodData ) {
if ( datum != null && datum instanceof rodDbSNP) {
rodDbSNP dbsnp = (rodDbSNP)datum;
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
}
}
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= 5.0) { // we confidently called it a SNP!
if (is_dbSNP_SNP) {
dbsnp_tp += 1;
}else{
dbsnp_fp += 1;
}
}
if (alleleFreq.getQstar() > 0.0) num_snps++;
num_loci++;
}
public void printMetrics() {
System.out.println("\nAllele Frequency Metrics:\n");
System.out.printf("Precision of LOD >= 5 SNPs w.r.t dbSNP: %.2f\n", (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100);
System.out.printf("\\--TP: %d\n", dbsnp_tp);
System.out.printf("\\--FP: %d\n", dbsnp_fp);
System.out.println();
System.out.printf("SNPs (LOD > 0): %d\n", num_snps);
System.out.printf("Total loci: %d\n", num_loci);
System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps);
System.out.println();
}
}