Restructured AlleleFrequency classes into 3 classes: AlleleFrequencyWalker, AlleleFrequencyMetricsWalker, AlleleFrequencyEstimate. AlleleFrequencyMetricsWalker class now calls mapper function of AlleleFrequencyWalker and works with the result. AlleleFrequencyEstimate is now a separate class instead of a subclass of AlleleFrequencyWalker.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@102 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
41fec1565c
commit
6bcdac5c62
|
|
@ -0,0 +1,89 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.BasicLociWalker;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: andrewk
|
||||
* Date: Mar 18, 2009
|
||||
* Time: 5:28:58 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
public class AlleleFrequencyMetricsWalker extends BasicLociWalker<Integer, Integer> {
|
||||
|
||||
long dbsnp_tp=0;
|
||||
long dbsnp_fp=0;
|
||||
long num_snps=0;
|
||||
long num_loci=0;
|
||||
double LOD_cutoff = 5;
|
||||
|
||||
//public void calculateMetrics(List<ReferenceOrderedDatum> rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) {
|
||||
public Integer map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||
|
||||
//AlleleFrequencyWalker = AlleleFrequencyWalker();
|
||||
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyWalker().map(rodData, ref, context);
|
||||
|
||||
|
||||
boolean is_dbSNP_SNP = false;
|
||||
|
||||
for ( ReferenceOrderedDatum datum : rodData ) {
|
||||
if ( datum != null && datum instanceof rodDbSNP) {
|
||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) { // we confidently called it a SNP!
|
||||
if (is_dbSNP_SNP) {
|
||||
dbsnp_tp += 1;
|
||||
}else{
|
||||
dbsnp_fp += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= LOD_cutoff) {
|
||||
//System.out.println(alleleFreq.getLogOddsVarRef());
|
||||
num_snps++;
|
||||
}
|
||||
num_loci++;
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
public void printMetrics() {
|
||||
System.out.println("\nAllele Frequency Metrics:\n");
|
||||
System.out.printf("Precision of LOD >= %.0f SNPs w.r.t dbSNP: %.2f\n", LOD_cutoff, (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100);
|
||||
System.out.printf("\\--TP: %d\n", dbsnp_tp);
|
||||
System.out.printf("\\--FP: %d\n", dbsnp_fp);
|
||||
System.out.println();
|
||||
System.out.printf("SNPs (LOD > %.0f): %d\n", LOD_cutoff, num_snps);
|
||||
System.out.printf("Total loci: %d\n", num_loci);
|
||||
System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps);
|
||||
System.out.println();
|
||||
|
||||
}
|
||||
|
||||
|
||||
public void onTraversalDone() {
|
||||
printMetrics();
|
||||
}
|
||||
|
||||
|
||||
public Integer reduceInit() { return 0; }
|
||||
|
||||
public Integer reduce(Integer alleleFreq, Integer sum) {
|
||||
|
||||
//System.out.printf("%s %.2f\n", alleleFreq.asString(), alleleFreq.logOddsVarRef);
|
||||
return 0;//value + sum;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
@ -1,16 +1,16 @@
|
|||
package org.broadinstitute.sting.gatk.walkers;
|
||||
|
||||
//import org.broadinstitute.sting.gatk.iterators.LocusIterator;
|
||||
import org.broadinstitute.sting.gatk.LocusContext;
|
||||
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.utils.AlleleFrequencyMetrics;
|
||||
import org.broadinstitute.sting.utils.AlleleFrequencyEstimate;
|
||||
import net.sf.samtools.SAMRecord;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Arrays;
|
||||
|
||||
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker.AlleleFrequencyEstimate, Integer> {
|
||||
public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyEstimate, Integer> {
|
||||
|
||||
public AlleleFrequencyEstimate map(List<ReferenceOrderedDatum> rodData, char ref, LocusContext context) {
|
||||
|
||||
|
|
@ -50,6 +50,7 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
|
|||
for (byte b : bases)
|
||||
base_counts[nuc2num[b]]++;
|
||||
|
||||
|
||||
// Find alternate allele - 2nd most frequent non-ref allele
|
||||
// (maybe we should check for ties and eval both or check most common including quality scores)
|
||||
int altnum = -1; // start with first base numerical identity (0,1,2,3)
|
||||
|
|
@ -77,14 +78,11 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
|
|||
//System.out.printf(" DBSNP %s on %s => %s%n", dbsnp.toSimpleString(), dbsnp.strand, Utils.join("/", dbsnp.getAllelesFWD()));
|
||||
System.out.printf("ROD: %s ",dbsnp.toMediumString());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
if (debug >= 1) System.out.format(" %s\n", base_string);
|
||||
if (debug >= 2) System.out.println();
|
||||
|
||||
alleleMetrics.calculateMetrics(rodData, alleleFreq);
|
||||
|
||||
return alleleFreq;
|
||||
}
|
||||
|
||||
|
|
@ -181,40 +179,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
|
|||
}
|
||||
}
|
||||
|
||||
public class AlleleFrequencyEstimate {
|
||||
//AlleleFrequencyEstimate();
|
||||
char ref;
|
||||
char alt;
|
||||
int N;
|
||||
double qhat;
|
||||
double qstar;
|
||||
double logOddsVarRef;
|
||||
|
||||
public double getQstar() {return qstar;}
|
||||
public double getLogOddsVarRef() {return logOddsVarRef;}
|
||||
|
||||
|
||||
public AlleleFrequencyEstimate (char ref, char alt, int N, double qhat, double qstar, double logOddsVarRef) {
|
||||
this.ref = ref;
|
||||
this.alt = alt;
|
||||
this.N = N;
|
||||
this.qhat = qhat;
|
||||
this.qstar = qstar;
|
||||
this.logOddsVarRef = logOddsVarRef;
|
||||
}
|
||||
|
||||
public String asString () {
|
||||
// Print out the called bases
|
||||
// Notes: switched from qhat to qstar because qhat doesn't work at n=1 (1 observed base) where having a single non-ref
|
||||
// base has you calculate qstar = 0.0 and qhat = 0.49 and that leads to a genotype predicition of AG according
|
||||
// to qhat, but AA according to qstar. This needs to be further investigated to see whether we really want
|
||||
// to use qstar, but make N (number of chormosomes) switch to n (number of reads at locus) for n=1
|
||||
long numNonrefBases = Math.round(qstar * N);
|
||||
long numRefBases = N - numNonrefBases;
|
||||
return repeat(ref, numRefBases) + repeat(alt, numNonrefBases);
|
||||
}
|
||||
}
|
||||
|
||||
static double logOdds(double prob1, double prob2) {
|
||||
return Math.log10(prob1 / (1-prob1)) - Math.log10(prob2 / (1-prob2));
|
||||
}
|
||||
|
|
@ -327,15 +291,8 @@ public class AlleleFrequencyWalker extends BasicLociWalker<AlleleFrequencyWalker
|
|||
p_G_N_2[0] = 0.999;
|
||||
p_G_N_2[1] = 1e-3;
|
||||
p_G_N_2[2] = 1e-5;
|
||||
|
||||
alleleMetrics = new AlleleFrequencyMetrics();
|
||||
}
|
||||
|
||||
AlleleFrequencyMetrics alleleMetrics;
|
||||
|
||||
public void onTraversalDone() {
|
||||
alleleMetrics.printMetrics();
|
||||
}
|
||||
|
||||
|
||||
void print_base_qual_matrix(double [][]quals, int numReads) {
|
||||
|
|
|
|||
|
|
@ -0,0 +1,42 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
|
||||
|
||||
public class AlleleFrequencyEstimate {
|
||||
//AlleleFrequencyEstimate();
|
||||
char ref;
|
||||
char alt;
|
||||
int N;
|
||||
double qhat;
|
||||
double qstar;
|
||||
double logOddsVarRef;
|
||||
|
||||
public double getQstar() {
|
||||
return qstar;
|
||||
}
|
||||
|
||||
public double getLogOddsVarRef() {
|
||||
return logOddsVarRef;
|
||||
}
|
||||
|
||||
|
||||
public AlleleFrequencyEstimate(char ref, char alt, int N, double qhat, double qstar, double logOddsVarRef) {
|
||||
this.ref = ref;
|
||||
this.alt = alt;
|
||||
this.N = N;
|
||||
this.qhat = qhat;
|
||||
this.qstar = qstar;
|
||||
this.logOddsVarRef = logOddsVarRef;
|
||||
}
|
||||
|
||||
public String asString() {
|
||||
// Print out the called bases
|
||||
// Notes: switched from qhat to qstar because qhat doesn't work at n=1 (1 observed base) where having a single non-ref
|
||||
// base has you calculate qstar = 0.0 and qhat = 0.49 and that leads to a genotype predicition of AG according
|
||||
// to qhat, but AA according to qstar. This needs to be further investigated to see whether we really want
|
||||
// to use qstar, but make N (number of chormosomes) switch to n (number of reads at locus) for n=1
|
||||
long numNonrefBases = Math.round(qstar * N);
|
||||
long numRefBases = N - numNonrefBases;
|
||||
return AlleleFrequencyWalker.repeat(ref, numRefBases) + AlleleFrequencyWalker.repeat(alt, numNonrefBases);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
package org.broadinstitute.sting.utils;
|
||||
|
||||
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
|
||||
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
|
||||
import org.broadinstitute.sting.gatk.walkers.AlleleFrequencyWalker;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Created by IntelliJ IDEA.
|
||||
* User: andrewk
|
||||
* Date: Mar 18, 2009
|
||||
* Time: 5:28:58 PM
|
||||
* To change this template use File | Settings | File Templates.
|
||||
*/
|
||||
|
||||
|
||||
public class AlleleFrequencyMetrics {
|
||||
|
||||
long dbsnp_tp=0;
|
||||
long dbsnp_fp=0;
|
||||
long num_snps=0;
|
||||
long num_loci=0;
|
||||
|
||||
public void calculateMetrics(List<ReferenceOrderedDatum> rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) {
|
||||
|
||||
boolean is_dbSNP_SNP = false;
|
||||
|
||||
for ( ReferenceOrderedDatum datum : rodData ) {
|
||||
if ( datum != null && datum instanceof rodDbSNP) {
|
||||
rodDbSNP dbsnp = (rodDbSNP)datum;
|
||||
if (dbsnp.isSNP()) is_dbSNP_SNP = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLogOddsVarRef() >= 5.0) { // we confidently called it a SNP!
|
||||
if (is_dbSNP_SNP) {
|
||||
dbsnp_tp += 1;
|
||||
}else{
|
||||
dbsnp_fp += 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (alleleFreq.getQstar() > 0.0) num_snps++;
|
||||
num_loci++;
|
||||
|
||||
}
|
||||
|
||||
public void printMetrics() {
|
||||
System.out.println("\nAllele Frequency Metrics:\n");
|
||||
System.out.printf("Precision of LOD >= 5 SNPs w.r.t dbSNP: %.2f\n", (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100);
|
||||
System.out.printf("\\--TP: %d\n", dbsnp_tp);
|
||||
System.out.printf("\\--FP: %d\n", dbsnp_fp);
|
||||
System.out.println();
|
||||
System.out.printf("SNPs (LOD > 0): %d\n", num_snps);
|
||||
System.out.printf("Total loci: %d\n", num_loci);
|
||||
System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps);
|
||||
System.out.println();
|
||||
|
||||
|
||||
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
Loading…
Reference in New Issue