From 4e0cd6ab84a2fdf1ab9023c92e1b2360d3d9dd17 Mon Sep 17 00:00:00 2001 From: jmaguire Date: Sun, 22 Mar 2009 15:45:12 +0000 Subject: [PATCH] Now works on single samples and computes metrics. Here is an example metrics output from a very tiny region: Allele Frequency Metrics (LOD >= 5) ------------------------------------------------- Total loci : 14704 Total called with confidence : 10920 (74.27%) Number of Variants : 16 (0.15%) (1/682) Fraction of variant sites in dbSNP : 100.00% Missing: Microarray(hapmap) concordance, tp/fp. Optional: Histograms of depth of coverage, LOD, observed allele frequency, etc. Still to implement: Propagate command line argument N (number of chromosomes) into walker to enable pooled calling. Take allele frequency priors as input. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@133 348d0f76-0448-11de-a6fe-93d51630548a --- .../walkers/AlleleFrequencyMetricsWalker.java | 108 +++++++++++------- .../gatk/walkers/AlleleFrequencyWalker.java | 70 ++++++------ 2 files changed, 100 insertions(+), 78 deletions(-) diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java index 5de700eab..6357640e9 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyMetricsWalker.java @@ -17,72 +17,96 @@ import java.util.List; * To change this template use File | Settings | File Templates. */ -public class AlleleFrequencyMetricsWalker extends BasicLociWalker { +public class AlleleFrequencyMetricsWalker extends BasicLociWalker +{ - long dbsnp_tp=0; - long dbsnp_fp=0; - long num_snps=0; - long num_loci=0; + long dbsnp_hits=0; + long num_variants=0; + long num_loci_total=0; + long num_loci_confident=0; double LOD_cutoff = 5; - //public void calculateMetrics(List rodData, AlleleFrequencyWalker.AlleleFrequencyEstimate alleleFreq) { - public Integer map(List rodData, char ref, LocusContext context) { - - //AlleleFrequencyWalker = AlleleFrequencyWalker(); - AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyWalker().map(rodData, ref, context); + AlleleFrequencyWalker caller; + public AlleleFrequencyEstimate map(List rodData, char ref, LocusContext context) + { + AlleleFrequencyEstimate alleleFreq = caller.map(rodData, ref, context); boolean is_dbSNP_SNP = false; - for ( ReferenceOrderedDatum datum : rodData ) { - if ( datum != null && datum instanceof rodDbSNP) { + for ( ReferenceOrderedDatum datum : rodData ) + { + if ( datum != null && datum instanceof rodDbSNP) + { rodDbSNP dbsnp = (rodDbSNP)datum; if (dbsnp.isSNP()) is_dbSNP_SNP = true; } } - if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLOD() >= LOD_cutoff) { // we confidently called it a SNP! - if (is_dbSNP_SNP) { - dbsnp_tp += 1; - }else{ - dbsnp_fp += 1; + num_loci_total += 1; + + if (Math.abs(alleleFreq.LOD) >= LOD_cutoff) { num_loci_confident += 1; } + + if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLOD() >= LOD_cutoff) + { + // Confident variant. + + num_variants += 1; + + if (is_dbSNP_SNP) + { + dbsnp_hits += 1; } } - if (alleleFreq.getQstar() > 0.0 && alleleFreq.getLOD() >= LOD_cutoff) { - //System.out.println(alleleFreq.getLogOddsVarRef()); - num_snps++; + return alleleFreq; + } + + public void printMetrics() + { + if (num_loci_total == 0) { return; } + + System.out.printf("\n"); + System.out.printf("METRICS Allele Frequency Metrics (LOD >= %.0f)\n", LOD_cutoff); + System.out.printf("METRICS -------------------------------------------------\n"); + System.out.printf("METRICS Total loci : %d\n", num_loci_total); + System.out.printf("METRICS Total called with confidence : %d (%.2f%%)\n", num_loci_confident, 100.0 * (float)num_loci_confident / (float)num_loci_total); + if (num_variants != 0) + { + System.out.printf("METRICS Number of Variants : %d (%.2f%%) (1/%d)\n", num_variants, 100.0 * (float)num_variants / (float)num_loci_confident, num_loci_confident / num_variants); + System.out.printf("METRICS Fraction of variant sites in dbSNP : %.2f%%\n", 100.0 * (float)dbsnp_hits / (float)num_variants); } - num_loci++; - - return 1; + System.out.println(); } - public void printMetrics() { - System.out.println("\nAllele Frequency Metrics:\n"); - System.out.printf("Precision of LOD >= %.0f SNPs w.r.t dbSNP: %.2f\n", LOD_cutoff, (float)dbsnp_tp / (dbsnp_fp + dbsnp_tp) * 100); - System.out.printf("\\--TP: %d\n", dbsnp_tp); - System.out.printf("\\--FP: %d\n", dbsnp_fp); - System.out.println(); - System.out.printf("SNPs (LOD > %.0f): %d\n", LOD_cutoff, num_snps); - System.out.printf("Total loci: %d\n", num_loci); - System.out.printf("SNPs / loci: 1/%.0f\n", (float)num_loci/num_snps); - System.out.println(); - - } - - - public void onTraversalDone() { + public void onTraversalDone() + { printMetrics(); } + public String reduceInit() + { + caller = new AlleleFrequencyWalker(); + return ""; + } - public Integer reduceInit() { return 0; } + public String reduce(AlleleFrequencyEstimate alleleFreq, String sum) + { + if ((alleleFreq.LOD >= 5) || (alleleFreq.LOD <= -5)) + { + System.out.print(String.format("RESULT %s %c %c %f %f %f %d\n", + alleleFreq.location, + alleleFreq.ref, + alleleFreq.alt, + alleleFreq.qhat, + alleleFreq.qstar, + alleleFreq.LOD, + alleleFreq.depth)); + } - public Integer reduce(Integer alleleFreq, Integer sum) { + if (this.num_loci_total % 10000 == 0) { printMetrics(); } - //System.out.printf("%s %.2f\n", alleleFreq.asString(), alleleFreq.logOddsVarRef); - return 0;//value + sum; + return "null"; } diff --git a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java index 0443fac87..f81fd102b 100755 --- a/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java +++ b/playground/java/src/org/broadinstitute/sting/gatk/walkers/AlleleFrequencyWalker.java @@ -48,7 +48,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker best_posterior) { best_qstar = qstar; best_qhat = q; best_posterior = posterior; + + best_pDq = pDq; + best_pqG = pqG; + best_pG = pG; } } } - double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, 0.0, 0) + P_G(N, 0); + double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0); double LOD = best_posterior - posterior_null_hyp; - AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(num2nuc[refnum], + AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location, + num2nuc[refnum], num2nuc[altnum], N, best_qhat, best_qstar, - LOD); - + LOD, + depth); return alleleFreq; } @@ -182,14 +179,10 @@ public class AlleleFrequencyWalker extends BasicLociWalker= 5) + { + System.out.print(String.format("RESULT %s %c %c %f %f %f %d\n", + alleleFreq.location, + alleleFreq.ref, + alleleFreq.alt, + alleleFreq.qhat, + alleleFreq.qstar, + alleleFreq.LOD, + alleleFreq.depth)); + } return 0; } static int nuc2num[]; static char num2nuc[]; - static double p_G_N_2[]; // pop. gen. priors for N=2 public AlleleFrequencyWalker() { nuc2num = new int[128]; nuc2num['A'] = 0; @@ -260,11 +263,6 @@ public class AlleleFrequencyWalker extends BasicLociWalker