Unified caller is go.

AlleleFrequencyWalker and related classes work equally well for 2 or 200 chromosomes. 

Single Sample Calling:

	Allele Frequency Metrics (LOD >= 5)
	-------------------------------------------------
	Total loci                            : 171575
	Total called with confidence          : 168615 (98.27%)
	Number of variants                    : 111 (0.07%) (1/1519)
	Fraction of variant sites in dbSNP    : 87.39%
	-------------------------------------------------
	
    HapMap metrics are currently coming up all zero; a fix is planned.

Pooled Calling:

	AAF r-squared after EM is 0.99. 
    AAF r-squared after EM for alleles < 20% (in pools of ~100-200 chromosomes) is 0.95 (0.75 before EM)

    Still not using fractional genotype counts in EM. That should improve r-squared for low frequency alleles.


Chores still outstanding:
    - make a real pooled caller walker (as opposed to my experiment framework).
    - add fractional genotype counts to EM cycle.
    - add pool metrics to the metrics class? *shrug* we don't really have truth outside of a contrived experiment...



git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@380 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
jmaguire 2009-04-13 12:29:51 +00:00
parent f39092526d
commit 6e180ed44e
4 changed files with 426 additions and 78 deletions

View File

@ -31,25 +31,39 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
Random random;              // seeded RNG used for downsampling (seed 0 for run-to-run reproducibility)
PrintStream output;         // GFF output stream; "-" selects stdout
private boolean initalized = false; // guards against repeated initialization (map() calls this defensively)

/**
 * One-time setup: seeds the RNG and opens the GFF output stream.
 * Safe to call repeatedly; only the first call does any work.
 * If the output file cannot be opened, prints the stack trace and exits the JVM.
 */
public void initalize()
{
    if (initalized) { return; }
    try
    {
        this.random = new java.util.Random(0);
        if ( GFF_OUTPUT_FILE.equals("-") )
            this.output = out;
        else
            this.output = new PrintStream(GFF_OUTPUT_FILE);
    }
    catch (Exception e)
    {
        // NOTE(review): exiting the JVM from inside a walker is heavy-handed — consider rethrowing instead.
        e.printStackTrace();
        System.exit(-1);
    }
    initalized = true;
}
/** This walker needs the read pileup at each locus, not just the reference base. */
public boolean requiresReads() { return true; }
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
{
// Convert context data into bases and 4-base quals
String bases = getBases(context);
double quals[][] = getQuals(context);
// init in the map because GATK doesn't appear to be initing me today. no problemo.
this.initalize();
/*
// DEBUG: print the data for a read
{
List<SAMRecord> reads = context.getReads();
for (int i = 0; i < reads.size(); i++)
{
String cigar = reads.get(i).getCigarString();
System.out.println("DEBUG " + cigar);
}
}
*/
// Convert context data into bases and 4-base quals
String bases = getBases(context);
double quals[][] = getQuals(context);
//String[] indels = getIndels(context);
logger.debug(String.format("In alleleFrequnecy walker: N=%d, d=%d", N, DOWNSAMPLE));
@ -63,6 +77,14 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
while (downsampled_bases.length() < DOWNSAMPLE)
{
int choice;
/*
System.out.printf("DBG %b %b %b\n",
random == null,
bases == null,
picked_bases == null);
*/
for (choice = random.nextInt(bases.length()); picked_bases[choice] == 1; choice = random.nextInt(bases.length()));
picked_bases[choice] = 1;
downsampled_bases += bases.charAt(choice);
@ -137,6 +159,56 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return new String(bases);
}
/*
static public String[] getIndels(LocusContext context)
{
List<SAMRecord> reads = context.getReads();
List<Integer> offsets = context.getOffsets();
String[] indels = new String[reads.size()];
for (int i = 0; i < reads.size(); i++)
{
SAMRecord read = reads.get(i);
Cigar cigar = read.getCigar();
int k = 0;
for (int j = 0; j < cigar.numCigarElements(); j++)
{
CigarOperator operator = cigar.getCigarElement(j).getOperator();
int length = cigar.getCigarElement(j).getLength();
if (operator == CigarOperator.M)
{
k += length;
}
else if ((k != offset) && (operator == CigarOperator.I))
{
k += length;
}
else if ((k == offset) && (operator == CigarOperator.I))
{
// this insertion is associated with this offset.
break;
}
else if ((k == offset) && (operator == CigarOperator.D))
{
break;
}
else if ((k == offset) &&
((operator == CigarOperator.I) || (operator == CigarOperator.D)))
{
// no indel here.
indels[i] = "";
break;
}
}
}
return indels;
}
*/
static public double[][] getQuals (LocusContext context)
{
int numReads = context.getReads().size(); //numReads();
@ -220,51 +292,48 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
// alt = alternate hypothesis base (most frequent nonref base)
// ref = reference base
// b = number of bases at locus
double epsilon = 0; // 1e-2;
double qstar;
int qstar_N;
double qstep = 0.001;
double qend = 1.0 + qstep / 10; // makes sure we get to 1.0 even with rounding error of qsteps accumulating
double posterior_null_hyp;
// Initialize empyt MixtureLikelihood holders for each possible qstar (N+1)
MixtureLikelihood[] bestMixtures = new MixtureLikelihood[N+1];
// Fill 1..N ML positions with 0 valued MLs
for (int i=1; i<=N; i++) bestMixtures[i] = new MixtureLikelihood();
// Calculate null hypothesis for qstar = 0, qhat = 0
double posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, epsilon, 0) + P_G(N, 0);
// Put null hypothesis into ML[0] because that is our prob of that theory and we don't loop over it below
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
// hypothetic allele balance that we sample over
for (double q=0.0; q <= qend; q += qstep)
{
long q_R = Math.round(q*bases.length);
double q = ML_q(bases, quals, refnum, altnum);
double pDq = P_D_q(bases, quals, q, refnum, altnum);
long q_R = Math.round(q*bases.length);
posterior_null_hyp = P_D_q(bases, quals, 0.0, refnum, altnum) + P_q_G(bases, N, 0.0, 0, 0) + P_G(N, 0);
bestMixtures[0] = new MixtureLikelihood(posterior_null_hyp, 0.0, 0.0);
// qstar - true allele balances
for (qstar = epsilon + ((1.0 - 2*epsilon)/N), qstar_N = 1; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N, qstar_N++)
//for (qstar = epsilon + ((1.0 - 2*epsilon)/N), qstar_N = 1; qstar <= 1.0; qstar += (1.0 - 2*epsilon)/N, qstar_N++)
for (qstar_N = 1; qstar_N <= N; qstar_N += 1)
{
qstar = (double)qstar_N / (double)N;
double pqG = P_q_G(bases, N, q, qstar, q_R);
double pG = P_G(N, qstar_N);
double posterior = pDq + pqG + pG;
if (posterior > bestMixtures[qstar_N].posterior)
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
bestMixtures[qstar_N] = new MixtureLikelihood(posterior, qstar, q);
//System.out.format("%.2f %.2f %5.2f %5.2f %5.2f %5.2f\n", q, qstar, pDq, pqG, pG, posterior);
//System.out.format("DBG %s %.2f %.2f %5.2f %5.2f %5.2f %5.2f %d %d %s\n", location, q, qstar, pDq, pqG, pG, posterior, (int)(q*bases.length), (int)((1.0-q)*bases.length), new String(bases));
}
}
// First reverse sort NONREF mixtures according to highest posterior probabililty
Arrays.sort(bestMixtures, 1, N+1);
Arrays.sort(bestMixtures);
// Calculate Lod of any variant call versus the reference call
// Answers how confident are we in the best variant (nonref) mixture versus the null hypothesis
// reference mixture - qhat = qstar = 0.0
double lodVarVsRef = bestMixtures[1].posterior - posterior_null_hyp;
double lodVarVsRef = bestMixtures[0].posterior - posterior_null_hyp;
// Now reverse sort ALL mixtures according to highest posterior probability
Arrays.sort(bestMixtures);
@ -273,6 +342,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
// Answers how confident are we in the best mixture versus the nextPosition best mixture
double lodBestVsNextBest = bestMixtures[0].posterior - bestMixtures[1].posterior;
if (lodVarVsRef == 0) { lodVarVsRef = -1.0 * lodBestVsNextBest; }
AlleleFrequencyEstimate alleleFreq = new AlleleFrequencyEstimate(location,
num2nuc[refnum],
num2nuc[altnum],
@ -281,6 +352,8 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
bestMixtures[0].qstar,
lodVarVsRef,
lodBestVsNextBest,
bestMixtures[0].posterior,
posterior_null_hyp,
depth);
return alleleFreq;
}
@ -310,7 +383,19 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
}
}
static double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum)
/**
 * Maximum-likelihood estimate of the alternate allele balance: the fraction of
 * (ref + alt) observations that are the alt base.  Bases matching neither the
 * ref nor the alt index are ignored.
 *
 * @param bases  observed bases at this locus
 * @param quals  per-base quality matrix (unused here; kept for signature symmetry with P_D_q)
 * @param refnum numeric index of the reference base (see num2nuc)
 * @param altnum numeric index of the alternate base
 * @return alt_count / (ref_count + alt_count), or 0.0 when neither base was
 *         observed (the previous code returned NaN from 0/0 in that case,
 *         which poisons every downstream posterior sum)
 */
double ML_q(byte[] bases, double[][] quals, int refnum, int altnum)
{
    double ref_count = 0;
    double alt_count = 0;
    for (int i = 0; i < bases.length; i++)
    {
        if (bases[i] == num2nuc[refnum]) { ref_count += 1; }
        if (bases[i] == num2nuc[altnum]) { alt_count += 1; }
    }
    double total = ref_count + alt_count;
    // Guard the empty/degenerate pileup: 0/0 would yield NaN.
    if (total == 0) { return 0.0; }
    return alt_count / total;
}
double P_D_q(byte[] bases, double[][]quals, double q, int refnum, int altnum)
{
double p = 0.0;
@ -322,15 +407,25 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return p;
}
static double P_q_G(byte [] bases, int N, double q, double qstar, long q_R)
/**
 * Log10 probability of the observed allele balance q given true balance qstar,
 * P(q | G).  Only implemented for the diploid case (N == 2); all other N
 * return 0.0 (log10 of 1.0, i.e. a flat/no-op term in the posterior sum).
 */
double P_q_G(byte [] bases, int N, double q, double qstar, long q_R)
{
    // Clamp qstar away from 0 and 1 so binomialProb never sees a degenerate success rate.
    double epsilon = 1e-3;
    if (qstar == 0) { qstar = epsilon; }
    if (qstar == 1) { qstar = 1.0 - epsilon; }
    // NOTE(review): pools (N > 2) currently get no allele-balance term at all.
    if (N != 2) { return 0.0; }
    else { return Math.log10(binomialProb(q_R, bases.length, qstar)); }
}
static double P_G(int N, int qstar_N)
double P_G(int N, int qstar_N)
{
// badly hard coded right now.
if ((N == 2) && (prior_alt_frequency != -1))
{
return ((double)qstar_N * prior_alt_frequency) + (((double)N-(double)qstar_N)*(1.0-prior_alt_frequency));
}
if (qstar_N == 0) { return Math.log10(0.999); }
else if (qstar_N == N) { return Math.log10(1e-5); }
else { return Math.log10(1e-3); }
@ -462,6 +557,12 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
return "";
}
// Per-site alternate-allele frequency prior; -1.0 means "unset" (P_G falls back to its hard-coded priors).
private double prior_alt_frequency = -1.0;
/** Sets the alt-allele frequency prior consumed by P_G; the pooled EM loop updates this each iteration. */
public void setAlleleFrequencyPrior(double frequency)
{
    this.prior_alt_frequency = frequency;
}
static int nuc2num[];  // base char -> numeric index; filled in the constructor
static char num2nuc[]; // numeric index -> base char; constructor sets [2]='T', [3]='G' (indices 0/1 assigned outside this view)
public AlleleFrequencyWalker() {
@ -481,20 +582,6 @@ public class AlleleFrequencyWalker extends LocusWalker<AlleleFrequencyEstimate,
num2nuc[2] = 'T';
num2nuc[3] = 'G';
try
{
this.random = new java.util.Random(0);
if ( GFF_OUTPUT_FILE.equals("-") )
this.output = out;
else
this.output = new PrintStream(GFF_OUTPUT_FILE);
}
catch (Exception e)
{
e.printStackTrace();
System.exit(-1);
}
//if (System.getenv("N") != null) { this.N = (new Integer(System.getenv("N"))).intValue(); }
//else { this.N = 2; }
//

View File

@ -1,6 +1,9 @@
package org.broadinstitute.sting.playground.gatk.walkers;
import net.sf.samtools.*;
import org.broadinstitute.sting.gatk.*;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedData;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
@ -9,27 +12,258 @@ import org.broadinstitute.sting.gatk.walkers.LocusWalker;
import org.broadinstitute.sting.gatk.LocusContext;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import org.broadinstitute.sting.playground.utils.AlleleFrequencyEstimate;
import org.broadinstitute.sting.utils.*;
import org.broadinstitute.sting.utils.cmdLine.Argument;
import java.util.List;
import java.util.*;
public class PoolCallingExperiment extends LocusWalker<AlleleFrequencyEstimate, String>
{
List<AlleleFrequencyWalker> deep_callers;
List<AlleleFrequencyWalker> shallow_callers;
AlleleFrequencyWalker pooled_caller;
List<AlleleFrequencyWalker> deep_callers = null;
List<AlleleFrequencyWalker> shallow_callers = null;
AlleleFrequencyWalker pooled_caller = null;
List<String> sample_names = null;
@Argument public int DOWNSAMPLE;
//@Argument public int DOWNSAMPLE;
public int DOWNSAMPLE = 4;
public int DOWNSAMPLE_NOISE = 3;
public boolean LOG_METRICS = true;
private Random random;
/**
 * Per-traversal setup: builds one diploid "deep" caller and one diploid
 * "shallow" caller per read group (sample) in the input BAM, plus a single
 * pooled caller sized at 2 chromosomes per sample.
 */
public void initialize()
{
    GenomeAnalysisTK toolkit = this.getToolkit();
    SAMFileHeader header = toolkit.getSamReader().getFileHeader();
    List<SAMReadGroupRecord> read_groups = header.getReadGroups();

    sample_names = new ArrayList<String>();
    deep_callers = new ArrayList<AlleleFrequencyWalker>();
    shallow_callers = new ArrayList<AlleleFrequencyWalker>();

    // Fixed seed so the downsampling experiment is reproducible run-to-run.
    random = new Random(666);

    for (SAMReadGroupRecord read_group : read_groups)
    {
        sample_names.add(read_group.getSample());
        deep_callers.add(makeCaller(2));
        shallow_callers.add(makeCaller(2));
    }

    pooled_caller = makeCaller(sample_names.size() * 2);
    pooled_caller.reduceInit();
}

/**
 * Builds and initializes an AlleleFrequencyWalker for n chromosomes, with
 * downsampling disabled and GFF output discarded (this experiment only
 * consumes the returned AlleleFrequencyEstimates).
 */
private AlleleFrequencyWalker makeCaller(int n)
{
    AlleleFrequencyWalker caller = new AlleleFrequencyWalker();
    caller.N = n;
    caller.DOWNSAMPLE = 0;
    caller.GFF_OUTPUT_FILE = "/dev/null";
    caller.initalize();
    return caller;
}
/**
 * Pooled-calling EM experiment at one locus.
 *
 * Per-sample deep contexts establish "truth" genotypes; samples called
 * confidently (|LOD| >= 5) are downsampled and pooled, the pool is called to
 * seed an allele-frequency estimate, and an EM loop then re-calls each
 * downsampled sample with the current frequency as its prior, re-estimating
 * the frequency from those calls each iteration.  Results are printed as
 * POOLED_CALL / EVAL / TRAJECTORY lines; always returns null (this walker
 * reports via stdout rather than the reduce chain).
 */
public AlleleFrequencyEstimate map(RefMetaDataTracker tracker, char ref, LocusContext context)
{
    // 1. separate each context.
    LocusContext[] deep_contexts = new LocusContext[sample_names.size()];
    for (int i = 0; i < sample_names.size(); i++)
    {
        deep_contexts[i] = filterLocusContext(context, sample_names.get(i), 0);
    }

    // 2. Pool the deep contexts. (for the hell of it)
    // NOTE(review): deep_pooled_context is never used below.
    LocusContext deep_pooled_context = poolLocusContext(deep_contexts);

    // 3. Call individuals from deep coverage.
    AlleleFrequencyEstimate[] deep_calls = new AlleleFrequencyEstimate[sample_names.size()];
    for (int i = 0; i < sample_names.size(); i++)
    {
        deep_calls[i] = deep_callers.get(i).map(tracker, ref, deep_contexts[i]);
    }

    // 4. Count "true" allele frequency from deep calls, and compare to the estimated frequency from the pool.
    // 4.1. Count "true" allele frequency from deep calls, and downsample the individuals with solid truth.
    double true_alt_freq = 0.0;
    double true_N = 0.0;
    LocusContext[] downsampled_contexts = new LocusContext[sample_names.size()];
    for (int i = 0; i < deep_calls.length; i++)
    {
        downsampled_contexts[i] = null;
        // Keep only samples with a confident deep call (variant or reference).
        if ((deep_calls[i].lodVsNextBest >= 5.0) || (deep_calls[i].lodVsRef <= -5.0))
        {
            true_alt_freq += deep_calls[i].emperical_allele_frequency() * deep_calls[i].N;
            true_N += 2;
            downsampled_contexts[i] = filterLocusContext(context, sample_names.get(i), DOWNSAMPLE + (int)(random.nextGaussian()*DOWNSAMPLE_NOISE));
        }
    }
    // NOTE(review): this division happens before the true_N == 0 bail-out, so
    // true_alt_freq is momentarily NaN in that case — harmless only because we return next.
    true_alt_freq /= true_N;
    if (true_N == 0.0) { return null; } // just bail if true_N == 0.

    // 4.2. Pool just the contexts that have truth data.
    LocusContext pooled_context = poolLocusContext(downsampled_contexts);

    // EM Loop:
    AlleleFrequencyEstimate pooled_call = null;
    double correct_shallow_calls = 0;
    double total_shallow_calls = 0;
    AlleleFrequencyEstimate[] shallow_calls = null;
    double EM_alt_freq = 0;
    double EM_N = 0;
    double shallow_calls_fraction_correct = 0;

    // 5. Call the pool. (this step is the EM init)
    pooled_caller.N = (int)true_N;
    pooled_call = pooled_caller.map(tracker, ref, pooled_context);
    System.out.print("POOLED_CALL " + pooled_call.asTabularString());

    // (this loop is the EM cycle)
    EM_alt_freq = pooled_call.qhat;
    int num_iterations = 10;
    double[] trajectory = new double[num_iterations + 1]; trajectory[0] = EM_alt_freq;
    // NOTE(review): likelihood_trajectory (and the per-iteration `likelihood`
    // accumulator below) are declared but never filled or printed.
    double[] likelihood_trajectory = new double[num_iterations + 1];
    for (int iterations = 0; iterations < num_iterations; iterations++)
    {
        // 6. Re-call from shallow coverage using the estimated frequency as a prior,
        //    and compare to true deep calls,
        //    and compute new MAF estimate.
        correct_shallow_calls = 0;
        total_shallow_calls = 0;
        shallow_calls = new AlleleFrequencyEstimate[sample_names.size()];
        EM_N = 0.0;
        double EM_sum = 0.0;
        double likelihood = 0.0;
        for (int i = 0; i < deep_calls.length; i++)
        {
            // Only shallow-call things where we know the truth!
            if ((deep_calls[i].lodVsNextBest >= 5.0) || (deep_calls[i].lodVsRef <= -5.0))
            {
                shallow_callers.get(i).setAlleleFrequencyPrior(EM_alt_freq);
                // NOTE(review): re-downsamples from the full context each
                // iteration, so each EM pass sees a fresh random subset.
                shallow_calls[i] = shallow_callers.get(i).map(tracker, ref, filterLocusContext(context, sample_names.get(i), DOWNSAMPLE + (int)(random.nextGaussian() * DOWNSAMPLE_NOISE)));

                String deep_genotype = deep_calls[i].genotype();
                String shallow_genotype = shallow_calls[i].genotype();

                /*
                System.out.printf("DBG: %f %f %f %f\n",
                                    deep_calls[i].lodVsNextBest,
                                    deep_calls[i].lodVsRef,
                                    shallow_calls[i].lodVsNextBest,
                                    shallow_calls[i].lodVsRef);
                */

                if (deep_genotype.equals(shallow_genotype)) { correct_shallow_calls += 1; }
                total_shallow_calls += 1;

                EM_sum += shallow_calls[i].emperical_allele_frequency() * shallow_calls[i].N;
                EM_N += 2;
            }
        }
        EM_alt_freq = EM_sum / EM_N;
        shallow_calls_fraction_correct = correct_shallow_calls / total_shallow_calls;
        trajectory[iterations+1] = EM_alt_freq;
    }

    // 7. Compare to estimation from the pool.
    System.out.printf("EVAL %s %f %f %f %f %f %f %d %d %f %f %f %f\n",
                        pooled_call.location,
                        pooled_call.lodVsRef,
                        true_alt_freq,
                        pooled_call.qhat,
                        pooled_call.qstar,
                        true_alt_freq * true_N,
                        pooled_call.emperical_allele_frequency() * true_N,
                        pooled_call.N,
                        pooled_call.depth,
                        total_shallow_calls,
                        correct_shallow_calls,
                        shallow_calls_fraction_correct,
                        EM_alt_freq);

    System.out.print("TRAJECTORY ");
    for (int i = 0; i < trajectory.length; i++)
    {
        System.out.printf("%f ", trajectory[i]);
    }
    System.out.print("\n");

    return null;
}
/**
 * Merges the reads and offsets from every non-null per-sample context into a
 * single combined pileup.  The returned context's location is taken from the
 * contexts themselves (they all describe the same locus).
 */
private LocusContext poolLocusContext(LocusContext[] contexts)
{
    ArrayList<SAMRecord> pooled_reads = new ArrayList<SAMRecord>();
    ArrayList<Integer> pooled_offsets = new ArrayList<Integer>();
    GenomeLoc pooled_location = null;

    for (LocusContext sample_context : contexts)
    {
        if (sample_context == null) { continue; }
        pooled_location = sample_context.getLocation();
        pooled_reads.addAll(sample_context.getReads());
        pooled_offsets.addAll(sample_context.getOffsets());
    }

    return new LocusContext(pooled_location, pooled_reads, pooled_offsets);
}
private LocusContext filterLocusContext(LocusContext context, String sample_name, int downsample)
{
ArrayList<SAMRecord> reads = new ArrayList<SAMRecord>();
ArrayList<Integer> offsets = new ArrayList<Integer>();
for (int i = 0; i < context.getReads().size(); i++)
{
String read_group = (String)(context.getReads().get(i).getAttribute("RG"));
String sample = context.getReads().get(i).getHeader().getReadGroup(read_group).READ_GROUP_SAMPLE_TAG;
System.out.println("RG: " + read_group + " SAMPLE: " + sample);
}
return null;
SAMRecord read = context.getReads().get(i);
Integer offset = context.getOffsets().get(i);
String RG = (String)(read.getAttribute("RG"));
String sample = read.getHeader().getReadGroup(RG).getSample();
if (sample == sample_name)
{
reads.add(read);
offsets.add(offset);
}
}
if (downsample != 0)
{
List<Integer> perm = new ArrayList<Integer>();
for (int i = 0; i < reads.size(); i++) { perm.add(i); }
perm = Utils.RandomSubset(perm, downsample);
ArrayList<SAMRecord> downsampled_reads = new ArrayList<SAMRecord>();
ArrayList<Integer> downsampled_offsets = new ArrayList<Integer>();
for (int i = 0; i < perm.size(); i++)
{
downsampled_reads.add(reads.get(perm.get(i)));
downsampled_offsets.add(offsets.get(perm.get(i)));
}
reads = downsampled_reads;
offsets = downsampled_offsets;
}
return new LocusContext(context.getLocation(), reads, offsets);
}
public void onTraversalDone()

View File

@ -1,6 +1,8 @@
package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import java.util.Arrays;
import java.lang.Math;
import org.broadinstitute.sting.utils.GenomeLoc;
public class AlleleFrequencyEstimate {
@ -13,12 +15,14 @@ public class AlleleFrequencyEstimate {
public double qstar;
public double lodVsRef;
public double lodVsNextBest;
public double pBest;
public double pRef;
public int depth;
public String notes;
GenomeLoc l;
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, int depth)
public AlleleFrequencyEstimate(GenomeLoc location, char ref, char alt, int N, double qhat, double qstar, double lodVsRef, double lodVsNextBest, double pBest, double pRef, int depth)
{
this.location = location;
this.ref = ref;
@ -32,6 +36,29 @@ public class AlleleFrequencyEstimate {
this.notes = "";
}
/** Return the most likely genotype. */
/**
 * Return the most likely genotype as a sorted string of allele characters
 * (e.g. "AC" for a diploid het): qstar * N alt alleles, the rest reference.
 */
public String genotype()
{
    // FIX: round to the nearest chromosome count instead of truncating.
    // qstar is k/N computed in floating point, so (int)(qstar * N) could
    // drop an allele to representation error; rounding also matches
    // emperical_allele_frequency(), which uses Math.round.
    int alt_count = (int) Math.round(qstar * N);
    int ref_count = N - alt_count;
    char[] alleles = new char[N];
    int i;
    for (i = 0; i < ref_count; i++) { alleles[i] = ref; }
    for (; i < N; i++) { alleles[i] = alt; }
    Arrays.sort(alleles);
    return new String(alleles);
}
/**
 * qhat rounded to the nearest multiple of 1/N, i.e. the allele frequency
 * snapped to this sample's chromosome count.
 */
public double emperical_allele_frequency()
{
    // Delegate to the parameterized overload using this estimate's own N.
    return emperical_allele_frequency(this.N);
}

/** Same as above, but snapped to an arbitrary chromosome count N. */
public double emperical_allele_frequency(int N)
{
    return (double)Math.round((double)qhat * (double)N) / (double)N;
}
public String asGFFString()
{
return String.format("%s\tCALLER\tVARIANT\t%s\t%s\t%f\t.\t.\tREF %c\t;\tALT %c\t;\tFREQ %f\n",

View File

@ -143,26 +143,26 @@ public class AlleleMetrics {
if (num_loci_total == 0) { return; }
out.printf("\n");
out.printf("METRICS Allele Frequency Metrics (LOD >= %.0f)\n", LOD_cutoff);
out.printf("METRICS -------------------------------------------------\n");
out.printf("METRICS Total loci : %d\n", num_loci_total);
out.printf("METRICS Total called with confidence : %d (%.2f%%)\n", num_loci_confident, 100.0 * (float)num_loci_confident / (float)num_loci_total);
out.printf("Allele Frequency Metrics (LOD >= %.0f)\n", LOD_cutoff);
out.printf("-------------------------------------------------\n");
out.printf("Total loci : %d\n", num_loci_total);
out.printf("Total called with confidence : %d (%.2f%%)\n", num_loci_confident, 100.0 * (float)num_loci_confident / (float)num_loci_total);
if (num_variants != 0)
{
out.printf("METRICS Number of variants : %d (%.2f%%) (1/%d)\n", num_variants, 100.0 * (float)num_variants / (float)num_loci_confident, num_loci_confident / num_variants);
out.printf("METRICS Fraction of variant sites in dbSNP : %.2f%%\n", 100.0 * (float)dbsnp_hits / (float)num_variants);
out.printf("METRICS -------------------------------------------------\n");
out.printf("METRICS -- Hapmap Genotyping performance --\n");
out.printf("METRICS Num. conf. calls at Hapmap chip sites : %d\n", hapmap_genotype_correct + hapmap_genotype_incorrect);
out.printf("METRICS Conf. calls at chip sites correct : %d\n", hapmap_genotype_correct);
out.printf("METRICS Conf. calls at chip sites incorrect : %d\n", hapmap_genotype_incorrect);
out.printf("METRICS %% of confident calls that are correct : %.2f%%\n", 100.0 * (float) hapmap_genotype_correct / (float)(hapmap_genotype_correct + hapmap_genotype_incorrect));
out.printf("METRICS -------------------------------------------------\n");
out.printf("METRICS -- Hapmap Reference/Variant performance --\n");
out.printf("METRICS Num. conf. calls at Hapmap chip sites : %d\n", hapmap_refvar_correct + hapmap_refvar_incorrect);
out.printf("METRICS Conf. calls at chip sites correct : %d\n", hapmap_refvar_correct);
out.printf("METRICS Conf. calls at chip sites incorrect : %d\n", hapmap_refvar_incorrect);
out.printf("METRICS %% of confident calls that are correct : %.2f%%\n", 100.0 * (float) hapmap_refvar_correct / (float)(hapmap_refvar_correct + hapmap_refvar_incorrect));
out.printf("Number of variants : %d (%.2f%%) (1/%d)\n", num_variants, 100.0 * (float)num_variants / (float)num_loci_confident, num_loci_confident / num_variants);
out.printf("Fraction of variant sites in dbSNP : %.2f%%\n", 100.0 * (float)dbsnp_hits / (float)num_variants);
out.printf("-------------------------------------------------\n");
out.printf("-- Hapmap Genotyping performance --\n");
out.printf("Num. conf. calls at Hapmap chip sites : %d\n", hapmap_genotype_correct + hapmap_genotype_incorrect);
out.printf("Conf. calls at chip sites correct : %d\n", hapmap_genotype_correct);
out.printf("Conf. calls at chip sites incorrect : %d\n", hapmap_genotype_incorrect);
out.printf("%% of confident calls that are correct : %.2f%%\n", 100.0 * (float) hapmap_genotype_correct / (float)(hapmap_genotype_correct + hapmap_genotype_incorrect));
out.printf("-------------------------------------------------\n");
out.printf("-- Hapmap Reference/Variant performance --\n");
out.printf("Num. conf. calls at Hapmap chip sites : %d\n", hapmap_refvar_correct + hapmap_refvar_incorrect);
out.printf("Conf. calls at chip sites correct : %d\n", hapmap_refvar_correct);
out.printf("Conf. calls at chip sites incorrect : %d\n", hapmap_refvar_incorrect);
out.printf("%% of confident calls that are correct : %.2f%%\n", 100.0 * (float) hapmap_refvar_correct / (float)(hapmap_refvar_correct + hapmap_refvar_incorrect));
}
out.println();
}