// Source: gatk-3.8/java/src/org/broadinstitute/sting/playground/utils/AlleleMetrics.java
// (176 lines, 7.0 KiB, Java, executable file)

package org.broadinstitute.sting.playground.utils;
import org.broadinstitute.sting.gatk.refdata.rodGFF;
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker;
import java.util.List;
import java.io.PrintStream;
/**
 * Accumulates per-locus quality metrics for allele frequency calls and writes a
 * plain-text summary report to a metrics file.
 *
 * <p>Each call to {@link #nextPosition} tallies one locus: whether the call is confident
 * (|lodVsRef| at or above {@link #LOD_cutoff}), whether it is a confident variant, whether a
 * dbSNP SNP record overlaps it, and, when a {@code rodGFF} record is present (treated by this
 * class as a HapMap chip genotype), whether the call agrees with that genotype.
 *
 * <p>The counters are plain, unsynchronized fields.
 *
 * @author andrewk
 * @since Apr 1, 2009
 */
public class AlleleMetrics {
    /**
     * Absolute tolerance used when comparing the HapMap-derived alt-allele fraction with the
     * called {@code qstar}. Both are expected to be small rational fractions (k/N), so any real
     * disagreement is far larger than this; the tolerance only absorbs rounding noise.
     * NOTE(review): assumes qstar lies on such a k/N grid - confirm against AlleleFrequencyEstimate.
     */
    private static final double QSTAR_TOLERANCE = 1e-6;

    /** Number of confident variant calls that overlap a dbSNP SNP record. */
    long dbsnp_hits = 0;
    /** Number of confident variant calls (qstar &gt; 0 and lodVsRef &gt;= LOD_cutoff). */
    long num_variants = 0;
    /** Number of loci passed to {@link #nextPosition}. */
    long num_loci_total = 0;
    /** Number of loci whose |lodVsRef| is at or above the cutoff. */
    long num_loci_confident = 0;
    /** LOD threshold at or above which a call is counted as confident. */
    double LOD_cutoff = 5;
    /** Confident genotype calls at chip sites that match the chip genotype. */
    long hapmap_genotype_correct = 0;
    /** Confident genotype calls at chip sites that do not match the chip genotype. */
    long hapmap_genotype_incorrect = 0;
    /** Confident ref/variant classifications at chip sites that agree with the chip. */
    long hapmap_refvar_correct = 0;
    /** Confident ref/variant classifications at chip sites that disagree with the chip. */
    long hapmap_refvar_incorrect = 0;

    /** Destination of the metrics report. */
    protected PrintStream out;

    /**
     * Opens the metrics report file.
     *
     * <p>The argument is always used as a file name; {@code "-"} is not treated specially.
     * If the file cannot be opened, the failure is reported on standard error and the JVM
     * exits with status -1.
     *
     * @param MetricsOutputFile path of the file the report is written to
     */
    public AlleleMetrics(String MetricsOutputFile) {
        try {
            this.out = new PrintStream(MetricsOutputFile);
        } catch (Exception e) {
            // Name the file so the failure is actionable even when the exception message is not.
            System.err.println("Unable to open metrics output file: " + MetricsOutputFile);
            e.printStackTrace();
            System.exit(-1);
        }
    }

    /**
     * Tallies the metrics for one locus.
     *
     * @param alleleFreq the allele frequency call at this locus
     * @param tracker    reference-ordered data overlapping this locus; scanned for a dbSNP
     *                   record and for a GFF record (used as the chip genotype)
     */
    public void nextPosition(AlleleFrequencyEstimate alleleFreq, RefMetaDataTracker tracker) {
        num_loci_total += 1;

        boolean isDbSnpSnp = false;
        rodGFF hapmapChipGenotype = null;
        for (ReferenceOrderedDatum datum : tracker.getAllRods()) {
            // instanceof is false for null, so null entries are skipped implicitly.
            if (datum instanceof rodDbSNP) {
                if (((rodDbSNP) datum).isSNP()) {
                    isDbSnpSnp = true;
                }
            }
            if (datum instanceof rodGFF) {
                // If several GFF records are present, the last one wins.
                hapmapChipGenotype = (rodGFF) datum;
            }
        }

        if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) {
            num_loci_confident += 1;
        }

        if (alleleFreq.qstar > 0.0 && alleleFreq.lodVsRef >= LOD_cutoff) {
            // Confident variant.
            num_variants += 1;
            if (isDbSnpSnp) {
                dbsnp_hits += 1;
            }
        }

        if (hapmapChipGenotype != null) {
            scoreAgainstHapmap(alleleFreq, hapmapChipGenotype);
        }
    }

    /**
     * Compares a call with the chip genotype at the same locus and updates the genotype and
     * ref/variant concordance counters.
     */
    private void scoreAgainstHapmap(AlleleFrequencyEstimate alleleFreq, rodGFF hapmapChipGenotype) {
        // Convert the chip call to a mixture of ref/nonref alleles.
        String hapmapGenotype = hapmapChipGenotype.getFeature();
        long refs = 0;
        long alts = 0;
        for (char c : hapmapGenotype.toCharArray()) {
            if (c == alleleFreq.ref) {
                refs++;
            }
            if (c == alleleFreq.alt) {
                alts++;
            }
        }

        // The chip genotype is only informative if it contains the ref or the alt allele.
        boolean informative = refs + alts > 0;
        // Computed in double (not float) so that fractions such as 1/3 compare equal to a
        // double-precision qstar instead of differing by float rounding error.
        double hapmapQ = informative ? (double) alts / (double) (refs + alts) : -1;

        if (alleleFreq.lodVsNextBest >= LOD_cutoff) {
            // Genotyping performance - did we get the correct genotype of the N+1 choices?
            // An uninformative chip genotype counts as incorrect here.
            if (informative && Math.abs(hapmapQ - alleleFreq.qstar) < QSTAR_TOLERANCE) {
                hapmap_genotype_correct++;
            } else {
                hapmap_genotype_incorrect++;
            }
        }

        if (Math.abs(alleleFreq.lodVsRef) >= LOD_cutoff) {
            // Ref/var performance - did we correctly classify the site as reference or variant
            // without regard to genotype; i.e. het/hom "miscalls" don't matter here.
            boolean hapmapVar = hapmapQ != 0.0;
            boolean calledVar = alleleFreq.qstar != 0.0;
            // NOTE(review): an uninformative chip genotype counts as correct here but as
            // incorrect in the genotype tally above - confirm this asymmetry is intended.
            if (informative && hapmapVar != calledVar) {
                hapmap_refvar_incorrect++;
            } else {
                hapmap_refvar_correct++;
            }
        }
    }

    /**
     * Returns {@code 100 * numerator / denominator}, computed in double precision, or 0 when
     * the denominator is 0 so that the report never shows {@code NaN}.
     */
    private static double percent(long numerator, long denominator) {
        if (denominator == 0) {
            return 0.0;
        }
        return 100.0 * (double) numerator / (double) denominator;
    }

    /**
     * Writes the summary report to the metrics file.
     *
     * <p>Nothing is written until at least one locus has been processed. The variant and
     * HapMap sections are written only when at least one confident variant has been counted.
     */
    public void printMetrics() {
        if (num_loci_total == 0) {
            return;
        }

        long genotypeCalls = hapmap_genotype_correct + hapmap_genotype_incorrect;
        long refvarCalls = hapmap_refvar_correct + hapmap_refvar_incorrect;

        out.printf("\n");
        out.printf("METRICS Allele Frequency Metrics (LOD >= %.0f)\n", LOD_cutoff);
        out.printf("METRICS -------------------------------------------------\n");
        out.printf("METRICS Total loci : %d\n", num_loci_total);
        out.printf("METRICS Total called with confidence : %d (%.2f%%)\n", num_loci_confident, percent(num_loci_confident, num_loci_total));
        if (num_variants != 0) {
            out.printf("METRICS Number of variants : %d (%.2f%%) (1/%d)\n", num_variants, percent(num_variants, num_loci_confident), num_loci_confident / num_variants);
            out.printf("METRICS Fraction of variant sites in dbSNP : %.2f%%\n", percent(dbsnp_hits, num_variants));
            out.printf("METRICS -------------------------------------------------\n");
            out.printf("METRICS -- Hapmap Genotyping performance --\n");
            out.printf("METRICS Num. conf. calls at Hapmap chip sites : %d\n", genotypeCalls);
            out.printf("METRICS Conf. calls at chip sites correct : %d\n", hapmap_genotype_correct);
            out.printf("METRICS Conf. calls at chip sites incorrect : %d\n", hapmap_genotype_incorrect);
            out.printf("METRICS %% of confident calls that are correct : %.2f%%\n", percent(hapmap_genotype_correct, genotypeCalls));
            out.printf("METRICS -------------------------------------------------\n");
            out.printf("METRICS -- Hapmap Reference/Variant performance --\n");
            out.printf("METRICS Num. conf. calls at Hapmap chip sites : %d\n", refvarCalls);
            out.printf("METRICS Conf. calls at chip sites correct : %d\n", hapmap_refvar_correct);
            out.printf("METRICS Conf. calls at chip sites incorrect : %d\n", hapmap_refvar_incorrect);
            out.printf("METRICS %% of confident calls that are correct : %.2f%%\n", percent(hapmap_refvar_correct, refvarCalls));
        }
        out.println();
        // The stream is never closed by this class, so push the report out explicitly.
        out.flush();
    }

    /**
     * Writes the summary report whenever the number of loci processed so far is a multiple of
     * the given interval.
     *
     * @param loci_interval number of loci between reports; an interval of 0 disables periodic
     *                      reporting instead of failing with a division by zero
     */
    public void printMetricsAtLocusIntervals(int loci_interval) {
        if (loci_interval == 0) {
            return;
        }
        if (num_loci_total % loci_interval == 0) {
            printMetrics();
        }
    }
}