2009-04-03 10:09:10 +08:00
package org.broadinstitute.sting.playground.utils ;
2009-03-20 06:06:01 +08:00
2009-04-03 10:09:10 +08:00
import org.broadinstitute.sting.gatk.refdata.rodGFF ;
2009-03-20 06:06:01 +08:00
import org.broadinstitute.sting.gatk.refdata.ReferenceOrderedDatum ;
import org.broadinstitute.sting.gatk.refdata.rodDbSNP ;
2009-04-04 03:54:54 +08:00
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker ;
2009-03-24 07:19:54 +08:00
import org.broadinstitute.sting.playground.gatk.walkers.AlleleFrequencyWalker ;
2009-03-20 06:06:01 +08:00
import java.util.List ;
2009-04-03 10:09:10 +08:00
import java.io.PrintStream ;
2009-03-20 06:06:01 +08:00
/ * *
* Created by IntelliJ IDEA .
* User : andrewk
2009-04-03 10:09:10 +08:00
* Date : Apr 1 , 2009
* Time : 5 : 53 : 21 PM
2009-03-20 06:06:01 +08:00
* To change this template use File | Settings | File Templates .
* /
2009-04-03 10:09:10 +08:00
public class AlleleMetrics {
2009-03-20 06:06:01 +08:00
2009-03-22 23:45:12 +08:00
long dbsnp_hits = 0 ;
long num_variants = 0 ;
long num_loci_total = 0 ;
long num_loci_confident = 0 ;
2009-03-20 06:06:01 +08:00
double LOD_cutoff = 5 ;
2009-03-25 09:12:05 +08:00
long hapmap_genotype_correct = 0 ;
long hapmap_genotype_incorrect = 0 ;
long hapmap_refvar_correct = 0 ;
long hapmap_refvar_incorrect = 0 ;
2009-03-20 06:06:01 +08:00
2009-04-03 10:09:10 +08:00
protected PrintStream out ;
2009-03-20 06:06:01 +08:00
2009-04-03 10:09:10 +08:00
public AlleleMetrics ( String MetricsOutputFile ) {
try
{
/ * if ( MetricsOutputFile . equals ( "-" ) )
this . out = out ;
else * /
this . out = new PrintStream ( MetricsOutputFile ) ;
}
catch ( Exception e )
{
e . printStackTrace ( ) ;
System . exit ( - 1 ) ;
}
}
2009-03-20 06:06:01 +08:00
2009-04-04 03:54:54 +08:00
public void nextPosition ( AlleleFrequencyEstimate alleleFreq , RefMetaDataTracker tracker ) {
2009-03-22 23:45:12 +08:00
num_loci_total + = 1 ;
2009-03-20 06:06:01 +08:00
2009-03-25 09:12:05 +08:00
boolean is_dbSNP_SNP = false ;
boolean has_hapmap_chip_genotype = false ;
rodGFF hapmap_chip_genotype = null ;
2009-04-04 03:54:54 +08:00
for ( ReferenceOrderedDatum datum : tracker . getAllRods ( ) )
2009-03-25 09:12:05 +08:00
{
if ( datum ! = null )
{
2009-04-03 10:09:10 +08:00
if ( datum instanceof rodDbSNP )
2009-03-25 09:12:05 +08:00
{
rodDbSNP dbsnp = ( rodDbSNP ) datum ;
if ( dbsnp . isSNP ( ) ) is_dbSNP_SNP = true ;
}
if ( datum instanceof rodGFF )
{
has_hapmap_chip_genotype = true ;
hapmap_chip_genotype = ( rodGFF ) datum ;
}
}
}
2009-03-26 10:10:18 +08:00
if ( Math . abs ( alleleFreq . lodVsRef ) > = LOD_cutoff ) { num_loci_confident + = 1 ; }
2009-03-22 23:45:12 +08:00
2009-03-26 10:10:18 +08:00
if ( alleleFreq . qstar > 0.0 & & alleleFreq . lodVsRef > = LOD_cutoff )
2009-04-03 10:09:10 +08:00
{
2009-03-22 23:45:12 +08:00
// Confident variant.
2009-04-03 10:09:10 +08:00
2009-03-22 23:45:12 +08:00
num_variants + = 1 ;
2009-03-24 11:58:03 +08:00
if ( is_dbSNP_SNP )
2009-03-22 23:45:12 +08:00
{
dbsnp_hits + = 1 ;
}
2009-03-20 06:06:01 +08:00
}
2009-03-26 10:10:18 +08:00
if ( has_hapmap_chip_genotype ) {
2009-03-25 09:12:05 +08:00
// convert hapmap call to mixture of ref/nonref
String hapmap_genotype = hapmap_chip_genotype . getFeature ( ) ;
long refs = 0 , alts = 0 ;
double hapmap_q ;
for ( char c : hapmap_genotype . toCharArray ( ) ) {
if ( c = = alleleFreq . ref ) { refs + + ; }
if ( c = = alleleFreq . alt ) { alts + + ; }
}
if ( refs + alts > 0 ) {
hapmap_q = ( float ) alts / ( refs + alts ) ;
} else {
hapmap_q = - 1 ;
}
// Hapmap debug info
2009-04-04 01:32:31 +08:00
//out.format("HAPMAP DEBUG %.2f %.2f %.2f ", hapmap_q, alleleFreq.qstar, alleleFreq.lodVsRef);
//String called_genotype = alleleFreq.asString();
//out.format("%s %s %c %c", hapmap_genotype, called_genotype, alleleFreq.ref, alleleFreq.alt);
2009-03-25 09:12:05 +08:00
2009-03-26 10:10:18 +08:00
if ( alleleFreq . lodVsNextBest > = LOD_cutoff ) {
// Calculate genotyping performance - did we get the correct genotype of the N+1 choices?
if ( hapmap_q ! = - 1 & & hapmap_q = = alleleFreq . qstar ) {
hapmap_genotype_correct + + ;
} else {
hapmap_genotype_incorrect + + ;
2009-04-03 10:09:10 +08:00
//System.out.printf(" INCORRECT GENOTYPE Bases: %s", AlleleFrequencyWalker.getBases(context));
2009-04-04 01:32:31 +08:00
//out.printf(" INCORRECT GENOTYPE");
2009-03-26 10:10:18 +08:00
//AlleleFrequencyWalker.print_base_qual_matrix(AlleleFrequencyWalker.getOneBaseQuals(context));
}
2009-03-25 09:12:05 +08:00
}
2009-03-26 10:10:18 +08:00
if ( alleleFreq . lodVsRef > = LOD_cutoff | | - 1 * alleleFreq . lodVsRef > = LOD_cutoff ) {
// Now calculate ref / var performance - did we correctly classify the site as
// reference or variant without regard to genotype; i.e. het/hom "miscalls" don't matter here
boolean hapmap_var = hapmap_q ! = 0.0 ;
boolean called_var = alleleFreq . qstar ! = 0.0 ;
if ( hapmap_q ! = - 1 & & hapmap_var ! = called_var ) {
hapmap_refvar_incorrect + + ;
2009-04-04 01:32:31 +08:00
//out.printf(" INCORRECT REFVAR CALL");
2009-03-26 10:10:18 +08:00
} else {
hapmap_refvar_correct + + ;
}
2009-03-25 09:12:05 +08:00
}
2009-04-04 01:32:31 +08:00
//out.print("\n");
2009-03-25 09:12:05 +08:00
}
2009-03-20 06:06:01 +08:00
}
2009-03-25 09:12:05 +08:00
public void printMetrics ( )
2009-03-22 23:45:12 +08:00
{
if ( num_loci_total = = 0 ) { return ; }
2009-03-27 23:03:32 +08:00
out . printf ( "\n" ) ;
out . printf ( "METRICS Allele Frequency Metrics (LOD >= %.0f)\n" , LOD_cutoff ) ;
out . printf ( "METRICS -------------------------------------------------\n" ) ;
out . printf ( "METRICS Total loci : %d\n" , num_loci_total ) ;
out . printf ( "METRICS Total called with confidence : %d (%.2f%%)\n" , num_loci_confident , 100.0 * ( float ) num_loci_confident / ( float ) num_loci_total ) ;
2009-03-22 23:45:12 +08:00
if ( num_variants ! = 0 )
{
2009-03-27 23:03:32 +08:00
out . printf ( "METRICS Number of variants : %d (%.2f%%) (1/%d)\n" , num_variants , 100.0 * ( float ) num_variants / ( float ) num_loci_confident , num_loci_confident / num_variants ) ;
out . printf ( "METRICS Fraction of variant sites in dbSNP : %.2f%%\n" , 100.0 * ( float ) dbsnp_hits / ( float ) num_variants ) ;
out . printf ( "METRICS -------------------------------------------------\n" ) ;
out . printf ( "METRICS -- Hapmap Genotyping performance --\n" ) ;
out . printf ( "METRICS Num. conf. calls at Hapmap chip sites : %d\n" , hapmap_genotype_correct + hapmap_genotype_incorrect ) ;
out . printf ( "METRICS Conf. calls at chip sites correct : %d\n" , hapmap_genotype_correct ) ;
out . printf ( "METRICS Conf. calls at chip sites incorrect : %d\n" , hapmap_genotype_incorrect ) ;
out . printf ( "METRICS %% of confident calls that are correct : %.2f%%\n" , 100.0 * ( float ) hapmap_genotype_correct / ( float ) ( hapmap_genotype_correct + hapmap_genotype_incorrect ) ) ;
out . printf ( "METRICS -------------------------------------------------\n" ) ;
out . printf ( "METRICS -- Hapmap Reference/Variant performance --\n" ) ;
out . printf ( "METRICS Num. conf. calls at Hapmap chip sites : %d\n" , hapmap_refvar_correct + hapmap_refvar_incorrect ) ;
out . printf ( "METRICS Conf. calls at chip sites correct : %d\n" , hapmap_refvar_correct ) ;
out . printf ( "METRICS Conf. calls at chip sites incorrect : %d\n" , hapmap_refvar_incorrect ) ;
out . printf ( "METRICS %% of confident calls that are correct : %.2f%%\n" , 100.0 * ( float ) hapmap_refvar_correct / ( float ) ( hapmap_refvar_correct + hapmap_refvar_incorrect ) ) ;
2009-03-22 23:45:12 +08:00
}
2009-03-27 23:03:32 +08:00
out . println ( ) ;
2009-03-20 06:06:01 +08:00
}
2009-04-03 10:09:10 +08:00
public void printMetricsAtLocusIntervals ( int loci_interval ) {
if ( num_loci_total % loci_interval = = 0 ) printMetrics ( ) ;
2009-03-22 23:45:12 +08:00
}
2009-03-20 06:06:01 +08:00
}