A proper validation report, calculating TP, FP, FN, sensitivity, FDR, PPV. Treats comp as a set of sites that have been either filtered (failed in assay), validated (polymorphic among samples), or invalidated (AC=0 or all genotypes = hom-ref). Very useful.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5384 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
af71576a07
commit
2f1e249aed
|
|
@ -0,0 +1,165 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||
|
||||
import org.broad.tribble.util.variantcontext.Allele;
|
||||
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||
import org.broad.tribble.vcf.VCFConstants;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||
import org.broadinstitute.sting.utils.MathUtils;
|
||||
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
@Analysis(description = "Assess site accuracy and sensitivity of callset against follow-up validation assay")
|
||||
public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||
// todo -- note this isn't strictly allele away. It's really focused on sites. A/T call at a validated A/G site is currently counted as a TP
|
||||
|
||||
@DataPoint(description = "nComp") int nComp = 0;
|
||||
@DataPoint(description = "TP") int TP = 0;
|
||||
@DataPoint(description = "FP") int FP = 0;
|
||||
@DataPoint(description = "FN") int FN = 0;
|
||||
@DataPoint(description = "TN") int TN = 0;
|
||||
|
||||
@DataPoint(description = "Sensitivity") double sensitivity = 0;
|
||||
@DataPoint(description = "PPV") double PPV = 0;
|
||||
@DataPoint(description = "FDR") double FDR = 0;
|
||||
|
||||
@DataPoint(description = "CompMonoEvalNoCall") int CompMonoEvalNoCall = 0;
|
||||
@DataPoint(description = "CompMonoEvalFiltered") int CompMonoEvalFiltered = 0;
|
||||
@DataPoint(description = "CompMonoEvalMono") int CompMonoEvalMono = 0;
|
||||
@DataPoint(description = "CompMonoEvalPoly") int CompMonoEvalPoly = 0;
|
||||
|
||||
@DataPoint(description = "CompPolyEvalNoCall") int CompPolyEvalNoCall = 0;
|
||||
@DataPoint(description = "CompPolyEvalFiltered") int CompPolyEvalFiltered = 0;
|
||||
@DataPoint(description = "CompPolyEvalMono") int CompPolyEvalMono = 0;
|
||||
@DataPoint(description = "CompPolyEvalPoly") int CompPolyEvalPoly = 0;
|
||||
|
||||
@DataPoint(description = "CompFiltered") int CompFiltered = 0;
|
||||
@DataPoint(description = "Eval and comp have different alleles") int nDifferentAlleleSites = 0;
|
||||
|
||||
private static final boolean TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED = true;
|
||||
private static final boolean REQUIRE_IDENTICAL_ALLELES = false;
|
||||
|
||||
private enum SiteStatus { NO_CALL, FILTERED, MONO, POLY }
|
||||
|
||||
// Counts of ValidationSiteStatus x CallSiteStatus
|
||||
final int[][] counts = new int[SiteStatus.values().length][SiteStatus.values().length];
|
||||
|
||||
@Override public int getComparisonOrder() { return 2; }
|
||||
@Override public boolean enabled() { return true; }
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
for ( SiteStatus x : SiteStatus.values() )
|
||||
CompFiltered += getCounts(SiteStatus.FILTERED, x);
|
||||
|
||||
CompMonoEvalNoCall = getCounts(SiteStatus.MONO, SiteStatus.NO_CALL);
|
||||
CompMonoEvalFiltered = getCounts(SiteStatus.MONO, SiteStatus.FILTERED);
|
||||
CompMonoEvalMono = getCounts(SiteStatus.MONO, SiteStatus.MONO);
|
||||
CompMonoEvalPoly = getCounts(SiteStatus.MONO, SiteStatus.POLY);
|
||||
|
||||
CompPolyEvalNoCall = getCounts(SiteStatus.POLY, SiteStatus.NO_CALL);
|
||||
CompPolyEvalFiltered = getCounts(SiteStatus.POLY, SiteStatus.FILTERED);
|
||||
CompPolyEvalMono = getCounts(SiteStatus.POLY, SiteStatus.MONO);
|
||||
CompPolyEvalPoly = getCounts(SiteStatus.POLY, SiteStatus.POLY);
|
||||
|
||||
TP = CompPolyEvalPoly;
|
||||
FN = CompPolyEvalNoCall + CompPolyEvalFiltered + CompPolyEvalMono;
|
||||
FP = CompMonoEvalPoly;
|
||||
TN = CompMonoEvalNoCall + CompMonoEvalFiltered + CompMonoEvalMono;
|
||||
|
||||
for ( SiteStatus x : SiteStatus.values() )
|
||||
for ( SiteStatus y : SiteStatus.values() )
|
||||
nComp += getCounts(x, y);
|
||||
|
||||
if ( nComp != TP + FN + FP + TN )
|
||||
throw new ReviewedStingException("BUG: nComp != TP + FN + FP + TN!");
|
||||
|
||||
sensitivity = (100.0 * TP) / (TP + FN);
|
||||
PPV = (100.0 * TP) / (TP + FP);
|
||||
FDR = (100.0 * FP) / (FP + TP);
|
||||
}
|
||||
|
||||
private int getCounts(SiteStatus comp, SiteStatus eval) {
|
||||
return counts[comp.ordinal()][eval.ordinal()];
|
||||
}
|
||||
|
||||
@Override
|
||||
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
if ( comp != null ) { // we only need to consider sites in comp
|
||||
if ( REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp)))
|
||||
nDifferentAlleleSites++;
|
||||
else {
|
||||
SiteStatus evalStatus = calcSiteStatus(eval);
|
||||
SiteStatus compStatus = calcSiteStatus(comp);
|
||||
counts[compStatus.ordinal()][evalStatus.ordinal()]++;
|
||||
}
|
||||
}
|
||||
return null; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
//
|
||||
// helper routines
|
||||
//
|
||||
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
||||
|
||||
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||
int ac = 0;
|
||||
if ( vc.getNAlleles() > 2 ) {
|
||||
return SiteStatus.POLY;
|
||||
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||
// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes
|
||||
// for ( String v : (List<String>)vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||
// ac += Integer.valueOf(v);
|
||||
//// System.out.printf(" ac = %d%n", ac);
|
||||
}
|
||||
else
|
||||
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else if ( vc.hasGenotypes() ) {
|
||||
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
||||
} else {
|
||||
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
|
||||
Set<Allele> evalAlts = eval.getAlternateAlleles();
|
||||
Set<Allele> compAlts = comp.getAlternateAlleles();
|
||||
if ( evalAlts.size() != compAlts.size() ) {
|
||||
return true;
|
||||
} else {
|
||||
// same size => every alt from eval must be in comp
|
||||
for ( Allele a : evalAlts ) {
|
||||
if ( ! compAlts.contains(a) ) {
|
||||
// System.out.printf("Different alleles: %s:%d eval=%s comp=%s\n\t\teval=%s\n\t\tcomp=%s%n",
|
||||
// eval.getChr(), eval.getStart(), eval.getAlleles(), comp.getAlleles(), eval, comp);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue