A proper validation report, calculating TP, FP, FN, sensitivity, FDR, PPV. Treats comp as a set of sites that have been either filtered (failed in assay), validated (polymorphic among samples), or invalidated (AC=0 or all genotypes = hom-ref). Very useful.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5384 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
af71576a07
commit
2f1e249aed
|
|
@ -0,0 +1,165 @@
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.varianteval.evaluators;
|
||||||
|
|
||||||
|
import org.broad.tribble.util.variantcontext.Allele;
|
||||||
|
import org.broad.tribble.util.variantcontext.VariantContext;
|
||||||
|
import org.broad.tribble.vcf.VCFConstants;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContextUtils;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.VariantEvalWalker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.Analysis;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.varianteval.tags.DataPoint;
|
||||||
|
import org.broadinstitute.sting.utils.MathUtils;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.ReviewedStingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The Broad Institute
|
||||||
|
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||||
|
* This software and its documentation are copyright 2009 by the
|
||||||
|
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||||
|
* <p/>
|
||||||
|
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||||
|
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||||
|
*/
|
||||||
|
@Analysis(description = "Assess site accuracy and sensitivity of callset against follow-up validation assay")
|
||||||
|
public class ValidationReport extends VariantEvaluator implements StandardEval {
|
||||||
|
// todo -- note this isn't strictly allele away. It's really focused on sites. A/T call at a validated A/G site is currently counted as a TP
|
||||||
|
|
||||||
|
@DataPoint(description = "nComp") int nComp = 0;
|
||||||
|
@DataPoint(description = "TP") int TP = 0;
|
||||||
|
@DataPoint(description = "FP") int FP = 0;
|
||||||
|
@DataPoint(description = "FN") int FN = 0;
|
||||||
|
@DataPoint(description = "TN") int TN = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "Sensitivity") double sensitivity = 0;
|
||||||
|
@DataPoint(description = "PPV") double PPV = 0;
|
||||||
|
@DataPoint(description = "FDR") double FDR = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "CompMonoEvalNoCall") int CompMonoEvalNoCall = 0;
|
||||||
|
@DataPoint(description = "CompMonoEvalFiltered") int CompMonoEvalFiltered = 0;
|
||||||
|
@DataPoint(description = "CompMonoEvalMono") int CompMonoEvalMono = 0;
|
||||||
|
@DataPoint(description = "CompMonoEvalPoly") int CompMonoEvalPoly = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "CompPolyEvalNoCall") int CompPolyEvalNoCall = 0;
|
||||||
|
@DataPoint(description = "CompPolyEvalFiltered") int CompPolyEvalFiltered = 0;
|
||||||
|
@DataPoint(description = "CompPolyEvalMono") int CompPolyEvalMono = 0;
|
||||||
|
@DataPoint(description = "CompPolyEvalPoly") int CompPolyEvalPoly = 0;
|
||||||
|
|
||||||
|
@DataPoint(description = "CompFiltered") int CompFiltered = 0;
|
||||||
|
@DataPoint(description = "Eval and comp have different alleles") int nDifferentAlleleSites = 0;
|
||||||
|
|
||||||
|
private static final boolean TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED = true;
|
||||||
|
private static final boolean REQUIRE_IDENTICAL_ALLELES = false;
|
||||||
|
|
||||||
|
private enum SiteStatus { NO_CALL, FILTERED, MONO, POLY }
|
||||||
|
|
||||||
|
// Counts of ValidationSiteStatus x CallSiteStatus
|
||||||
|
final int[][] counts = new int[SiteStatus.values().length][SiteStatus.values().length];
|
||||||
|
|
||||||
|
@Override public int getComparisonOrder() { return 2; }
|
||||||
|
@Override public boolean enabled() { return true; }
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void finalizeEvaluation() {
|
||||||
|
for ( SiteStatus x : SiteStatus.values() )
|
||||||
|
CompFiltered += getCounts(SiteStatus.FILTERED, x);
|
||||||
|
|
||||||
|
CompMonoEvalNoCall = getCounts(SiteStatus.MONO, SiteStatus.NO_CALL);
|
||||||
|
CompMonoEvalFiltered = getCounts(SiteStatus.MONO, SiteStatus.FILTERED);
|
||||||
|
CompMonoEvalMono = getCounts(SiteStatus.MONO, SiteStatus.MONO);
|
||||||
|
CompMonoEvalPoly = getCounts(SiteStatus.MONO, SiteStatus.POLY);
|
||||||
|
|
||||||
|
CompPolyEvalNoCall = getCounts(SiteStatus.POLY, SiteStatus.NO_CALL);
|
||||||
|
CompPolyEvalFiltered = getCounts(SiteStatus.POLY, SiteStatus.FILTERED);
|
||||||
|
CompPolyEvalMono = getCounts(SiteStatus.POLY, SiteStatus.MONO);
|
||||||
|
CompPolyEvalPoly = getCounts(SiteStatus.POLY, SiteStatus.POLY);
|
||||||
|
|
||||||
|
TP = CompPolyEvalPoly;
|
||||||
|
FN = CompPolyEvalNoCall + CompPolyEvalFiltered + CompPolyEvalMono;
|
||||||
|
FP = CompMonoEvalPoly;
|
||||||
|
TN = CompMonoEvalNoCall + CompMonoEvalFiltered + CompMonoEvalMono;
|
||||||
|
|
||||||
|
for ( SiteStatus x : SiteStatus.values() )
|
||||||
|
for ( SiteStatus y : SiteStatus.values() )
|
||||||
|
nComp += getCounts(x, y);
|
||||||
|
|
||||||
|
if ( nComp != TP + FN + FP + TN )
|
||||||
|
throw new ReviewedStingException("BUG: nComp != TP + FN + FP + TN!");
|
||||||
|
|
||||||
|
sensitivity = (100.0 * TP) / (TP + FN);
|
||||||
|
PPV = (100.0 * TP) / (TP + FP);
|
||||||
|
FDR = (100.0 * FP) / (FP + TP);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int getCounts(SiteStatus comp, SiteStatus eval) {
|
||||||
|
return counts[comp.ordinal()][eval.ordinal()];
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String update2(VariantContext eval, VariantContext comp, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||||
|
if ( comp != null ) { // we only need to consider sites in comp
|
||||||
|
if ( REQUIRE_IDENTICAL_ALLELES && (eval != null && haveDifferentAltAlleles(eval, comp)))
|
||||||
|
nDifferentAlleleSites++;
|
||||||
|
else {
|
||||||
|
SiteStatus evalStatus = calcSiteStatus(eval);
|
||||||
|
SiteStatus compStatus = calcSiteStatus(comp);
|
||||||
|
counts[compStatus.ordinal()][evalStatus.ordinal()]++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null; // we don't capture any interesting sites
|
||||||
|
}
|
||||||
|
|
||||||
|
//
|
||||||
|
// helper routines
|
||||||
|
//
|
||||||
|
public SiteStatus calcSiteStatus(VariantContext vc) {
|
||||||
|
if ( vc == null ) return SiteStatus.NO_CALL;
|
||||||
|
if ( vc.isFiltered() ) return SiteStatus.FILTERED;
|
||||||
|
if ( ! vc.isVariant() ) return SiteStatus.MONO;
|
||||||
|
|
||||||
|
if ( vc.hasAttribute(VCFConstants.ALLELE_COUNT_KEY) ) {
|
||||||
|
int ac = 0;
|
||||||
|
if ( vc.getNAlleles() > 2 ) {
|
||||||
|
return SiteStatus.POLY;
|
||||||
|
//// System.out.printf("multiple alleles %s = %s%n", vc.getAlleles(), vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY));
|
||||||
|
// // todo -- omg this is painful. We need a better approach to dealing with multi-valued attributes
|
||||||
|
// for ( String v : (List<String>)vc.getAttribute(VCFConstants.ALLELE_COUNT_KEY) )
|
||||||
|
// ac += Integer.valueOf(v);
|
||||||
|
//// System.out.printf(" ac = %d%n", ac);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
ac = vc.getAttributeAsInt(VCFConstants.ALLELE_COUNT_KEY);
|
||||||
|
return ac > 0 ? SiteStatus.POLY : SiteStatus.MONO;
|
||||||
|
} else if ( vc.hasGenotypes() ) {
|
||||||
|
return vc.isPolymorphic() ? SiteStatus.POLY : SiteStatus.MONO;
|
||||||
|
} else {
|
||||||
|
return TREAT_ALL_SITES_IN_EVAL_VCF_AS_CALLED ? SiteStatus.POLY : SiteStatus.NO_CALL; // we can't figure out what to do
|
||||||
|
//return SiteStatus.NO_CALL; // we can't figure out what to do
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
public boolean haveDifferentAltAlleles(VariantContext eval, VariantContext comp) {
|
||||||
|
Set<Allele> evalAlts = eval.getAlternateAlleles();
|
||||||
|
Set<Allele> compAlts = comp.getAlternateAlleles();
|
||||||
|
if ( evalAlts.size() != compAlts.size() ) {
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
// same size => every alt from eval must be in comp
|
||||||
|
for ( Allele a : evalAlts ) {
|
||||||
|
if ( ! compAlts.contains(a) ) {
|
||||||
|
// System.out.printf("Different alleles: %s:%d eval=%s comp=%s\n\t\teval=%s\n\t\tcomp=%s%n",
|
||||||
|
// eval.getChr(), eval.getStart(), eval.getAlleles(), comp.getAlleles(), eval, comp);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue