Renamed some of the column names in Ti/Tv and Concordance modules so that they are clearer. Removed ValidationRate module (it was busted).

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3564 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
ebanks 2010-06-16 15:53:06 +00:00
parent 8cb16a1d45
commit 7a91dbd490
4 changed files with 31 additions and 163 deletions

View File

@ -230,7 +230,7 @@ public class GenotypeConcordance extends VariantEvaluator {
private boolean warnedAboutValidationData = false;
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
final String interesting = null;
String interesting = null;
// sanity check that we at least have either eval or validation data
if (eval == null && !isValidVC(validation)) {
@ -266,12 +266,14 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
determineStats(eval, validation);
interesting = determineStats(eval, validation);
return interesting; // status string from determineStats, or null when nothing notable
}
private void determineStats(final VariantContext eval, final VariantContext validation) {
private String determineStats(final VariantContext eval, final VariantContext validation) {
String interesting = null;
final boolean validationIsValidVC = isValidVC(validation);
@ -285,6 +287,8 @@ public class GenotypeConcordance extends VariantEvaluator {
truth = Genotype.Type.NO_CALL;
} else {
truth = validation.getGenotype(sample).getType();
// TODO -- capture "interesting" sites here, for example:
// interesting = "ConcordanceStatus=FP";
}
sampleStats.incrValue(sample, truth, called);
@ -327,6 +331,8 @@ public class GenotypeConcordance extends VariantEvaluator {
}
}
return interesting;
}
private static boolean isValidVC(final VariantContext vc) {
@ -429,9 +435,9 @@ class SampleSummaryStats implements TableType {
"percent_comp_het_called_var",
"percent_comp_hom_called_hom",
"percent_comp_hom_called_var",
"percent_variant_sensitivity",
"percent_genotype_concordance",
"percent_genotype_error_rate"};
"percent_non-reference_sensitivity",
"percent_overall_genotype_concordance",
"percent_non-reference_discrepancy_rate"};
// sample to concordance stats object
private final HashMap<String, double[]> concordanceSummary = new HashMap<String, double[]>();
@ -547,18 +553,18 @@ class SampleSummaryStats implements TableType {
updateSummaries(3, summary, numer, denom);
// Summary 4: % non-ref called as non-ref
// MAD: this is known as the variant sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
// MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes);
denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes);
updateSummaries(4, summary, numer, denom);
// Summary 5: genotype concordance of sites called in eval track
// Summary 5: overall genotype concordance of sites called in eval track
// MAD: this is the traditional genotype concordance
numer = sumStatsDiag(stats, allCalledGenotypes);
denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes);
updateSummaries(5, summary, numer, denom);
// Summary 6: genotype concordance of sites called non-ref in eval track
// Summary 6: overall genotype concordance of sites called non-ref in eval track
long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
long diag = sumStatsDiag(stats, allVariantGenotypes);
long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords;

View File

@ -16,11 +16,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
long nTv = 0;
@DataPoint(name = "ti/tv ratio", description = "the transition to transversion ratio")
double tiTvRatio = 0.0;
@DataPoint(name = "ti_count_std", description = "number of standard transition sites")
long nTiInStd = 0;
@DataPoint(name = "tv_count_std", description = "number of standard transversion sites")
long nTvInStd = 0;
@DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio")
@DataPoint(name = "ti_count_comp", description = "number of comp transition sites")
long nTiInComp = 0;
@DataPoint(name = "tv_count_comp", description = "number of comp transversion sites")
long nTvInComp = 0;
@DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio for comp sites")
double TiTvRatioStandard = 0.0;
public TiTvVariantEvaluator(VariantEvalWalker parent) {
@ -42,10 +42,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
public void updateTiTv(VariantContext vc, boolean updateStandard) {
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
if (vc.isTransition()) {
if (updateStandard) nTiInStd++;
if (updateStandard) nTiInComp++;
else nTi++;
} else {
if (updateStandard) nTvInStd++;
if (updateStandard) nTvInComp++;
else nTv++;
}
}
@ -62,6 +62,6 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
public void finalizeEvaluation() {
// the ti/tv ratio needs to be set (it's not calculated per-variant).
this.tiTvRatio = rate(nTi,nTv);
this.TiTvRatioStandard = rate(nTiInStd,nTvInStd);
this.TiTvRatioStandard = rate(nTiInComp, nTvInComp);
}
}

View File

@ -1,138 +0,0 @@
package org.broadinstitute.sting.gatk.walkers.varianteval;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.*;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
/**
* The Broad Institute
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
* This software and its documentation are copyright 2009 by the
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
* <p/>
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
*/
@Analysis(name = "Validation Rate", description = "Validation Rate")
public class ValidationRate extends VariantEvaluator {
// Counts of monomorphic / polymorphic sites seen in the comparison ("comp") track,
// plus per-category breakdowns of how the eval track called those sites.
@DataPoint(name="# mono in comp",description = "Number of mono calls in the comparison ROD")
long n_mono_in_comp;
@DataPoint(name="# poly in comp",description = "Number of poly calls in the comparison ROD")
long n_poly_in_comp;
@DataPoint(name="% poly in comp",description = "Percent of poly calls in the comparison ROD")
double percent_poly_in_comp;
@DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites")
long n_mono_calls_at_mono_sites;
@DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites")
long n_poly_calls_at_mono_sites;
@DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites")
long n_nocalls_at_mono_sites;
@DataPoint(name="# mono sites called poly",description = "Percentage of mono sites called poly")
double percent_mono_sites_called_poly;
@DataPoint(name="# mono calls at poly sites",description = "Number of mono calls at poly sites")
long n_mono_calls_at_poly_sites;
@DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at poly sites")
long n_poly_calls_at_poly_sites;
@DataPoint(name="# nocalls at poly sites",description = "Number of no calls at poly sites")
long n_nocalls_at_poly_sites;
@DataPoint(name="% poly sites called poly",description = "Number of poly sites called poly")
double percent_poly_sites_called_poly;
@DataPoint(description = "The PPV")
double PPV;
@DataPoint(description = "The sensitivity")
double sensitivity;
@DataPoint(description = "The False Discovery Rate (FDR)")
double falseDiscoveryRate;
// todo -- subset validation data by list of samples, if provided
// Simple tally of poly / mono / no-call counts for one site category, with a
// helper computing the polymorphic fraction as a percentage.
class SiteStats {
long nPoly = 0;
long nMono = 0;
long nNoCall = 0;
// Percent of all tallied sites that were polymorphic; rate() comes from the
// VariantEvaluator superclass (not visible here) — presumably numerator/denominator.
double polyPercent() {
return 100 * rate(nPoly, nPoly + nMono + nNoCall);
}
}
// Tallies over all validation sites, and over eval calls split by whether the
// validation site was monomorphic or polymorphic.
private SiteStats validationStats = new SiteStats();
private SiteStats evalOverlapAtMono = new SiteStats();
private SiteStats evalOverlapAtPoly = new SiteStats();
public ValidationRate(VariantEvalWalker parent) {
super(parent);
}
public String getName() {
return "validationRate";
}
public int getComparisonOrder() {
return 2; // we need to see each eval track and each comp track
}
@Override
public void finalizeEvaluation() {
// Derived from the tallies accumulated in update2():
// TP = poly eval calls at poly validation sites,
// FP = poly eval calls at mono validation sites,
// FN = mono or no-call eval results at poly validation sites.
long TP = evalOverlapAtPoly.nPoly; // + evalOverlapAtMono.nMono + evalOverlapAtMono.nNoCall;
long FP = evalOverlapAtMono.nPoly; // + evalOverlapAtPoly.nMono;
long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall;
// fill in the output fields
n_mono_in_comp = validationStats.nMono;
n_poly_in_comp = validationStats.nPoly;
percent_poly_in_comp = validationStats.polyPercent();
n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono;
n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly;
n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall;
percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent();
n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono;
n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly;
n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall;
percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent();
// PPV = TP/(TP+FP); FDR = FP/(FP+TP) — by construction FDR == 100 - PPV.
PPV = 100 * rate(TP, TP + FP);
sensitivity = 100 * rate(TP, TP + FN);
falseDiscoveryRate = 100 * rate(FP, FP + TP);
}
public boolean enabled() {
return true;
}
// Classifies one site: validation data is tallied as mono/poly, and the eval
// call at that site is tallied into the matching SiteStats bucket. Returns a
// "ValidationStatus=FN"/"ValidationStatus=FP" tag for discordant sites, else null.
public String update2(VariantContext eval, VariantContext rawValidationData, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
String interesting = null;
// only consider validation records that are genotyped and unfiltered
if (rawValidationData!= null && rawValidationData.hasGenotypes() && rawValidationData.isNotFiltered()) {
VariantContext validation = rawValidationData;
SiteStats overlap;
if (validation.isPolymorphic()) {
validationStats.nPoly++;
overlap = evalOverlapAtPoly;
// validation says poly but eval missed it or called mono -> false negative
if (eval == null || eval.isMonomorphic())
interesting = "ValidationStatus=FN";
} else {
validationStats.nMono++;
overlap = evalOverlapAtMono;
// validation says mono but eval called poly -> false positive
if (eval != null && eval.isPolymorphic())
interesting = "ValidationStatus=FP";
}
if (eval == null)
overlap.nNoCall++;
//else if (eval.isPolymorphic()) // requires genotypes
else if (eval.isSNP())
overlap.nPoly++;
else
overlap.nMono++;
}
return interesting; // FN/FP status from above, or null when nothing notable
}
}

View File

@ -29,7 +29,7 @@ public class
String extraArgs = "-L 1:1-10,000,000";
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
1, Arrays.asList("5a330d359b5c7ea0dfa6698b4830db82"));
1, Arrays.asList("97d2471ed6ee79d70ce5bd9cc0be2239"));
executeTest("testSelect1", spec);
}
}
@ -38,7 +38,7 @@ public class
public void testSelect2() {
String extraArgs = "-L 1:1-10,000,000";
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
1, Arrays.asList("e39d6790e4ee8709dfa2eab8598b168e"));
1, Arrays.asList("e3366d73ee7cdf630c29809ce230e32e"));
executeTest("testSelect2", spec);
}
@ -48,7 +48,7 @@ public class
for (String vcfFile : vcfFiles) {
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -E GenotypeConcordance -reportType CSV -o %s",
1,
Arrays.asList("608b32189818df7271c546dd5f257033"));
Arrays.asList("51574b4ab0b381c5a01268f91e78b25c"));
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
}
@ -57,8 +57,8 @@ public class
@Test
public void testVESimple() {
HashMap<String, String> expectations = new HashMap<String, String>();
expectations.put("-L 1:1-10,000,000", "08b93c2561742dc480534ae16d2d0508");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "65bf329cf1499544d1eb28de786a4561");
expectations.put("-L 1:1-10,000,000", "8f76d8c0e3a8a5836bb5bf423e04c268");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "6b128bacbd0402471bd6d4e3f9283c47");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
@ -80,10 +80,10 @@ public class
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String matchingMD5 = "fc1d4d2aca0667605a6b4d486fcbedf2";
String matchingMD5 = "aca5a64a0e4850906db2bd820253b784";
expectations.put("", matchingMD5);
expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5);
expectations.put(" -known comp_hapmap", "fd9cd9970f7e509ca476502869063929");
expectations.put(" -known comp_hapmap", "442213609c2866f7a90cbc4b3486441a");
for (String tests : testsEnumerations) {
for (Map.Entry<String, String> entry : expectations.entrySet()) {
String extraArgs2 = entry.getKey();
@ -103,7 +103,7 @@ public class
for (String tests : testsEnumerations) {
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("521837758da151b84fca57fd1bb7dad1", "b4a42c90318adc88361691ece50426f2"));
Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5"));
executeTest("testVEWriteVCF", spec);
}
}