Renamed some of the column names in Ti/Tv and Concordance modules so that they are clearer. Removed ValidationRate module (it was busted).
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3564 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
8cb16a1d45
commit
7a91dbd490
|
|
@ -230,7 +230,7 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
private boolean warnedAboutValidationData = false;
|
||||
|
||||
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
final String interesting = null;
|
||||
String interesting = null;
|
||||
|
||||
// sanity check that we at least have either eval or validation data
|
||||
if (eval == null && !isValidVC(validation)) {
|
||||
|
|
@ -266,12 +266,14 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
}
|
||||
|
||||
determineStats(eval, validation);
|
||||
interesting = determineStats(eval, validation);
|
||||
|
||||
return interesting; // we don't capture any interesting sites
|
||||
}
|
||||
|
||||
private void determineStats(final VariantContext eval, final VariantContext validation) {
|
||||
private String determineStats(final VariantContext eval, final VariantContext validation) {
|
||||
|
||||
String interesting = null;
|
||||
|
||||
final boolean validationIsValidVC = isValidVC(validation);
|
||||
|
||||
|
|
@ -285,6 +287,8 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
truth = Genotype.Type.NO_CALL;
|
||||
} else {
|
||||
truth = validation.getGenotype(sample).getType();
|
||||
// TODO -- capture "interesting" sites here, for example:
|
||||
// interesting = "ConcordanceStatus=FP";
|
||||
}
|
||||
|
||||
sampleStats.incrValue(sample, truth, called);
|
||||
|
|
@ -327,6 +331,8 @@ public class GenotypeConcordance extends VariantEvaluator {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
return interesting;
|
||||
}
|
||||
|
||||
private static boolean isValidVC(final VariantContext vc) {
|
||||
|
|
@ -429,9 +435,9 @@ class SampleSummaryStats implements TableType {
|
|||
"percent_comp_het_called_var",
|
||||
"percent_comp_hom_called_hom",
|
||||
"percent_comp_hom_called_var",
|
||||
"percent_variant_sensitivity",
|
||||
"percent_genotype_concordance",
|
||||
"percent_genotype_error_rate"};
|
||||
"percent_non-reference_sensitivity",
|
||||
"percent_overall_genotype_concordance",
|
||||
"percent_non-reference_discrepancy_rate"};
|
||||
|
||||
// sample to concordance stats object
|
||||
private final HashMap<String, double[]> concordanceSummary = new HashMap<String, double[]>();
|
||||
|
|
@ -547,18 +553,18 @@ class SampleSummaryStats implements TableType {
|
|||
updateSummaries(3, summary, numer, denom);
|
||||
|
||||
// Summary 4: % non-ref called as non-ref
|
||||
// MAD: this is known as the variant sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
|
||||
// MAD: this is known as the non-reference sensitivity (# non-ref according to comp found in eval / # non-ref in comp)
|
||||
numer = sumStatsAllPairs(stats, allVariantGenotypes, allVariantGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allVariantGenotypes, allGenotypes);
|
||||
updateSummaries(4, summary, numer, denom);
|
||||
|
||||
// Summary 5: genotype concordance of sites called in eval track
|
||||
// Summary 5: overall genotype concordance of sites called in eval track
|
||||
// MAD: this is the traditional genotype concordance
|
||||
numer = sumStatsDiag(stats, allCalledGenotypes);
|
||||
denom = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes);
|
||||
updateSummaries(5, summary, numer, denom);
|
||||
|
||||
// Summary 6: genotype concordance of sites called non-ref in eval track
|
||||
// Summary 6: overall genotype concordance of sites called non-ref in eval track
|
||||
long homrefConcords = stats[Genotype.Type.HOM_REF.ordinal()][Genotype.Type.HOM_REF.ordinal()];
|
||||
long diag = sumStatsDiag(stats, allVariantGenotypes);
|
||||
long allNoHomRef = sumStatsAllPairs(stats, allCalledGenotypes, allCalledGenotypes) - homrefConcords;
|
||||
|
|
|
|||
|
|
@ -16,11 +16,11 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
|
|||
long nTv = 0;
|
||||
@DataPoint(name = "ti/tv ratio", description = "the transition to transversion ratio")
|
||||
double tiTvRatio = 0.0;
|
||||
@DataPoint(name = "ti_count_std", description = "number of standard transition sites")
|
||||
long nTiInStd = 0;
|
||||
@DataPoint(name = "tv_count_std", description = "number of standard transversion sites")
|
||||
long nTvInStd = 0;
|
||||
@DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio")
|
||||
@DataPoint(name = "ti_count_comp", description = "number of comp transition sites")
|
||||
long nTiInComp = 0;
|
||||
@DataPoint(name = "tv_count_comp", description = "number of comp transversion sites")
|
||||
long nTvInComp = 0;
|
||||
@DataPoint(name = "ti/tv ratio standard", description = "the transition to transversion ratio for comp sites")
|
||||
double TiTvRatioStandard = 0.0;
|
||||
|
||||
public TiTvVariantEvaluator(VariantEvalWalker parent) {
|
||||
|
|
@ -42,10 +42,10 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
|
|||
public void updateTiTv(VariantContext vc, boolean updateStandard) {
|
||||
if (vc != null && vc.isSNP() && vc.isBiallelic()) {
|
||||
if (vc.isTransition()) {
|
||||
if (updateStandard) nTiInStd++;
|
||||
if (updateStandard) nTiInComp++;
|
||||
else nTi++;
|
||||
} else {
|
||||
if (updateStandard) nTvInStd++;
|
||||
if (updateStandard) nTvInComp++;
|
||||
else nTv++;
|
||||
}
|
||||
}
|
||||
|
|
@ -62,6 +62,6 @@ public class TiTvVariantEvaluator extends VariantEvaluator {
|
|||
public void finalizeEvaluation() {
|
||||
// the ti/tv ratio needs to be set (it's not calculated per-variant).
|
||||
this.tiTvRatio = rate(nTi,nTv);
|
||||
this.TiTvRatioStandard = rate(nTiInStd,nTvInStd);
|
||||
this.TiTvRatioStandard = rate(nTiInComp, nTvInComp);
|
||||
}
|
||||
}
|
||||
|
|
@ -1,138 +0,0 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.varianteval;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.*;
|
||||
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
|
||||
import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
|
||||
|
||||
/**
|
||||
* The Broad Institute
|
||||
* SOFTWARE COPYRIGHT NOTICE AGREEMENT
|
||||
* This software and its documentation are copyright 2009 by the
|
||||
* Broad Institute/Massachusetts Institute of Technology. All rights are reserved.
|
||||
* <p/>
|
||||
* This software is supplied without any warranty or guaranteed support whatsoever. Neither
|
||||
* the Broad Institute nor MIT can be responsible for its use, misuse, or functionality.
|
||||
*/
|
||||
@Analysis(name = "Validation Rate", description = "Validation Rate")
|
||||
public class ValidationRate extends VariantEvaluator {
|
||||
@DataPoint(name="# mono in comp",description = "Number of mono calls in the comparison ROD")
|
||||
long n_mono_in_comp;
|
||||
@DataPoint(name="# poly in comp",description = "Number of poly calls in the comparison ROD")
|
||||
long n_poly_in_comp;
|
||||
@DataPoint(name="% poly in comp",description = "Percent of poly calls in the comparison ROD")
|
||||
double percent_poly_in_comp;
|
||||
@DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites")
|
||||
long n_mono_calls_at_mono_sites;
|
||||
@DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites")
|
||||
long n_poly_calls_at_mono_sites;
|
||||
@DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites")
|
||||
long n_nocalls_at_mono_sites;
|
||||
@DataPoint(name="# mono sites called poly",description = "Percentage of mono sites called poly")
|
||||
double percent_mono_sites_called_poly;
|
||||
@DataPoint(name="# mono calls at poly sites",description = "Number of mono calls at poly sites")
|
||||
long n_mono_calls_at_poly_sites;
|
||||
@DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at poly sites")
|
||||
long n_poly_calls_at_poly_sites;
|
||||
@DataPoint(name="# nocalls at poly sites",description = "Number of no calls at poly sites")
|
||||
long n_nocalls_at_poly_sites;
|
||||
@DataPoint(name="% poly sites called poly",description = "Number of poly sites called poly")
|
||||
double percent_poly_sites_called_poly;
|
||||
@DataPoint(description = "The PPV")
|
||||
double PPV;
|
||||
@DataPoint(description = "The sensitivity")
|
||||
double sensitivity;
|
||||
@DataPoint(description = "The False Discovery Rate (FDR)")
|
||||
double falseDiscoveryRate;
|
||||
|
||||
// todo -- subset validation data by list of samples, if provided
|
||||
class SiteStats {
|
||||
long nPoly = 0;
|
||||
long nMono = 0;
|
||||
long nNoCall = 0;
|
||||
|
||||
double polyPercent() {
|
||||
return 100 * rate(nPoly, nPoly + nMono + nNoCall);
|
||||
}
|
||||
}
|
||||
|
||||
private SiteStats validationStats = new SiteStats();
|
||||
private SiteStats evalOverlapAtMono = new SiteStats();
|
||||
private SiteStats evalOverlapAtPoly = new SiteStats();
|
||||
|
||||
public ValidationRate(VariantEvalWalker parent) {
|
||||
super(parent);
|
||||
}
|
||||
|
||||
public String getName() {
|
||||
return "validationRate";
|
||||
}
|
||||
|
||||
public int getComparisonOrder() {
|
||||
return 2; // we need to see each eval track and each comp track
|
||||
}
|
||||
|
||||
@Override
|
||||
public void finalizeEvaluation() {
|
||||
long TP = evalOverlapAtPoly.nPoly; // + evalOverlapAtMono.nMono + evalOverlapAtMono.nNoCall;
|
||||
long FP = evalOverlapAtMono.nPoly; // + evalOverlapAtPoly.nMono;
|
||||
long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall;
|
||||
|
||||
// fill in the output fields
|
||||
n_mono_in_comp = validationStats.nMono;
|
||||
n_poly_in_comp = validationStats.nPoly;
|
||||
percent_poly_in_comp = validationStats.polyPercent();
|
||||
|
||||
n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono;
|
||||
n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly;
|
||||
n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall;
|
||||
percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent();
|
||||
|
||||
n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono;
|
||||
n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly;
|
||||
n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall;
|
||||
percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent();
|
||||
PPV = 100 * rate(TP, TP + FP);
|
||||
sensitivity = 100 * rate(TP, TP + FN);
|
||||
falseDiscoveryRate = 100 * rate(FP, FP + TP);
|
||||
}
|
||||
|
||||
public boolean enabled() {
|
||||
return true;
|
||||
}
|
||||
|
||||
public String update2(VariantContext eval, VariantContext rawValidationData, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
|
||||
String interesting = null;
|
||||
|
||||
if (rawValidationData!= null && rawValidationData.hasGenotypes() && rawValidationData.isNotFiltered()) {
|
||||
VariantContext validation = rawValidationData;
|
||||
|
||||
SiteStats overlap;
|
||||
|
||||
if (validation.isPolymorphic()) {
|
||||
validationStats.nPoly++;
|
||||
overlap = evalOverlapAtPoly;
|
||||
if (eval == null || eval.isMonomorphic())
|
||||
interesting = "ValidationStatus=FN";
|
||||
} else {
|
||||
validationStats.nMono++;
|
||||
overlap = evalOverlapAtMono;
|
||||
|
||||
if (eval != null && eval.isPolymorphic())
|
||||
interesting = "ValidationStatus=FP";
|
||||
}
|
||||
|
||||
if (eval == null)
|
||||
overlap.nNoCall++;
|
||||
//else if (eval.isPolymorphic()) // requires genotypes
|
||||
else if (eval.isSNP())
|
||||
overlap.nPoly++;
|
||||
else
|
||||
overlap.nMono++;
|
||||
}
|
||||
|
||||
return interesting; // we don't capture any interesting sites
|
||||
}
|
||||
}
|
||||
|
|
@ -29,7 +29,7 @@ public class
|
|||
String extraArgs = "-L 1:1-10,000,000";
|
||||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(withSelect(tests, "DP < 50", "DP50") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("5a330d359b5c7ea0dfa6698b4830db82"));
|
||||
1, Arrays.asList("97d2471ed6ee79d70ce5bd9cc0be2239"));
|
||||
executeTest("testSelect1", spec);
|
||||
}
|
||||
}
|
||||
|
|
@ -38,7 +38,7 @@ public class
|
|||
public void testSelect2() {
|
||||
String extraArgs = "-L 1:1-10,000,000";
|
||||
WalkerTestSpec spec = new WalkerTestSpec( withSelect(withSelect(root, "DP < 50", "DP50"), "set==\"Intersection\"", "intersection") + " " + extraArgs + " -o %s",
|
||||
1, Arrays.asList("e39d6790e4ee8709dfa2eab8598b168e"));
|
||||
1, Arrays.asList("e3366d73ee7cdf630c29809ce230e32e"));
|
||||
executeTest("testSelect2", spec);
|
||||
}
|
||||
|
||||
|
|
@ -48,7 +48,7 @@ public class
|
|||
for (String vcfFile : vcfFiles) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(cmdRoot + " -B eval,VCF," + validationDataLocation + vcfFile + " -B comp,VCF," + validationDataLocation + "GenotypeConcordanceComp.vcf -E GenotypeConcordance -reportType CSV -o %s",
|
||||
1,
|
||||
Arrays.asList("608b32189818df7271c546dd5f257033"));
|
||||
Arrays.asList("51574b4ab0b381c5a01268f91e78b25c"));
|
||||
executeTest("testVEGenotypeConcordance" + vcfFile, spec);
|
||||
}
|
||||
|
||||
|
|
@ -57,8 +57,8 @@ public class
|
|||
@Test
|
||||
public void testVESimple() {
|
||||
HashMap<String, String> expectations = new HashMap<String, String>();
|
||||
expectations.put("-L 1:1-10,000,000", "08b93c2561742dc480534ae16d2d0508");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "65bf329cf1499544d1eb28de786a4561");
|
||||
expectations.put("-L 1:1-10,000,000", "8f76d8c0e3a8a5836bb5bf423e04c268");
|
||||
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "6b128bacbd0402471bd6d4e3f9283c47");
|
||||
|
||||
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
|
||||
String extraArgs = entry.getKey();
|
||||
|
|
@ -80,10 +80,10 @@ public class
|
|||
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
|
||||
|
||||
|
||||
String matchingMD5 = "fc1d4d2aca0667605a6b4d486fcbedf2";
|
||||
String matchingMD5 = "aca5a64a0e4850906db2bd820253b784";
|
||||
expectations.put("", matchingMD5);
|
||||
expectations.put(" -known comp_hapmap -known dbsnp", matchingMD5);
|
||||
expectations.put(" -known comp_hapmap", "fd9cd9970f7e509ca476502869063929");
|
||||
expectations.put(" -known comp_hapmap", "442213609c2866f7a90cbc4b3486441a");
|
||||
for (String tests : testsEnumerations) {
|
||||
for (Map.Entry<String, String> entry : expectations.entrySet()) {
|
||||
String extraArgs2 = entry.getKey();
|
||||
|
|
@ -103,7 +103,7 @@ public class
|
|||
for (String tests : testsEnumerations) {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(tests + " " + extraArgs + " -o %s -outputVCF %s",
|
||||
2,
|
||||
Arrays.asList("521837758da151b84fca57fd1bb7dad1", "b4a42c90318adc88361691ece50426f2"));
|
||||
Arrays.asList("dc53aaf7db9f05e3b0a38bf5efe3fbbe", "cd8616aca14eb77bd90732fbfce038d5"));
|
||||
executeTest("testVEWriteVCF", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue