More detailed validation output. Fixes for genotyping overflow -- these are temporary and need to be properly resolved

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3129 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
depristo 2010-04-07 16:38:28 +00:00
parent e7dad728df
commit 918b746798
3 changed files with 22 additions and 13 deletions

View File

@ -29,7 +29,7 @@ public class ValidationRate extends VariantEvaluator {
double percent_poly_in_comp;
@DataPoint(name="# mono calls at mono sites",description = "Number of mono calls at mono sites")
long n_mono_calls_at_mono_sites;
@DataPoint(name="# poly calls at poly sites",description = "Number of poly calls at mono sites")
@DataPoint(name="# poly calls at mono sites",description = "Number of poly calls at mono sites")
long n_poly_calls_at_mono_sites;
@DataPoint(name="# nocalls at mono sites",description = "Number of no calls at mono sites")
long n_nocalls_at_mono_sites;
@ -47,6 +47,8 @@ public class ValidationRate extends VariantEvaluator {
double PPV;
@DataPoint(description = "The sensitivity")
double sensitivity;
@DataPoint(description = "The False Discovery Rate (FDR)")
double falseDiscoveryRate;
// todo -- subset validation data by list of samples, if provided
class SiteStats {
@ -77,35 +79,41 @@ public class ValidationRate extends VariantEvaluator {
@Override
public void finalizeEvaluation() {
long TP = evalOverlapAtPoly.nPoly + evalOverlapAtMono.nMono;
long FP = evalOverlapAtMono.nPoly + evalOverlapAtPoly.nMono;
long TP = evalOverlapAtPoly.nPoly; // + evalOverlapAtMono.nMono + evalOverlapAtMono.nNoCall;
long FP = evalOverlapAtMono.nPoly; // + evalOverlapAtPoly.nMono;
long FN = evalOverlapAtPoly.nMono + evalOverlapAtPoly.nNoCall;
// fill in the output fields
n_mono_in_comp = validationStats.nMono;
n_poly_in_comp = validationStats.nPoly;
percent_poly_in_comp = validationStats.polyPercent();
n_mono_calls_at_mono_sites = evalOverlapAtMono.nMono;
n_poly_calls_at_mono_sites = evalOverlapAtMono.nPoly;
n_nocalls_at_mono_sites = evalOverlapAtMono.nNoCall;
percent_mono_sites_called_poly = evalOverlapAtMono.polyPercent();
n_mono_calls_at_poly_sites = evalOverlapAtPoly.nMono;
n_poly_calls_at_poly_sites = evalOverlapAtPoly.nMono;
n_poly_calls_at_poly_sites = evalOverlapAtPoly.nPoly;
n_nocalls_at_poly_sites = evalOverlapAtPoly.nNoCall;
percent_poly_sites_called_poly = evalOverlapAtPoly.polyPercent();
PPV = 100 * rate(TP, TP + FP);
sensitivity = 100 * rate(TP, TP + FN);
falseDiscoveryRate = 100 * rate(FP, FP + TP);
}
public boolean enabled() {
return true;
}
public String update2(VariantContext eval, VariantContext validation, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
public String update2(VariantContext eval, VariantContext rawValidationData, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
String interesting = null;
if (validation != null && validation.hasGenotypes() && validation.isNotFiltered()) {
if (rawValidationData!= null && rawValidationData.hasGenotypes() && rawValidationData.isNotFiltered()) {
VariantContext validation = rawValidationData;
//if ( eval != null ) // todo -- remove me when I can get the header from the VariantEval engine
// validation = rawValidationData.subContextFromGenotypes(eval.getGenotypes().values());
SiteStats overlap;
if (validation.isPolymorphic()) {

View File

@ -410,7 +410,8 @@ public class VariantEval2Walker extends RodWalker<Integer, Integer> {
}
private void writeInterestingSite(List<String> interestingReasons, VariantContext vc, char ref) {
if ( writer != null && interestingReasons.size() > 0 ) {
if ( vc != null && writer != null && interestingReasons.size() > 0 ) {
// todo -- the vc == null check is because you can be interesting because you are a FN, and so VC == null
MutableVariantContext mvc = new MutableVariantContext(vc);
for ( String why : interestingReasons ) {

View File

@ -18,8 +18,8 @@ public class VariantEval2IntegrationTest extends WalkerTest {
@Test
public void testVE2Simple() {
HashMap<String, String> expectations = new HashMap<String, String>();
expectations.put("-L 1:1-10,000,000", "c304b257004f9d11cbcd89725eb89319");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "59cce874ed647fce22789b82779c4f13");
expectations.put("-L 1:1-10,000,000", "d5a8c6550236e971ffe29f7f254ad076");
expectations.put("-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 0", "529b56760da952741e9d843453a8eb73");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs = entry.getKey();
@ -39,10 +39,10 @@ public class VariantEval2IntegrationTest extends WalkerTest {
" -B dbsnp_130,dbSNP," + GATKDataLocation + "dbsnp_130_b36.rod" +
" -B comp_hapmap,VCF," + validationDataLocation + "CEU_hapmap_nogt_23.vcf";
String eqMD5s = "cf20ea2b9c6b173311b735f6d4ae72e6"; // next two examples should be the same!
String eqMD5s = "13d2de47f9fe56a4ddfeca10d9c1d5a9"; // next two examples should be the same!
expectations.put("", eqMD5s);
expectations.put(" -known comp_hapmap -known dbsnp", eqMD5s);
expectations.put(" -known comp_hapmap", "f43630aa98764aecb867b175014879e1");
expectations.put(" -known comp_hapmap", "edd1f32a7f2f0390fdd0cc4c5ff14bf1");
for ( Map.Entry<String, String> entry : expectations.entrySet() ) {
String extraArgs2 = entry.getKey();
@ -60,7 +60,7 @@ public class VariantEval2IntegrationTest extends WalkerTest {
String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";
WalkerTestSpec spec = new WalkerTestSpec( root + " " + extraArgs + " -o %s -outputVCF %s",
2,
Arrays.asList("abc2a36c0375386f731461da5e7e2104", "a3ce1d70d8ae3874807e9d61994d42af"));
Arrays.asList("31f8cf5010d4b73b1520acef88207214", "a3ce1d70d8ae3874807e9d61994d42af"));
executeTest("testVE2WriteVCF", spec);
}
}