From d1a4cd2f733e9ba6f4d05a6f95123d8801bf3f80 Mon Sep 17 00:00:00 2001 From: andrewk Date: Fri, 16 Oct 2009 15:39:08 +0000 Subject: [PATCH] Added ValidationData analysis type to VariantEvalWalker; this eval takes a GFF file with validated truth data positions (bound to "validation")and calculates the accuracy of the genotype calls bound to "eval". git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1862 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/GenomeAnalysisEngine.java | 2 +- .../varianteval/ValidationDataAnalysis.java | 56 +++++++++++++++++++ .../varianteval/VariantEvalWalker.java | 3 +- 3 files changed, 59 insertions(+), 2 deletions(-) create mode 100755 java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ValidationDataAnalysis.java diff --git a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java index c9dbab884..7c0ef7980 100755 --- a/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java +++ b/java/src/org/broadinstitute/sting/gatk/GenomeAnalysisEngine.java @@ -444,7 +444,7 @@ public class GenomeAnalysisEngine { // Check to see that no forbidden rods are present. for (ReferenceOrderedData rod : rods) { if (!WalkerManager.isAllowed(walker, rod)) - throw new ArgumentException(String.format("Walker does not allow access to metadata: %s. If this is incorrect, change the @Allows metadata", rod.getName())); + throw new ArgumentException(String.format("Walker of type %s does not allow access to metadata: %s. If this is incorrect, change the @Allows metadata", walker.getClass(), rod.getName())); } } diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ValidationDataAnalysis.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ValidationDataAnalysis.java new file mode 100755 index 000000000..4ec966bf5 --- /dev/null +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/ValidationDataAnalysis.java @@ -0,0 +1,56 @@ +package org.broadinstitute.sting.playground.gatk.walkers.varianteval; + +import org.broadinstitute.sting.utils.genotype.Variation; +import org.broadinstitute.sting.gatk.refdata.*; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; + +import java.util.List; +import java.util.ArrayList; + +/** + * Created by IntelliJ IDEA. + * User: andrewk + * Date: Sep 30, 2009 + * Time: 5:28:57 PM + * To change this template use File | Settings | File Templates. + */ + +// @Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=VariationRod.class), @RMD(name="dbsnp",type= rodDbSNP.class),@RMD(name="hapmap-chip",type=RodGenotypeChipAsGFF.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)}) + +public class ValidationDataAnalysis extends BasicVariantAnalysis implements GenotypeAnalysis, PopulationAnalysis { + private int calls_at_validated_sites = 0; + private int calls_at_sites_validated_true = 0; + private int validated_sites = 0; + + public ValidationDataAnalysis() { + super("validation_data_analysis"); + } + + public String update(Variation eval, RefMetaDataTracker tracker, char ref, AlignmentContext context) { + + validated_sites++; + Variation val_data = (Variation) tracker.lookup("validation", null); + Variation dbsnp = (Variation) tracker.lookup("dbsnp",null); + + if (eval != null) { + calls_at_sites_validated_true++; + //out.format("Has validaiton data: %s\n", val_data.getLocation()); + if (val_data != null) { + //out.format("Validated true: %s\n", val_data.getLocation()); + calls_at_validated_sites++; + } + } + out.println(context.getLocation()); + + return ""; + } + + public List done() { + List s = new ArrayList(); + s.add(String.format("validated sites %d", validated_sites)); + s.add(String.format("calls at validated sites %d", calls_at_sites_validated_true)); + s.add(String.format("calls at sites validated true %d", calls_at_validated_sites)); + s.add(String.format("%% validated true %f", (float) calls_at_validated_sites / calls_at_sites_validated_true)); + return s; + } +} diff --git a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java index 2a6ae2d41..7444f8059 100755 --- a/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java +++ b/java/src/org/broadinstitute/sting/playground/gatk/walkers/varianteval/VariantEvalWalker.java @@ -26,7 +26,7 @@ import java.util.*; * */ @Requires(value={DataSource.REFERENCE},referenceMetaData={@RMD(name="eval",type=ReferenceOrderedDatum.class)}) // right now we have no base variant class for rods, this should change -@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=ReferenceOrderedDatum.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=ReferenceOrderedDatum.class), @RMD(name="interval",type=IntervalRod.class)}) +@Allows(value={DataSource.REFERENCE},referenceMetaData = {@RMD(name="eval",type=ReferenceOrderedDatum.class), @RMD(name="dbsnp",type=rodDbSNP.class),@RMD(name="hapmap-chip",type=ReferenceOrderedDatum.class), @RMD(name="interval",type=IntervalRod.class), @RMD(name="validation",type=RodGenotypeChipAsGFF.class)}) public class VariantEvalWalker extends RefWalker { @Argument(shortName="minConfidenceScore", doc="Minimum confidence score to consider an evaluation SNP a variant", required=false) public int minConfidenceScore = -1; @@ -115,6 +115,7 @@ public class VariantEvalWalker extends RefWalker { // // Add new analyses here! // + analyses.add(new ValidationDataAnalysis()); analyses.add(new PooledGenotypeConcordance(pathToHapmapPoolFile)); analyses.add(new VariantCounter()); analyses.add(new VariantDBCoverage(knownSNPDBName));