From 8ff93f77e6bfcbbc368f82c25c9558ad9b73654d Mon Sep 17 00:00:00 2001
From: kiran
Date: Fri, 18 Jun 2010 21:51:40 +0000
Subject: [PATCH] Added evaluation module to count functional classes
 (missense, nonsense, etc.). At the moment, it only understands Cancer's MAF
 annotations. Added integration test for the functional class counting. Added
 better description for VariantEval.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3595 348d0f76-0448-11de-a6fe-93d51630548a
---
Reviewer notes (text between the "---" marker and the diffstat is not part of
the commit message):
 * Purpose: adds the CountFunctionalClasses evaluator, which increments one
   counter per variant according to the variant's "type" attribute, updates
   the VariantEvalWalker class comment, and adds one integration test.
 * CountFunctionalClasses.enabled() returns false, and update1() only counts
   the ten listed "type" values (compared case-insensitively); a missing
   attribute or any other value is ignored and update1() returns null.
 * NOTE(review): indentation inside the hunks and the positions of blank
   context lines are reconstructed from the hunk line counts -- verify them
   against the target files before applying.
 * NOTE(review): context lines such as "extends RodWalker {" and
   "List initializeEvaluationContexts(Set evalNames, ...)" look like they have
   lost their generic type arguments -- TODO confirm against the real sources.
 * NOTE(review): the md5 expected by testVEGenomicallyAnnotated,
   d41d8cd98f00b204e9800998ecf8427e, is the MD5 digest of empty input, so
   presumably the test expects an empty output file -- confirm this is
   intended and not a placeholder.

 .../varianteval/CountFunctionalClasses.java   | 77 +++++++++++++++++++
 .../varianteval/VariantEvalWalker.java        |  6 +-
 .../VariantEvalIntegrationTest.java           | 15 ++++
 3 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100755 java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java

diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java
new file mode 100755
index 000000000..ff319a7f2
--- /dev/null
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/CountFunctionalClasses.java
@@ -0,0 +1,77 @@
+package org.broadinstitute.sting.gatk.walkers.varianteval;
+
+import org.broadinstitute.sting.playground.utils.report.tags.Analysis;
+import org.broadinstitute.sting.playground.utils.report.tags.DataPoint;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+
+@Analysis(name = "Count Functional Classes", description = "Counts instances of different functional variant classes (provided the variants are annotated with that information)")
+public class CountFunctionalClasses extends VariantEvaluator {
+    // the following fields are in output order:
+    @DataPoint(description = "miRNA")
+    long nMiRNA= 0;
+
+    @DataPoint(description = "3'-UTR")
+    long nUTR3 = 0;
+
+    @DataPoint(description = "Intron")
+    long nIntron = 0;
+
+    @DataPoint(description = "Splice-site")
+    long nSpliceSite= 0;
+
+    @DataPoint(description = "Read-through")
+    long nReadThrough = 0;
+
+    @DataPoint(description = "Nonsense")
+    long nNonsense = 0;
+
+    @DataPoint(description = "Missense")
+    long nMissense = 0;
+
+    @DataPoint(description = "Synonymous")
+    long nSynonymous = 0;
+
+    @DataPoint(description = "5'-UTR")
+    long nUTR5= 0;
+
+    @DataPoint(description = "Promoter")
+    long nPromoter = 0;
+
+    public CountFunctionalClasses(VariantEvalWalker parent) {
+        super(parent);
+    }
+
+    public String getName() {
+        return "functionalclasses";
+    }
+
+    public boolean enabled() {
+        return false;
+    }
+
+    public int getComparisonOrder() {
+        return 1;
+    }
+
+    public String update1(VariantContext vc1, RefMetaDataTracker tracker, ReferenceContext ref, AlignmentContext context) {
+        String type = vc1.getAttributeAsString("type");
+
+        if (type != null) {
+            if (type.equalsIgnoreCase("miRNA")) { nMiRNA++; }
+            else if (type.equalsIgnoreCase("3'-UTR")) { nUTR3++; }
+            else if (type.equalsIgnoreCase("Intron")) { nIntron++; }
+            else if (type.equalsIgnoreCase("Splice_site")) { nSpliceSite++; }
+            else if (type.equalsIgnoreCase("Read-through")) { nReadThrough++; }
+            else if (type.equalsIgnoreCase("Nonsense")) { nNonsense++; }
+            else if (type.equalsIgnoreCase("Missense")) { nMissense++; }
+            else if (type.equalsIgnoreCase("Synonymous")) { nSynonymous++; }
+            else if (type.equalsIgnoreCase("5'-UTR")) { nUTR5++; }
+            else if (type.equalsIgnoreCase("Promoter")) { nPromoter++; }
+        }
+
+        return null;
+    }
+}
diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
index 4f36ab524..fe184ea04 100755
--- a/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalWalker.java
@@ -103,7 +103,7 @@ import java.util.*;
 
 
 /**
- * Test routine for new VariantContext object
+ * General-purpose tool for variant evaluation (% in dbSNP, genotype concordance, Ts/Tv ratios, and a lot more)
  */
 @Reference(window=@Window(start=-50,stop=50))
 public class VariantEvalWalker extends RodWalker {
@@ -389,6 +389,10 @@ public class VariantEvalWalker extends RodWalker {
     private List initializeEvaluationContexts(Set evalNames, Set compNames, List selectExps) {
         List contexts = new ArrayList();
 
+        // todo -- add another for loop for each sample (be smart about the selection here -
+        // honor specifications of just one or a few samples), and put an "all" in here so
+        // that we don't lose multi-sample evaluations
+
         selectExps = append(selectExps, null);
         for ( String evalName : evalNames ) {
             for ( String compName : compNames ) {
diff --git a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index 3cf9a4843..902a7d86e 100755
--- a/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -97,6 +97,21 @@ public class
         }
     }
 
+    @Test
+    public void testVEGenomicallyAnnotated() {
+        String vecmd = "-T VariantEval " +
+                       "-R " + oneKGLocation + "reference/human_b36_both.fasta " +
+                       "-L 21 " +
+                       "-D " + GATKDataLocation + "dbsnp_129_b36.rod " +
+                       "-E CountFunctionalClasses " +
+                       "-B eval,VCF," + validationDataLocation + "test.filtered.maf_annotated.vcf " +
+                       "-o %s";
+        String md5 = "d41d8cd98f00b204e9800998ecf8427e";
+
+        WalkerTestSpec spec = new WalkerTestSpec(vecmd, 1, Arrays.asList(md5));
+        executeTest("testVEGenomicallyAnnotated", spec);
+    }
+
     @Test
     public void testVEWriteVCF() {
         String extraArgs = "-L 1:1-10,000,000 -family NA19238+NA19239=NA19240 -MVQ 30";