From e1cb5f6459a9e1c534acb583dd94f9412f35322e Mon Sep 17 00:00:00 2001 From: David Roazen Date: Fri, 23 Sep 2011 15:00:03 -0400 Subject: [PATCH] SnpEff annotator now assigns a functional class to each effect and distinguishes between actual effects and mere modifiers. -We now assign a functional class (nonsense, missense, silent, or none) to each SnpEff effect, and add a SNPEFF_FUNCTIONAL_CLASS annotation to the INFO field of the output VCF. -Effects are now prioritized according to both biological impact and functional class, instead of impact only. -Many of SnpEff's "low-impact" effects are now classified as "modifiers" with lower priority than every other effect. This includes such "effects" as DOWNSTREAM, UPSTREAM, INTRON, GENE, EXON, and others that really describe the location of the variant rather than its biological effect. This code will be short-lived (likely 1.2-only), as the next version of SnpEff will include most of these features directly. Checking this change into Stable+Unstable instead of Unstable because the current functional class stratification in VariantEval is basically broken and urgently needs to be fixed for production purposes. --- .../sting/gatk/walkers/annotator/SnpEff.java | 160 ++++++++++++------ .../stratifications/FunctionalClass.java | 19 ++- .../VariantAnnotatorIntegrationTest.java | 4 +- .../VariantEvalIntegrationTest.java | 2 +- 4 files changed, 126 insertions(+), 59 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 8f99c6118..973b3277d 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -82,7 +82,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio GENE_NAME_KEY ("SNPEFF_GENE_NAME", 3), GENE_BIOTYPE_KEY ("SNPEFF_GENE_BIOTYPE", 4), TRANSCRIPT_ID_KEY ("SNPEFF_TRANSCRIPT_ID", 6), - EXON_ID_KEY ("SNPEFF_EXON_ID", 7); + EXON_ID_KEY ("SNPEFF_EXON_ID", 7), + FUNCTIONAL_CLASS_KEY ("SNPEFF_FUNCTIONAL_CLASS", -1); // Actual text of the key private final String keyName; @@ -108,48 +109,73 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // Possible SnpEff biological effects. All effect names found in the SnpEff input file // are validated against this list. public enum EffectType { - NONE, - CHROMOSOME, - INTERGENIC, - UPSTREAM, - UTR_5_PRIME, - UTR_5_DELETED, - START_GAINED, - SPLICE_SITE_ACCEPTOR, - SPLICE_SITE_DONOR, - START_LOST, - SYNONYMOUS_START, - NON_SYNONYMOUS_START, - CDS, - GENE, - TRANSCRIPT, - EXON, - EXON_DELETED, - NON_SYNONYMOUS_CODING, - SYNONYMOUS_CODING, - FRAME_SHIFT, - CODON_CHANGE, - CODON_INSERTION, - CODON_CHANGE_PLUS_CODON_INSERTION, - CODON_DELETION, - CODON_CHANGE_PLUS_CODON_DELETION, - STOP_GAINED, - SYNONYMOUS_STOP, - NON_SYNONYMOUS_STOP, - STOP_LOST, - INTRON, - UTR_3_PRIME, - UTR_3_DELETED, - DOWNSTREAM, - INTRON_CONSERVED, - INTERGENIC_CONSERVED, - REGULATION, - CUSTOM, - WITHIN_NON_CODING_GENE + // High-impact effects: + FRAME_SHIFT (EffectFunctionalClass.NONE, false), + STOP_GAINED (EffectFunctionalClass.NONSENSE, false), + START_LOST (EffectFunctionalClass.NONE, false), + SPLICE_SITE_ACCEPTOR (EffectFunctionalClass.NONE, false), + SPLICE_SITE_DONOR (EffectFunctionalClass.NONE, false), + EXON_DELETED (EffectFunctionalClass.NONE, false), + STOP_LOST (EffectFunctionalClass.NONE, false), + + // Moderate-impact effects: + NON_SYNONYMOUS_CODING (EffectFunctionalClass.MISSENSE, false), + CODON_CHANGE (EffectFunctionalClass.NONE, false), + CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_INSERTION (EffectFunctionalClass.NONE, false), + CODON_DELETION (EffectFunctionalClass.NONE, false), + CODON_CHANGE_PLUS_CODON_DELETION (EffectFunctionalClass.NONE, false), + UTR_5_DELETED (EffectFunctionalClass.NONE, false), + UTR_3_DELETED (EffectFunctionalClass.NONE, false), + + // Low-impact effects: + SYNONYMOUS_CODING (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_START (EffectFunctionalClass.SILENT, false), + SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + NON_SYNONYMOUS_STOP (EffectFunctionalClass.SILENT, false), + START_GAINED (EffectFunctionalClass.NONE, false), + + // Modifiers: + NONE (EffectFunctionalClass.NONE, true), + CHROMOSOME (EffectFunctionalClass.NONE, true), + INTERGENIC (EffectFunctionalClass.NONE, true), + UPSTREAM (EffectFunctionalClass.NONE, true), + UTR_5_PRIME (EffectFunctionalClass.NONE, true), + CDS (EffectFunctionalClass.NONE, true), + GENE (EffectFunctionalClass.NONE, true), + TRANSCRIPT (EffectFunctionalClass.NONE, true), + EXON (EffectFunctionalClass.NONE, true), + INTRON (EffectFunctionalClass.NONE, true), + UTR_3_PRIME (EffectFunctionalClass.NONE, true), + DOWNSTREAM (EffectFunctionalClass.NONE, true), + INTRON_CONSERVED (EffectFunctionalClass.NONE, true), + INTERGENIC_CONSERVED (EffectFunctionalClass.NONE, true), + REGULATION (EffectFunctionalClass.NONE, true), + CUSTOM (EffectFunctionalClass.NONE, true), + WITHIN_NON_CODING_GENE (EffectFunctionalClass.NONE, true); + + private final EffectFunctionalClass functionalClass; + private final boolean isModifier; + + EffectType ( EffectFunctionalClass functionalClass, boolean isModifier ) { + this.functionalClass = functionalClass; + this.isModifier = isModifier; + } + + public EffectFunctionalClass getFunctionalClass() { + return functionalClass; + } + + public boolean isModifier() { + return isModifier; + } } - // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. + // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact. We take the additional step of + // classifying some of the LOW impact effects as MODIFIERs. public enum EffectImpact { + MODIFIER (0), LOW (1), MODERATE (2), HIGH (3); @@ -163,6 +189,10 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio public boolean isHigherImpactThan ( EffectImpact other ) { return this.severityRating > other.severityRating; } + + public boolean isSameImpactAs ( EffectImpact other ) { + return this.severityRating == other.severityRating; + } } // SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information. @@ -172,6 +202,24 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } + // We assign a functional class to each SnpEff effect. + public enum EffectFunctionalClass { + NONE (0), + SILENT (1), + MISSENSE (2), + NONSENSE (3); + + private final int priority; + + EffectFunctionalClass ( int priority ) { + this.priority = priority; + } + + public boolean isHigherPriorityThan ( EffectFunctionalClass other ) { + return this.priority > other.priority; + } + } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff // without providing a SnpEff rod via --snpEffFile): @@ -336,7 +384,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio InfoFieldKey.GENE_NAME_KEY.getKeyName(), InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), - InfoFieldKey.EXON_ID_KEY.getKeyName() + InfoFieldKey.EXON_ID_KEY.getKeyName(), + InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName() ); } @@ -349,7 +398,8 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio new VCFInfoHeaderLine(InfoFieldKey.GENE_NAME_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"), new VCFInfoHeaderLine(InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"), - new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant") + new VCFInfoHeaderLine(InfoFieldKey.EXON_ID_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"), + new VCFInfoHeaderLine(InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), 1, VCFHeaderLineType.String, "Functional class of the highest-impact effect resulting from the current variant: " + Arrays.toString(EffectFunctionalClass.values())) ); } @@ -411,11 +461,16 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio return; } - try { - impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + if ( effect != null && effect.isModifier() ) { + impact = EffectImpact.MODIFIER; } - catch ( IllegalArgumentException e ) { - parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + else { + try { + impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()]); + } + catch ( IllegalArgumentException e ) { + parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.IMPACT_KEY.getFieldIndex()])); + } } codonChange = effectMetadata[InfoFieldKey.CODON_CHANGE_KEY.getFieldIndex()]; @@ -472,9 +527,17 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } // Otherwise, both effects are either in or not in a coding gene, so we compare the impacts - // of the effects themselves: + // of the effects themselves. Effects with the same impact are tie-broken using the + // functional class of the effect: - return impact.isHigherImpactThan(other.impact); + if ( impact.isHigherImpactThan(other.impact) ) { + return true; + } + else if ( impact.isSameImpactAs(other.impact) ) { + return effect.getFunctionalClass().isHigherPriorityThan(other.effect.getFunctionalClass()); + } + + return false; } public Map getAnnotations() { @@ -488,6 +551,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio addAnnotation(annotations, InfoFieldKey.GENE_BIOTYPE_KEY.getKeyName(), geneBiotype); addAnnotation(annotations, InfoFieldKey.TRANSCRIPT_ID_KEY.getKeyName(), transcriptID); addAnnotation(annotations, InfoFieldKey.EXON_ID_KEY.getKeyName(), exonID); + addAnnotation(annotations, InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName(), effect.getFunctionalClass().toString()); return annotations; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java index 88ffcaaeb..1dc047b5d 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/varianteval/stratifications/FunctionalClass.java @@ -62,14 +62,17 @@ public class FunctionalClass extends VariantStratifier { annotationId++; } while (eval.hasAttribute(key)); - } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName() ) ) { - SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFFECT_KEY.getKeyName()).toString()); - if ( snpEffType == SnpEff.EffectType.STOP_GAINED ) - type = FunctionalType.nonsense; - else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING ) - type = FunctionalType.missense; - else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING ) - type = FunctionalType.silent; + } else if ( eval.hasAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()) ) { + try { + SnpEff.EffectFunctionalClass snpEffFunctionalClass = SnpEff.EffectFunctionalClass.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.FUNCTIONAL_CLASS_KEY.getKeyName()).toString()); + if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.NONSENSE ) + type = FunctionalType.nonsense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.MISSENSE ) + type = FunctionalType.missense; + else if ( snpEffFunctionalClass == SnpEff.EffectFunctionalClass.SILENT ) + type = FunctionalType.silent; + } + catch ( Exception e ) {} // don't error out if the type isn't supported } if ( type != null ) { diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 2c06c6b7f..04bff8d41 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -132,9 +132,9 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { WalkerTestSpec spec = new WalkerTestSpec( "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + - "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000 -L 2:232,325,429", 1, - Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") + Arrays.asList("122321a85e448f21679f6ca15c5e22ad") ); executeTest("Testing SnpEff annotations", spec); } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java index b90e6d0ff..9fe253ecb 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java @@ -30,7 +30,7 @@ public class VariantEvalIntegrationTest extends WalkerTest { "-o %s" ), 1, - Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12") + Arrays.asList("d9dcb352c53106f54fcc981f15d35a90") ); executeTest("testFunctionClassWithSnpeff", spec); }