From d9ea764611c0e6b63e8b6e2022fafc52ee56bea9 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Tue, 20 Sep 2011 15:22:27 -0400 Subject: [PATCH] SnpEff annotator now adds OriginalSnpEffVersion and OriginalSnpEffCmd lines to the header of the VCF output file. This change is urgently required for production, which is why it's going into Stable+Unstable instead of just Unstable. The keys for the SnpEff version and command header lines in the VCF file output by VariantAnnotator (OriginalSnpEffVersion and OriginalSnpEffCmd) are intentionally different from the keys for those same lines in the SnpEff output file (SnpEffVersion and SnpEffCmd), so that output files from VariantAnnotator won't be confused with output files from SnpEff itself. --- .../sting/gatk/walkers/annotator/SnpEff.java | 49 +++++++++++++++---- .../walkers/annotator/VariantAnnotator.java | 4 +- .../annotator/VariantAnnotatorEngine.java | 6 +-- .../VariantAnnotatorAnnotation.java | 4 +- .../VariantAnnotatorIntegrationTest.java | 2 +- 5 files changed, 48 insertions(+), 17 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 4ead77506..8f99c6118 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // lacking a SnpEff version number in the VCF header: public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" }; public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion"; + public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd"; + + // When we write the SnpEff version number and command line to the output VCF, we change + // the key name slightly so that the output VCF won't be confused in the future for an + // output file produced by SnpEff directly: + public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY; + public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY; // SnpEff aggregates all effects (and effect metadata) together into a single INFO // field annotation with the key EFF: @@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } - - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff + // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); - checkSnpEffVersion(walker, toolkit); + RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); + + // Make sure that the SnpEff version number and command-line header lines are present in the VCF header of + // the SnpEff rod, and that the file was generated by a supported version of SnpEff: + VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); + VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); + VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); + + checkSnpEffVersion(snpEffVersionLine); + checkSnpEffCommandLine(snpEffCommandLine); + + // If everything looks ok, add the SnpEff version number and command-line header lines to the + // header of the VCF output file, changing the key names so that our output file won't be + // mistaken in the future for a SnpEff output file: + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); } public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { @@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } - private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { - RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); - - VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); - VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); - + private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) { if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) { throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " + "input file, and so could not verify that the file was generated by a supported version of SnpEff (" + @@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } + private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) { + if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) { + throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " + + "input file, which should be added by all supported versions of SnpEff (" + + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")"); + } + } + private boolean isSupportedSnpEffVersion ( String versionString ) { for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) { if ( supportedVersion.equals(versionString) ) { @@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio List parsedEffects = new ArrayList(); Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY); - List individualEffects; + if ( effectFieldValue == null ) { + return parsedEffects; + } // The VCF codec stores multi-valued fields as a List, and single-valued fields as a String. // We can have either in the case of SnpEff, since there may be one or more than one effect in this record. + List individualEffects; if ( effectFieldValue instanceof List ) { individualEffects = (List)effectFieldValue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index fb3dbc3cf..f6a1c4f31 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker implements Ann engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit()); engine.initializeExpressions(expressionsToUse); - engine.invokeAnnotationInitializationMethods(); - // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); @@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker implements Ann hInfo.add(line); } + engine.invokeAnnotationInitializationMethods(hInfo); + VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 68cd07803..e5effe6d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -114,13 +114,13 @@ public class VariantAnnotatorEngine { dbAnnotations.put(rod, rod.getName()); } - public void invokeAnnotationInitializationMethods() { + public void invokeAnnotationInitializationMethods( Set headerLines ) { for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) { - annotation.initialize(walker, toolkit); + annotation.initialize(walker, toolkit, headerLines); } for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) { - annotation.initialize(walker, toolkit); + annotation.initialize(walker, toolkit, headerLines); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java index 160a3d258..521f89016 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.List; +import java.util.Set; @DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") public abstract class VariantAnnotatorAnnotation { @@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation { public abstract List getKeyNames(); // initialization method (optional for subclasses, and therefore non-abstract) - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 08baae7a7..2c06c6b7f 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", 1, - Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9") + Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") ); executeTest("Testing SnpEff annotations", spec); }