diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index 4ead77506..8f99c6118 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio // lacking a SnpEff version number in the VCF header: public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" }; public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion"; + public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd"; + + // When we write the SnpEff version number and command line to the output VCF, we change + // the key names slightly so that the output VCF won't be mistaken in the future for an + // output file produced by SnpEff directly: + public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY; + public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY; // SnpEff aggregates all effects (and effect metadata) together into a single INFO // field annotation with the key EFF: @@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio UNKNOWN } - - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { + // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff + // without providing a SnpEff rod via --snpEffFile): validateRodBinding(walker.getSnpEffRodBinding()); - checkSnpEffVersion(walker, toolkit); + RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); + + // Make sure that the SnpEff
version number and command-line header lines are present in the VCF header of + // the SnpEff rod, and that the file was generated by a supported version of SnpEff: + VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); + VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); + VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY); + + checkSnpEffVersion(snpEffVersionLine); + checkSnpEffCommandLine(snpEffCommandLine); + + // If everything looks ok, add the SnpEff version number and command-line header lines to the + // header of the VCF output file, changing the key names so that our output file won't be + // mistaken in the future for a SnpEff output file: + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue())); + headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue())); } public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { @@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } - private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { - RodBinding snpEffRodBinding = walker.getSnpEffRodBinding(); - - VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName()); - VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY); - + private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) { if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) { throw new UserException("Could not find a " + 
SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " + "input file, and so could not verify that the file was generated by a supported version of SnpEff (" + @@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio } } + private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) { + if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) { + throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " + + "input file, which should be added by all supported versions of SnpEff (" + + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")"); + } + } + private boolean isSupportedSnpEffVersion ( String versionString ) { for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) { if ( supportedVersion.equals(versionString) ) { @@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio List parsedEffects = new ArrayList(); Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY); - List individualEffects; + if ( effectFieldValue == null ) { + return parsedEffects; + } // The VCF codec stores multi-valued fields as a List, and single-valued fields as a String. // We can have either in the case of SnpEff, since there may be one or more than one effect in this record. 
+ List individualEffects; if ( effectFieldValue instanceof List ) { individualEffects = (List)effectFieldValue; } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java index fb3dbc3cf..f6a1c4f31 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java @@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker implements Ann engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit()); engine.initializeExpressions(expressionsToUse); - engine.invokeAnnotationInitializationMethods(); - // setup the header fields // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones Set hInfo = new HashSet(); @@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker implements Ann hInfo.add(line); } + engine.invokeAnnotationInitializationMethods(hInfo); + VCFHeader vcfHeader = new VCFHeader(hInfo, samples); vcfWriter.writeHeader(vcfHeader); diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java index 68cd07803..e5effe6d8 100755 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java @@ -114,13 +114,13 @@ public class VariantAnnotatorEngine { dbAnnotations.put(rod, rod.getName()); } - public void invokeAnnotationInitializationMethods() { + public void invokeAnnotationInitializationMethods( Set headerLines ) { for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) { - annotation.initialize(walker, toolkit); + 
annotation.initialize(walker, toolkit, headerLines); } for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) { - annotation.initialize(walker, toolkit); + annotation.initialize(walker, toolkit, headerLines); } } diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java index 160a3d258..521f89016 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java @@ -25,9 +25,11 @@ package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import java.util.List; +import java.util.Set; @DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") public abstract class VariantAnnotatorAnnotation { @@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation { public abstract List getKeyNames(); // initialization method (optional for subclasses, and therefore non-abstract) - public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { } + public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set headerLines ) { } } \ No newline at end of file diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 08baae7a7..2c06c6b7f 100755 --- 
a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", 1, - Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9") + Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e") ); executeTest("Testing SnpEff annotations", spec); }