SnpEff annotator now adds OriginalSnpEffVersion and OriginalSnpEffCmd lines to the header of the VCF output file.

This change is urgently required for production, which is why it's going into Stable+Unstable
instead of just Unstable.

The keys for the SnpEff version and command header lines in the VCF file output by
VariantAnnotator (OriginalSnpEffVersion and OriginalSnpEffCmd) are intentionally
different from the keys for those same lines in the SnpEff output file (SnpEffVersion
and SnpEffCmd), so that output files from VariantAnnotator won't be confused
with output files from SnpEff itself.
This commit is contained in:
David Roazen 2011-09-20 15:22:27 -04:00
parent 61b89e236a
commit d9ea764611
5 changed files with 48 additions and 17 deletions

View File

@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
// lacking a SnpEff version number in the VCF header: // lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" }; public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion"; public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
// When we write the SnpEff version number and command line to the output VCF, we change
// the key name slightly so that the output VCF won't be confused in the future for an
// output file produced by SnpEff directly:
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
// SnpEff aggregates all effects (and effect metadata) together into a single INFO // SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF: // field annotation with the key EFF:
@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
UNKNOWN UNKNOWN
} }
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { // Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding()); validateRodBinding(walker.getSnpEffRodBinding());
checkSnpEffVersion(walker, toolkit); RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersion(snpEffVersionLine);
checkSnpEffCommandLine(snpEffCommandLine);
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
} }
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) { public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
} }
} }
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) { if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " + throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" + "input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
} }
} }
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, which should be added by all supported versions of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
}
}
private boolean isSupportedSnpEffVersion ( String versionString ) { private boolean isSupportedSnpEffVersion ( String versionString ) {
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) { for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) { if ( supportedVersion.equals(versionString) ) {
@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>(); List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY); Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
List<String> individualEffects; if ( effectFieldValue == null ) {
return parsedEffects;
}
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String. // The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record. // We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
List<String> individualEffects;
if ( effectFieldValue instanceof List ) { if ( effectFieldValue instanceof List ) {
individualEffects = (List<String>)effectFieldValue; individualEffects = (List<String>)effectFieldValue;
} }

View File

@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit()); engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse); engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();
// setup the header fields // setup the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones // note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>(); Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
hInfo.add(line); hInfo.add(line);
} }
engine.invokeAnnotationInitializationMethods(hInfo);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples); VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader); vcfWriter.writeHeader(vcfHeader);

View File

@ -114,13 +114,13 @@ public class VariantAnnotatorEngine {
dbAnnotations.put(rod, rod.getName()); dbAnnotations.put(rod, rod.getName());
} }
public void invokeAnnotationInitializationMethods() { public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) { for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker, toolkit); annotation.initialize(walker, toolkit, headerLines);
} }
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) { for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker, toolkit); annotation.initialize(walker, toolkit, headerLines);
} }
} }

View File

@ -25,9 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces; package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine; import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature; import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.List; import java.util.List;
import java.util.Set;
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations") @DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
public abstract class VariantAnnotatorAnnotation { public abstract class VariantAnnotatorAnnotation {
@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation {
public abstract List<String> getKeyNames(); public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract) // initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { } public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
} }

View File

@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation + validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000", "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1, 1,
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9") Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
); );
executeTest("Testing SnpEff annotations", spec); executeTest("Testing SnpEff annotations", spec);
} }