SnpEff annotator now adds OriginalSnpEffVersion and OriginalSnpEffCmd lines to the header of the VCF output file.

This change is urgently required for production, which is why it's going into Stable+Unstable
instead of just Unstable.

The keys for the SnpEff version and command header lines in the VCF file output by
VariantAnnotator (OriginalSnpEffVersion and OriginalSnpEffCmd) are intentionally
different from the keys for those same lines in the SnpEff output file (SnpEffVersion
and SnpEffCmd), so that output files from VariantAnnotator won't be confused
with output files from SnpEff itself.
This commit is contained in:
David Roazen 2011-09-20 15:22:27 -04:00
parent 61b89e236a
commit d9ea764611
5 changed files with 48 additions and 17 deletions

View File

@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
// When we write the SnpEff version number and command line to the output VCF, we change
// the key name slightly so that the output VCF won't be mistaken in the future for an
// output file produced by SnpEff directly:
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF:
@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
UNKNOWN
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
// without providing a SnpEff rod via --snpEffFile):
validateRodBinding(walker.getSnpEffRodBinding());
checkSnpEffVersion(walker, toolkit);
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
checkSnpEffVersion(snpEffVersionLine);
checkSnpEffCommandLine(snpEffCommandLine);
// If everything looks ok, add the SnpEff version number and command-line header lines to the
// header of the VCF output file, changing the key names so that our output file won't be
// mistaken in the future for a SnpEff output file:
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
}
}
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
}
}
// Rejects a SnpEff input file whose VCF header has no usable command-line entry. The header
// line passes only if it is non-null and its value is non-null and non-empty after trimming;
// otherwise a UserException naming the expected header key and the supported versions is thrown.
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
    // A missing header line is treated the same as a header line with no value.
    final String commandLineValue = snpEffCommandLine == null ? null : snpEffCommandLine.getValue();

    if ( commandLineValue != null && commandLineValue.trim().length() > 0 ) {
        return;
    }

    final String supportedVersions = Arrays.toString(SUPPORTED_SNPEFF_VERSIONS);
    throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
                            "input file, which should be added by all supported versions of SnpEff (" +
                            supportedVersions + ")");
}
private boolean isSupportedSnpEffVersion ( String versionString ) {
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) {
@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
List<String> individualEffects;
if ( effectFieldValue == null ) {
return parsedEffects;
}
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
List<String> individualEffects;
if ( effectFieldValue instanceof List ) {
individualEffects = (List<String>)effectFieldValue;
}

View File

@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();
// set up the header fields
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
hInfo.add(line);
}
engine.invokeAnnotationInitializationMethods(hInfo);
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
vcfWriter.writeHeader(vcfHeader);

View File

@ -114,13 +114,13 @@ public class VariantAnnotatorEngine {
dbAnnotations.put(rod, rod.getName());
}
public void invokeAnnotationInitializationMethods() {
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker, toolkit);
annotation.initialize(walker, toolkit, headerLines);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker, toolkit);
annotation.initialize(walker, toolkit, headerLines);
}
}

View File

@ -25,9 +25,11 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.List;
import java.util.Set;
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
public abstract class VariantAnnotatorAnnotation {
@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation {
public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
}

View File

@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1,
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
);
executeTest("Testing SnpEff annotations", spec);
}