SnpEff annotator now adds OriginalSnpEffVersion and OriginalSnpEffCmd lines to the header of the VCF output file.
This change is urgently required for production, which is why it's going into Stable+Unstable instead of just Unstable. The keys for the SnpEff version and command header lines in the VCF file output by VariantAnnotator (OriginalSnpEffVersion and OriginalSnpEffCmd) are intentionally different from the keys for those same lines in the SnpEff output file (SnpEffVersion and SnpEffCmd), so that output files from VariantAnnotator won't be confused with output files from SnpEff itself.
This commit is contained in:
parent
61b89e236a
commit
d9ea764611
|
|
@ -58,6 +58,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
|||
// lacking a SnpEff version number in the VCF header:
|
||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
||||
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
||||
public static final String SNPEFF_VCF_HEADER_COMMAND_LINE_KEY = "SnpEffCmd";
|
||||
|
||||
// When we write the SnpEff version number and command line to the output VCF, we change
|
||||
// the key name slightly so that the output VCF won't be mistaken in the future for an
|
||||
// output file produced by SnpEff directly:
|
||||
public static final String OUTPUT_VCF_HEADER_VERSION_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_VERSION_LINE_KEY;
|
||||
public static final String OUTPUT_VCF_HEADER_COMMAND_LINE_KEY = "Original" + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY;
|
||||
|
||||
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
|
||||
// field annotation with the key EFF:
|
||||
|
|
@ -165,10 +172,26 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
|||
UNKNOWN
|
||||
}
|
||||
|
||||
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) {
|
||||
// Make sure that we actually have a valid SnpEff rod binding (just in case the user specified -A SnpEff
|
||||
// without providing a SnpEff rod via --snpEffFile):
|
||||
validateRodBinding(walker.getSnpEffRodBinding());
|
||||
checkSnpEffVersion(walker, toolkit);
|
||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
|
||||
// Make sure that the SnpEff version number and command-line header lines are present in the VCF header of
|
||||
// the SnpEff rod, and that the file was generated by a supported version of SnpEff:
|
||||
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
||||
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||
VCFHeaderLine snpEffCommandLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_COMMAND_LINE_KEY);
|
||||
|
||||
checkSnpEffVersion(snpEffVersionLine);
|
||||
checkSnpEffCommandLine(snpEffCommandLine);
|
||||
|
||||
// If everything looks ok, add the SnpEff version number and command-line header lines to the
|
||||
// header of the VCF output file, changing the key names so that our output file won't be
|
||||
// mistaken in the future for a SnpEff output file:
|
||||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_VERSION_LINE_KEY, snpEffVersionLine.getValue()));
|
||||
headerLines.add(new VCFHeaderLine(OUTPUT_VCF_HEADER_COMMAND_LINE_KEY, snpEffCommandLine.getValue()));
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||
|
|
@ -204,12 +227,7 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
|||
}
|
||||
}
|
||||
|
||||
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
|
||||
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
||||
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||
|
||||
private void checkSnpEffVersion ( VCFHeaderLine snpEffVersionLine ) {
|
||||
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
||||
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
|
||||
|
|
@ -224,6 +242,14 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Verifies that the SnpEff command-line header line taken from the SnpEff input file is usable.
 *
 * The line is rejected when it is null, when its value is null, or when its value is empty
 * after trimming whitespace.
 *
 * @param snpEffCommandLine the header line looked up under SNPEFF_VCF_HEADER_COMMAND_LINE_KEY; may be null
 * @throws UserException if the line is missing or its value is blank
 */
private void checkSnpEffCommandLine ( VCFHeaderLine snpEffCommandLine ) {
|
||||
if ( snpEffCommandLine == null || snpEffCommandLine.getValue() == null || snpEffCommandLine.getValue().trim().length() == 0 ) {
|
||||
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_COMMAND_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||
"input file, which should be added by all supported versions of SnpEff (" +
|
||||
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isSupportedSnpEffVersion ( String versionString ) {
|
||||
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
|
||||
if ( supportedVersion.equals(versionString) ) {
|
||||
|
|
@ -248,10 +274,13 @@ public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotatio
|
|||
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
|
||||
|
||||
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
|
||||
List<String> individualEffects;
|
||||
if ( effectFieldValue == null ) {
|
||||
return parsedEffects;
|
||||
}
|
||||
|
||||
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
|
||||
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
|
||||
List<String> individualEffects;
|
||||
if ( effectFieldValue instanceof List ) {
|
||||
individualEffects = (List<String>)effectFieldValue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -208,8 +208,6 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
|
||||
engine.initializeExpressions(expressionsToUse);
|
||||
|
||||
engine.invokeAnnotationInitializationMethods();
|
||||
|
||||
// setup the header fields
|
||||
// note that if any of the definitions conflict with our new ones, then we want to overwrite the old ones
|
||||
Set<VCFHeaderLine> hInfo = new HashSet<VCFHeaderLine>();
|
||||
|
|
@ -219,6 +217,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
hInfo.add(line);
|
||||
}
|
||||
|
||||
engine.invokeAnnotationInitializationMethods(hInfo);
|
||||
|
||||
VCFHeader vcfHeader = new VCFHeader(hInfo, samples);
|
||||
vcfWriter.writeHeader(vcfHeader);
|
||||
|
||||
|
|
|
|||
|
|
@ -114,13 +114,13 @@ public class VariantAnnotatorEngine {
|
|||
dbAnnotations.put(rod, rod.getName());
|
||||
}
|
||||
|
||||
public void invokeAnnotationInitializationMethods() {
|
||||
public void invokeAnnotationInitializationMethods( Set<VCFHeaderLine> headerLines ) {
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||
annotation.initialize(walker, toolkit);
|
||||
annotation.initialize(walker, toolkit, headerLines);
|
||||
}
|
||||
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.initialize(walker, toolkit);
|
||||
annotation.initialize(walker, toolkit, headerLines);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -25,9 +25,11 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
@DocumentedGATKFeature(enable = true, groupName = "VariantAnnotator annotations", summary = "VariantAnnotator annotations")
|
||||
public abstract class VariantAnnotatorAnnotation {
|
||||
|
|
@ -35,5 +37,5 @@ public abstract class VariantAnnotatorAnnotation {
|
|||
public abstract List<String> getKeyNames();
|
||||
|
||||
// initialization method (optional for subclasses, and therefore non-abstract)
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit, Set<VCFHeaderLine> headerLines ) { }
|
||||
}
|
||||
|
|
@ -134,7 +134,7 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
|
||||
1,
|
||||
Arrays.asList("486fc6a5ca1819f5ab180d5d72b1ebc9")
|
||||
Arrays.asList("ed9d1b37b0bd8b65ff9ce2688e0e102e")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue