Added an integration test for the SnpEff annotation support, as well as some extra safety checks and comments.
This commit is contained in:
parent
5e288136e0
commit
a13bc7b929
|
|
@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator;
|
|||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
|
|
@ -38,9 +38,22 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
|||
|
||||
import java.util.*;
|
||||
|
||||
public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||
/**
|
||||
* A set of genomic annotations based on the output of the SnpEff variant effect predictor tool
|
||||
* (http://snpeff.sourceforge.net/).
|
||||
*
|
||||
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
|
||||
* output file (which must be bound to an RMD track named "SnpEff"), and adds annotations
|
||||
* on that effect.
|
||||
*
|
||||
* The possible biological effects and their associated impacts are defined in the class:
|
||||
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
// SnpEff field keys:
|
||||
// SnpEff annotation key names:
|
||||
public static final String GENE_ID_KEY = "GENE_ID";
|
||||
public static final String GENE_NAME_KEY = "GENE_NAME";
|
||||
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
|
||||
|
|
@ -55,11 +68,14 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation {
|
|||
public static final String CODON_NUM_KEY = "CODON_NUM";
|
||||
public static final String CDS_SIZE_KEY = "CDS_SIZE";
|
||||
|
||||
// Name of the RMD track bound to the raw SnpEff-generated output file:
|
||||
public static final String RMD_TRACK_NAME = "SnpEff";
|
||||
|
||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||
List<Object> snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME);
|
||||
|
||||
// Add only annotations for one of the most biologically-significant effects as defined in
|
||||
// the SnpEffConstants class:
|
||||
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures);
|
||||
return generateAnnotations(mostSignificantEffect);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -34,6 +34,40 @@ import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygos
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Codec for decoding the output format of the SnpEff variant effect predictor tool
|
||||
* (http://snpeff.sourceforge.net/).
|
||||
*
|
||||
* This format has 23 tab-delimited fields:
|
||||
*
|
||||
* Chromosome
|
||||
* Position
|
||||
* Reference
|
||||
* Change
|
||||
* Change Type: {SNP, MNP, INS, DEL}
|
||||
* Zygosity: {Hom, Het}
|
||||
* Quality
|
||||
* Coverage
|
||||
* Warnings
|
||||
* Gene ID
|
||||
* Gene Name
|
||||
* Bio Type
|
||||
* Transcript ID
|
||||
* Exon ID
|
||||
* Exon Rank
|
||||
* Effect
|
||||
* Old/New Amino Acid
|
||||
* Old/New Codon
|
||||
* Codon Num
|
||||
* CDS Size
|
||||
* Codons Around
|
||||
* Amino Acids Around
|
||||
* Custom Interval ID
|
||||
*
|
||||
* We treat all except the Chromosome, Position, and Effect fields as optional.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffCodec implements FeatureCodec {
|
||||
|
||||
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
|
||||
|
|
@ -64,9 +98,13 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
"AAs around",
|
||||
"Custom_interval_ID"
|
||||
};
|
||||
|
||||
// The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
|
||||
public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
|
||||
|
||||
public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
|
||||
|
||||
|
||||
public Feature decodeLoc ( String line ) {
|
||||
return decode(line);
|
||||
}
|
||||
|
|
@ -101,6 +139,11 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
|
||||
|
||||
boolean isNonCodingGene = isNonCodingGene(tokens[15]);
|
||||
|
||||
// Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
|
||||
// otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
|
||||
// in the final field (when present) from being inappropriately tokenized:
|
||||
|
||||
int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
|
||||
String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
|
||||
EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
|
||||
|
|
@ -150,6 +193,9 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
String effectName = "";
|
||||
|
||||
// If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
|
||||
// otherwise it will be in the first subfield:
|
||||
|
||||
if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
|
||||
effectName = effectFieldTokens[1].trim();
|
||||
}
|
||||
|
|
@ -161,6 +207,9 @@ public class SnpEffCodec implements FeatureCodec {
|
|||
}
|
||||
|
||||
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
|
||||
// The extra effect-related information, if present, will always be the last subfield:
|
||||
|
||||
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||
return effectFieldTokens[effectFieldTokens.length - 1].trim();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,8 +24,14 @@
|
|||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
/**
|
||||
* A set of constants associated with the SnpEff codec.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffConstants {
|
||||
|
||||
// Possible SnpEff biological effects and their associated impacts:
|
||||
public enum EffectType {
|
||||
START_GAINED (EffectImpact.HIGH),
|
||||
START_LOST (EffectImpact.HIGH),
|
||||
|
|
@ -93,6 +99,7 @@ public class SnpEffConstants {
|
|||
}
|
||||
}
|
||||
|
||||
// The kinds of variants supported by the SnpEff output format:
|
||||
public enum ChangeType {
|
||||
SNP,
|
||||
MNP,
|
||||
|
|
@ -100,6 +107,7 @@ public class SnpEffConstants {
|
|||
DEL
|
||||
}
|
||||
|
||||
// Possible zygosities of SnpEff variants:
|
||||
public enum Zygosity {
|
||||
Hom,
|
||||
Het
|
||||
|
|
|
|||
|
|
@ -26,15 +26,26 @@ package org.broadinstitute.sting.utils.codecs.snpEff;
|
|||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
/**
|
||||
* Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
|
||||
*
|
||||
* Many fields are optional, and missing values are represented by nulls. You should always call the
|
||||
* hasX() method before calling the corresponding getX() method. Required fields can never be null
|
||||
* and do not have a hasX() method.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffFeature implements Feature {
|
||||
|
||||
private String contig;
|
||||
private long position;
|
||||
private String contig; // REQUIRED FIELD
|
||||
private long position; // REQUIRED FIELD
|
||||
private String reference;
|
||||
private String change;
|
||||
private ChangeType changeType;
|
||||
|
|
@ -48,8 +59,8 @@ public class SnpEffFeature implements Feature {
|
|||
private String transcriptID;
|
||||
private String exonID;
|
||||
private Integer exonRank;
|
||||
private boolean isNonCodingGene;
|
||||
private EffectType effect;
|
||||
private boolean isNonCodingGene; // REQUIRED FIELD
|
||||
private EffectType effect; // REQUIRED FIELD
|
||||
private String effectExtraInformation;
|
||||
private String oldAndNewAA;
|
||||
private String oldAndNewCodon;
|
||||
|
|
@ -85,6 +96,10 @@ public class SnpEffFeature implements Feature {
|
|||
String aasAround,
|
||||
String customIntervalID ) {
|
||||
|
||||
if ( contig == null || effect == null ) {
|
||||
throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
|
||||
}
|
||||
|
||||
this.contig = contig;
|
||||
this.position = position;
|
||||
this.reference = reference;
|
||||
|
|
@ -113,6 +128,10 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public boolean isHigherImpactThan ( SnpEffFeature other ) {
|
||||
|
||||
// If one effect is in a non-coding gene and the other is not, the effect NOT in the
|
||||
// non-coding gene has higher impact:
|
||||
|
||||
if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
|
||||
return true;
|
||||
}
|
||||
|
|
@ -120,6 +139,9 @@ public class SnpEffFeature implements Feature {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
|
||||
// of the effects themselves as defined in the SnpEffConstants class:
|
||||
|
||||
return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
|
||||
}
|
||||
|
||||
|
|
@ -140,6 +162,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getReference() {
|
||||
if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
|
||||
return reference;
|
||||
}
|
||||
|
||||
|
|
@ -148,6 +171,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getChange() {
|
||||
if ( change == null ) throw new NoSuchElementException("This feature has no change field");
|
||||
return change;
|
||||
}
|
||||
|
||||
|
|
@ -156,6 +180,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public ChangeType getChangeType() {
|
||||
if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
|
||||
return changeType;
|
||||
}
|
||||
|
||||
|
|
@ -164,6 +189,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Zygosity getZygosity() {
|
||||
if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
|
||||
return zygosity;
|
||||
}
|
||||
|
||||
|
|
@ -172,6 +198,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Double getQuality() {
|
||||
if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
|
||||
return quality;
|
||||
}
|
||||
|
||||
|
|
@ -180,6 +207,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Long getCoverage() {
|
||||
if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
|
||||
return coverage;
|
||||
}
|
||||
|
||||
|
|
@ -188,6 +216,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getWarnings() {
|
||||
if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
|
||||
return warnings;
|
||||
}
|
||||
|
||||
|
|
@ -196,6 +225,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getGeneID() {
|
||||
if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
|
||||
return geneID;
|
||||
}
|
||||
|
||||
|
|
@ -204,6 +234,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getGeneName() {
|
||||
if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
|
||||
return geneName;
|
||||
}
|
||||
|
||||
|
|
@ -212,6 +243,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getBioType() {
|
||||
if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
|
||||
return bioType;
|
||||
}
|
||||
|
||||
|
|
@ -220,6 +252,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getTranscriptID() {
|
||||
if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
|
||||
return transcriptID;
|
||||
}
|
||||
|
||||
|
|
@ -228,6 +261,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getExonID() {
|
||||
if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
|
||||
return exonID;
|
||||
}
|
||||
|
||||
|
|
@ -236,6 +270,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Integer getExonRank() {
|
||||
if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
|
||||
return exonRank;
|
||||
}
|
||||
|
||||
|
|
@ -256,6 +291,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getEffectExtraInformation() {
|
||||
if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
|
||||
return effectExtraInformation;
|
||||
}
|
||||
|
||||
|
|
@ -264,6 +300,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getOldAndNewAA() {
|
||||
if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
|
||||
return oldAndNewAA;
|
||||
}
|
||||
|
||||
|
|
@ -272,6 +309,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getOldAndNewCodon() {
|
||||
if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
|
||||
return oldAndNewCodon;
|
||||
}
|
||||
|
||||
|
|
@ -280,6 +318,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Integer getCodonNum() {
|
||||
if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
|
||||
return codonNum;
|
||||
}
|
||||
|
||||
|
|
@ -288,6 +327,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public Integer getCdsSize() {
|
||||
if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
|
||||
return cdsSize;
|
||||
}
|
||||
|
||||
|
|
@ -296,6 +336,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getCodonsAround() {
|
||||
if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
|
||||
return codonsAround;
|
||||
}
|
||||
|
||||
|
|
@ -304,6 +345,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getAasAround() {
|
||||
if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
|
||||
return aasAround;
|
||||
}
|
||||
|
||||
|
|
@ -312,6 +354,7 @@ public class SnpEffFeature implements Feature {
|
|||
}
|
||||
|
||||
public String getCustomIntervalID() {
|
||||
if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
|
||||
return customIntervalID;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,4 +125,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSnpEffAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -o %s -A SnpEff -B:variant,VCF " + validationDataLocation + "/1000G.exomes.vcf " +
|
||||
"-B:SnpEff,SnpEff " + validationDataLocation + "/snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out" + " -L 1",
|
||||
1,
|
||||
Arrays.asList("5fe3644744d3c084a179c3d204555333")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue