diff --git a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
index 30b2f828d..8e241bb2c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/filters/PlatformFilter.java
@@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
- @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
+ @Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
index fdfac6bf7..4f072e88c 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/PrintReadsWalker.java
@@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
+ *
+ * java -Xmx2g -jar GenomeAnalysisTK.jar \
+ * -R ref.fasta \
+ * -T PrintReads \
+ * -o output.bam \
+ * -I input.bam \
+ * -n 2000
*
*
*/
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
index 350c683c2..bb3685fb5 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@@ -24,7 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
+import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.Utils;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
-import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -46,134 +45,421 @@ import java.util.*;
* (http://snpeff.sourceforge.net/).
*
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
- * output file (which must be provided on the command line via --snpEffFile:SnpEff ),
+ * output file (which must be provided on the command line via --snpEffFile filename.vcf),
* and adds annotations on that effect.
*
- * The possible biological effects and their associated impacts are defined in the class:
- * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
- *
* @author David Roazen
*/
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
- // SnpEff annotation key names:
- public static final String GENE_ID_KEY = "GENE_ID";
- public static final String GENE_NAME_KEY = "GENE_NAME";
- public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
- public static final String EXON_ID_KEY = "EXON_ID";
- public static final String EXON_RANK_KEY = "EXON_RANK";
- public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
- public static final String EFFECT_KEY = "EFFECT";
- public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
- public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
- public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
- public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
- public static final String CODON_NUM_KEY = "CODON_NUM";
- public static final String CDS_SIZE_KEY = "CDS_SIZE";
+ private static Logger logger = Logger.getLogger(SnpEff.class);
+
+ // We refuse to parse SnpEff output files generated by unsupported versions, or
+ // lacking a SnpEff version number in the VCF header:
+ public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
+ public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
+
+ // SnpEff aggregates all effects (and effect metadata) together into a single INFO
+ // field annotation with the key EFF:
+ public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
+ public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
+ public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
+
+ // Key names for the INFO field annotations we will add to each record, along
+ // with parsing-related information:
+ public enum InfoFieldKey {
+ EFF (-1),
+ EFF_IMPACT (0),
+ EFF_CODON_CHANGE (1),
+ EFF_AMINO_ACID_CHANGE (2),
+ EFF_GENE_NAME (3),
+ EFF_GENE_BIOTYPE (4),
+ EFF_TRANSCRIPT_ID (6),
+ EFF_EXON_ID (7);
+
+ // Index within the effect metadata subfields from the SnpEff EFF annotation
+ // where each key's associated value can be found during parsing.
+ private final int fieldIndex;
+
+ InfoFieldKey ( int fieldIndex ) {
+ this.fieldIndex = fieldIndex;
+ }
+
+ public int getFieldIndex() {
+ return fieldIndex;
+ }
+ }
+
+ // Possible SnpEff biological effects. All effect names found in the SnpEff input file
+ // are validated against this list.
+ public enum EffectType {
+ NONE,
+ CHROMOSOME,
+ INTERGENIC,
+ UPSTREAM,
+ UTR_5_PRIME,
+ UTR_5_DELETED,
+ START_GAINED,
+ SPLICE_SITE_ACCEPTOR,
+ SPLICE_SITE_DONOR,
+ START_LOST,
+ SYNONYMOUS_START,
+ NON_SYNONYMOUS_START,
+ CDS,
+ GENE,
+ TRANSCRIPT,
+ EXON,
+ EXON_DELETED,
+ NON_SYNONYMOUS_CODING,
+ SYNONYMOUS_CODING,
+ FRAME_SHIFT,
+ CODON_CHANGE,
+ CODON_INSERTION,
+ CODON_CHANGE_PLUS_CODON_INSERTION,
+ CODON_DELETION,
+ CODON_CHANGE_PLUS_CODON_DELETION,
+ STOP_GAINED,
+ SYNONYMOUS_STOP,
+ NON_SYNONYMOUS_STOP,
+ STOP_LOST,
+ INTRON,
+ UTR_3_PRIME,
+ UTR_3_DELETED,
+ DOWNSTREAM,
+ INTRON_CONSERVED,
+ INTERGENIC_CONSERVED,
+ REGULATION,
+ CUSTOM,
+ WITHIN_NON_CODING_GENE
+ }
+
+ // SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
+ public enum EffectImpact {
+ LOW (1),
+ MODERATE (2),
+ HIGH (3);
+
+ private final int severityRating;
+
+ EffectImpact ( int severityRating ) {
+ this.severityRating = severityRating;
+ }
+
+ public boolean isHigherImpactThan ( EffectImpact other ) {
+ return this.severityRating > other.severityRating;
+ }
+ }
+
+ // SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
+ public enum EffectCoding {
+ CODING,
+ NON_CODING,
+ UNKNOWN
+ }
+
+
+ public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
+ validateRodBinding(walker.getSnpEffRodBinding());
+ checkSnpEffVersion(walker, toolkit);
+ }
public Map annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) {
- RodBinding snpEffRodBinding = walker.getSnpEffRodBinding();
- validateRodBinding(snpEffRodBinding);
+ RodBinding snpEffRodBinding = walker.getSnpEffRodBinding();
- List features = tracker.getValues(snpEffRodBinding, ref.getLocus());
+ // Get only SnpEff records that start at this locus, not merely span it:
+ List snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
- // Add only annotations for one of the most biologically-significant effects as defined in
- // the SnpEffConstants class:
- SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
-
- if ( mostSignificantEffect == null ) {
+ // Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
+ // If there is more than one such record, we only need to pick the first one, since the biological
+ // effects will be the same across all such records:
+ VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
+ if ( matchingRecord == null ) {
return null;
}
- return generateAnnotations(mostSignificantEffect);
+ // Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
+ List effects = parseSnpEffRecord(matchingRecord);
+ if ( effects.size() == 0 ) {
+ return null;
+ }
+
+ // Add only annotations for one of the most biologically-significant effects from this set:
+ SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
+ return mostSignificantEffect.getAnnotations();
}
- private void validateRodBinding ( RodBinding snpEffRodBinding ) {
+ private void validateRodBinding ( RodBinding snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
- throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
- "as a rodbinding on the command line, but no SnpEff rodbinding was found.");
+ throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
+ "as a rodbinding on the command line via the --snpEffFile option, but " +
+ "no SnpEff rodbinding was found.");
}
}
- private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) {
- SnpEffFeature mostSignificantEffect = null;
+ private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
+ RodBinding snpEffRodBinding = walker.getSnpEffRodBinding();
- for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
+ VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
+ VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
+
+ if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
+ throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
+ "input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
+ Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
+ }
+
+ String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
+
+ if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
+ throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
+ "is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
+ }
+ }
+
+ private boolean isSupportedSnpEffVersion ( String versionString ) {
+ for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
+ if ( supportedVersion.equals(versionString) ) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ private VariantContext getMatchingSnpEffRecord ( List snpEffRecords, VariantContext vc ) {
+ for ( VariantContext snpEffRecord : snpEffRecords ) {
+ if ( snpEffRecord.hasSameAlternateAllelesAs(vc) && snpEffRecord.getReference().equals(vc.getReference()) ) {
+ return snpEffRecord;
+ }
+ }
+
+ return null;
+ }
+
+ private List parseSnpEffRecord ( VariantContext snpEffRecord ) {
+ List parsedEffects = new ArrayList();
+
+ Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
+ List individualEffects;
+
+ // The VCF codec stores multi-valued fields as a List, and single-valued fields as a String.
+ // We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
+ if ( effectFieldValue instanceof List ) {
+ individualEffects = (List)effectFieldValue;
+ }
+ else {
+ individualEffects = Arrays.asList((String)effectFieldValue);
+ }
+
+ for ( String effectString : individualEffects ) {
+ String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);
+
+ if ( effectNameAndMetadata.length != 2 ) {
+ logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
+ snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
+ continue;
+ }
+
+ String effectName = effectNameAndMetadata[0];
+ String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
+
+ SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);
+
+ if ( parsedEffect.isWellFormed() ) {
+ parsedEffects.add(parsedEffect);
+ }
+ else {
+ logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
+ snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString));
+ }
+ }
+
+ return parsedEffects;
+ }
+
+ private SnpEffEffect getMostSignificantEffect ( List effects ) {
+ SnpEffEffect mostSignificantEffect = null;
+
+ for ( SnpEffEffect effect : effects ) {
if ( mostSignificantEffect == null ||
- snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
+ effect.isHigherImpactThan(mostSignificantEffect) ) {
- mostSignificantEffect = snpEffFeature;
+ mostSignificantEffect = effect;
}
}
return mostSignificantEffect;
}
- private Map generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
- Map annotations = new LinkedHashMap(Utils.optimumHashSize(getKeyNames().size()));
-
- if ( mostSignificantEffect.hasGeneID() )
- annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
- if ( mostSignificantEffect.hasGeneName() )
- annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
- if ( mostSignificantEffect.hasTranscriptID() )
- annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
- if ( mostSignificantEffect.hasExonID() )
- annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
- if ( mostSignificantEffect.hasExonRank() )
- annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
- if ( mostSignificantEffect.isNonCodingGene() )
- annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
-
- annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
- annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
- if ( mostSignificantEffect.hasEffectExtraInformation() )
- annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
-
- if ( mostSignificantEffect.hasOldAndNewAA() )
- annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
- if ( mostSignificantEffect.hasOldAndNewCodon() )
- annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
- if ( mostSignificantEffect.hasCodonNum() )
- annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
- if ( mostSignificantEffect.hasCdsSize() )
- annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
-
- return annotations;
- }
-
public List getKeyNames() {
- return Arrays.asList( GENE_ID_KEY,
- GENE_NAME_KEY,
- TRANSCRIPT_ID_KEY,
- EXON_ID_KEY,
- EXON_RANK_KEY,
- WITHIN_NON_CODING_GENE_KEY,
- EFFECT_KEY,
- EFFECT_IMPACT_KEY,
- EFFECT_EXTRA_INFORMATION_KEY,
- OLD_NEW_AA_KEY,
- OLD_NEW_CODON_KEY,
- CODON_NUM_KEY,
- CDS_SIZE_KEY
+ return Arrays.asList( InfoFieldKey.EFF.toString(),
+ InfoFieldKey.EFF_IMPACT.toString(),
+ InfoFieldKey.EFF_CODON_CHANGE.toString(),
+ InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(),
+ InfoFieldKey.EFF_GENE_NAME.toString(),
+ InfoFieldKey.EFF_GENE_BIOTYPE.toString(),
+ InfoFieldKey.EFF_TRANSCRIPT_ID.toString(),
+ InfoFieldKey.EFF_EXON_ID.toString()
);
}
public List getDescriptions() {
return Arrays.asList(
- new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
- new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
- new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
- new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
- new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
+ new VCFInfoHeaderLine(InfoFieldKey.EFF.toString(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_IMPACT.toString(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_CODON_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_NAME.toString(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_BIOTYPE.toString(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
+ new VCFInfoHeaderLine(InfoFieldKey.EFF_EXON_ID.toString(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
+
+ /**
+ * Helper class to parse, validate, and store a single SnpEff effect and its metadata.
+ */
+ protected static class SnpEffEffect {
+ private EffectType effect;
+ private EffectImpact impact;
+ private String codonChange;
+ private String aminoAcidChange;
+ private String geneName;
+ private String geneBiotype;
+ private EffectCoding coding;
+ private String transcriptID;
+ private String exonID;
+
+ private String parseError = null;
+ private boolean isWellFormed = true;
+
+ private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
+ private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
+ private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
+
+ // Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
+ // errors come after warnings, not vice versa:
+ private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
+ private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
+
+ private static final int SNPEFF_CODING_FIELD_INDEX = 5;
+
+ public SnpEffEffect ( String effectName, String[] effectMetadata ) {
+ parseEffectName(effectName);
+ parseEffectMetadata(effectMetadata);
+ }
+
+ private void parseEffectName ( String effectName ) {
+ try {
+ effect = EffectType.valueOf(effectName);
+ }
+ catch ( IllegalArgumentException e ) {
+ parseError(String.format("%s is not a recognized effect type", effectName));
+ }
+ }
+
+ private void parseEffectMetadata ( String[] effectMetadata ) {
+ if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
+ if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
+ parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
+ }
+ else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
+ parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
+ }
+ else {
+ parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
+ EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
+ }
+
+ return;
+ }
+
+ try {
+ impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]);
+ }
+ catch ( IllegalArgumentException e ) {
+ parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]));
+ }
+
+ codonChange = effectMetadata[InfoFieldKey.EFF_CODON_CHANGE.getFieldIndex()];
+ aminoAcidChange = effectMetadata[InfoFieldKey.EFF_AMINO_ACID_CHANGE.getFieldIndex()];
+ geneName = effectMetadata[InfoFieldKey.EFF_GENE_NAME.getFieldIndex()];
+ geneBiotype = effectMetadata[InfoFieldKey.EFF_GENE_BIOTYPE.getFieldIndex()];
+
+ if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
+ try {
+ coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
+ }
+ catch ( IllegalArgumentException e ) {
+ parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
+ }
+ }
+ else {
+ coding = EffectCoding.UNKNOWN;
+ }
+
+ transcriptID = effectMetadata[InfoFieldKey.EFF_TRANSCRIPT_ID.getFieldIndex()];
+ exonID = effectMetadata[InfoFieldKey.EFF_EXON_ID.getFieldIndex()];
+ }
+
+ private void parseError ( String message ) {
+ isWellFormed = false;
+
+ // Cache only the first error encountered:
+ if ( parseError == null ) {
+ parseError = message;
+ }
+ }
+
+ public boolean isWellFormed() {
+ return isWellFormed;
+ }
+
+ public String getParseError() {
+ return parseError == null ? "" : parseError;
+ }
+
+ public boolean isCoding() {
+ return coding == EffectCoding.CODING;
+ }
+
+ public boolean isHigherImpactThan ( SnpEffEffect other ) {
+ // If one effect is within a coding gene and the other is not, the effect that is
+ // within the coding gene has higher impact:
+
+ if ( isCoding() && ! other.isCoding() ) {
+ return true;
+ }
+ else if ( ! isCoding() && other.isCoding() ) {
+ return false;
+ }
+
+ // Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
+ // of the effects themselves:
+
+ return impact.isHigherImpactThan(other.impact);
+ }
+
+ public Map getAnnotations() {
+ Map annotations = new LinkedHashMap(Utils.optimumHashSize(InfoFieldKey.values().length));
+
+ addAnnotation(annotations, InfoFieldKey.EFF.toString(), effect.toString());
+ addAnnotation(annotations, InfoFieldKey.EFF_IMPACT.toString(), impact.toString());
+ addAnnotation(annotations, InfoFieldKey.EFF_CODON_CHANGE.toString(), codonChange);
+ addAnnotation(annotations, InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), aminoAcidChange);
+ addAnnotation(annotations, InfoFieldKey.EFF_GENE_NAME.toString(), geneName);
+ addAnnotation(annotations, InfoFieldKey.EFF_GENE_BIOTYPE.toString(), geneBiotype);
+ addAnnotation(annotations, InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), transcriptID);
+ addAnnotation(annotations, InfoFieldKey.EFF_EXON_ID.toString(), exonID);
+
+ return annotations;
+ }
+
+ private void addAnnotation ( Map annotations, String keyName, String keyValue ) {
+ // Only add annotations for keys associated with non-empty values:
+ if ( keyValue != null && keyValue.trim().length() > 0 ) {
+ annotations.put(keyName, keyValue);
+ }
+ }
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
index 971727727..fb3dbc3cf 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotator.java
@@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@@ -93,8 +92,8 @@ public class VariantAnnotator extends RodWalker implements Ann
* listed in the SnpEff output file for each variant.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
- public RodBinding snpEffFile;
- public RodBinding getSnpEffRodBinding() { return snpEffFile; }
+ public RodBinding snpEffFile;
+ public RodBinding getSnpEffRodBinding() { return snpEffFile; }
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
@@ -204,9 +203,9 @@ public class VariantAnnotator extends RodWalker implements Ann
}
if ( USE_ALL_ANNOTATIONS )
- engine = new VariantAnnotatorEngine(this);
+ engine = new VariantAnnotatorEngine(this, getToolkit());
else
- engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
+ engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
index 17830f129..68cd07803 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorEngine.java
@@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.RodBinding;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@@ -46,6 +47,7 @@ public class VariantAnnotatorEngine {
private HashMap, String> dbAnnotations = new HashMap, String>();
private AnnotatorCompatibleWalker walker;
+ private GenomeAnalysisEngine toolkit;
private static class VAExpression {
@@ -71,16 +73,18 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
- public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
+ public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
+ this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
initializeDBs();
}
// use this constructor if you want to select specific annotations (and/or interfaces)
- public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, AnnotatorCompatibleWalker walker) {
+ public VariantAnnotatorEngine(List annotationGroupsToUse, List annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
+ this.toolkit = toolkit;
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeDBs();
}
@@ -112,11 +116,11 @@ public class VariantAnnotatorEngine {
public void invokeAnnotationInitializationMethods() {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
- annotation.initialize(walker);
+ annotation.initialize(walker, toolkit);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
- annotation.initialize(walker);
+ annotation.initialize(walker, toolkit);
}
}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
index 9dda57ae3..7200f841b 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/AnnotatorCompatibleWalker.java
@@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.commandline.RodBinding;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
@@ -10,8 +9,8 @@ public interface AnnotatorCompatibleWalker {
// getter methods for various used bindings
public abstract RodBinding getVariantRodBinding();
- public abstract RodBinding getSnpEffRodBinding();
+ public abstract RodBinding getSnpEffRodBinding();
public abstract RodBinding getDbsnpRodBinding();
public abstract List> getCompRodBindings();
public abstract List> getResourceRodBindings();
-}
\ No newline at end of file
+}
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java
index 9e48de9c3..160a3d258 100644
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/interfaces/VariantAnnotatorAnnotation.java
@@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
+import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.List;
@@ -34,5 +35,5 @@ public abstract class VariantAnnotatorAnnotation {
public abstract List getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
- public void initialize ( AnnotatorCompatibleWalker walker ) { }
+ public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
}
\ No newline at end of file
diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
index 4ee2d5f44..428f97e2a 100755
--- a/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/genotyper/UnifiedGenotyper.java
@@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
-import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@@ -128,7 +127,7 @@ public class UnifiedGenotyper extends LocusWalker getDbsnpRodBinding() { return dbsnp.dbsnp; }
public RodBinding getVariantRodBinding() { return null; }
- public RodBinding getSnpEffRodBinding() { return null; }
+ public RodBinding getSnpEffRodBinding() { return null; }
public List> getCompRodBindings() { return Collections.emptyList(); }
public List> getResourceRodBindings() { return Collections.emptyList(); }
@@ -211,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker
- * This format has 23 tab-delimited fields:
- *
- *
- * Chromosome
- * Position
- * Reference
- * Change
- * Change Type: {SNP, MNP, INS, DEL}
- * Zygosity: {Hom, Het}
- * Quality
- * Coverage
- * Warnings
- * Gene ID
- * Gene Name
- * Bio Type
- * Transcript ID
- * Exon ID
- * Exon Rank
- * Effect
- * Old/New Amino Acid
- * Old/New Codon
- * Codon Num
- * CDS Size
- * Codons Around
- * Amino Acids Around
- * Custom Interval ID
- *
- * Note that we treat all except the Chromosome, Position, and Effect fields as optional.
- *
- *
- *
- * See also: @see SNPEff project page
- *
- *
- * @author David Roazen
- * @since 2011
- */
-public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {
-
- public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
- public static final String FIELD_DELIMITER_PATTERN = "\\t";
- public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
- public static final String HEADER_LINE_START = "# ";
- public static final String[] HEADER_FIELD_NAMES = { "Chromo",
- "Position",
- "Reference",
- "Change",
- "Change type",
- "Homozygous",
- "Quality",
- "Coverage",
- "Warnings",
- "Gene_ID",
- "Gene_name",
- "Bio_type",
- "Trancript_ID", // yes, this is how it's spelled in the SnpEff output
- "Exon_ID",
- "Exon_Rank",
- "Effect",
- "old_AA/new_AA",
- "Old_codon/New_codon",
- "Codon_Num(CDS)",
- "CDS_size",
- "Codons around",
- "AAs around",
- "Custom_interval_ID"
- };
-
- // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
- public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
-
- public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
-
-
- public Feature decodeLoc ( String line ) {
- return decode(line);
- }
-
- public Feature decode ( String line ) {
- String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
-
- if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
- throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
- ") number of fields: found " + tokens.length + " fields.", line);
- }
-
- try {
- trimAllFields(tokens);
- checkForRequiredFields(tokens, line);
-
- String contig = tokens[0];
- long position = Long.parseLong(tokens[1]);
-
- String reference = tokens[2].isEmpty() ? null : tokens[2];
- String change = tokens[3].isEmpty() ? null : tokens[3];
- ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
- Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
- Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
- Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
- String warnings = tokens[8].isEmpty() ? null : tokens[8];
- String geneID = tokens[9].isEmpty() ? null : tokens[9];
- String geneName = tokens[10].isEmpty() ? null : tokens[10];
- String bioType = tokens[11].isEmpty() ? null : tokens[11];
- String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
- String exonID = tokens[13].isEmpty() ? null : tokens[13];
- Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
-
- boolean isNonCodingGene = isNonCodingGene(tokens[15]);
-
- // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
- // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
- // in the final field (when present) from being inappropriately tokenized:
-
- int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
- String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
- EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
- String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
-
- String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
- String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
- Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
- Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
- String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
- String aasAround = tokens[21].isEmpty() ? null : tokens[21];
- String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
-
- return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
- warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
- effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
- codonsAround, aasAround, customIntervalID);
- }
- catch ( NumberFormatException e ) {
- throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
- }
- catch ( IllegalArgumentException e ) {
- throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
- }
- }
-
- private void trimAllFields ( String[] tokens ) {
- for ( int i = 0; i < tokens.length; i++ ) {
- tokens[i] = tokens[i].trim();
- }
- }
-
- private void checkForRequiredFields ( String[] tokens, String line ) {
- for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
- if ( tokens[requiredFieldIndex].isEmpty() ) {
- throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
- HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
- line);
- }
- }
- }
-
- private boolean isNonCodingGene ( String effectField ) {
- return effectField.startsWith(NON_CODING_GENE_FLAG);
- }
-
- private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
- String effectName = "";
-
- // If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
- // otherwise it will be in the first subfield:
-
- if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
- effectName = effectFieldTokens[1].trim();
- }
- else {
- effectName = effectFieldTokens[0].trim();
- }
-
- return EffectType.valueOf(effectName);
- }
-
- private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
-
- // The extra effect-related information, if present, will always be the last subfield:
-
- if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
- return effectFieldTokens[effectFieldTokens.length - 1].trim();
- }
-
- return null;
- }
-
- public Class getFeatureType() {
- return SnpEffFeature.class;
- }
-
- public Object readHeader ( LineReader reader ) {
- String headerLine = "";
-
- try {
- headerLine = reader.readLine();
- }
- catch ( IOException e ) {
- throw new TribbleException("Unable to read header line from input file.");
- }
-
- validateHeaderLine(headerLine);
- return headerLine;
- }
-
- private void validateHeaderLine ( String headerLine ) {
- if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
- throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
- }
-
- String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
-
- if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
- throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
- EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
- }
-
- for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
- if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
- throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
- HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
- headerTokens[columnIndex] + "\"");
- }
- }
- }
-
- public boolean canDecode ( final File potentialInput ) {
- try {
- LineReader reader = new AsciiLineReader(new FileInputStream(potentialInput));
- readHeader(reader);
- }
- catch ( Exception e ) {
- return false;
- }
-
- return true;
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
deleted file mode 100644
index 270db470f..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.codecs.snpEff;
-
-/**
- * A set of constants associated with the SnpEff codec.
- *
- * @author David Roazen
- */
-public class SnpEffConstants {
-
- // Possible SnpEff biological effects and their associated impacts:
- public enum EffectType {
- START_GAINED (EffectImpact.HIGH),
- START_LOST (EffectImpact.HIGH),
- EXON_DELETED (EffectImpact.HIGH),
- FRAME_SHIFT (EffectImpact.HIGH),
- STOP_GAINED (EffectImpact.HIGH),
- STOP_LOST (EffectImpact.HIGH),
- SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH),
- SPLICE_SITE_DONOR (EffectImpact.HIGH),
-
- NON_SYNONYMOUS_CODING (EffectImpact.MODERATE),
- UTR_5_DELETED (EffectImpact.MODERATE),
- UTR_3_DELETED (EffectImpact.MODERATE),
- CODON_INSERTION (EffectImpact.MODERATE),
- CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
- CODON_DELETION (EffectImpact.MODERATE),
- CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE),
-
- NONE (EffectImpact.LOW),
- CHROMOSOME (EffectImpact.LOW),
- INTERGENIC (EffectImpact.LOW),
- UPSTREAM (EffectImpact.LOW),
- UTR_5_PRIME (EffectImpact.LOW),
- SYNONYMOUS_START (EffectImpact.LOW),
- NON_SYNONYMOUS_START (EffectImpact.LOW),
- CDS (EffectImpact.LOW),
- GENE (EffectImpact.LOW),
- TRANSCRIPT (EffectImpact.LOW),
- EXON (EffectImpact.LOW),
- SYNONYMOUS_CODING (EffectImpact.LOW),
- CODON_CHANGE (EffectImpact.LOW),
- SYNONYMOUS_STOP (EffectImpact.LOW),
- NON_SYNONYMOUS_STOP (EffectImpact.LOW),
- INTRON (EffectImpact.LOW),
- UTR_3_PRIME (EffectImpact.LOW),
- DOWNSTREAM (EffectImpact.LOW),
- INTRON_CONSERVED (EffectImpact.LOW),
- INTERGENIC_CONSERVED (EffectImpact.LOW),
- CUSTOM (EffectImpact.LOW);
-
- private final EffectImpact impact;
-
- EffectType ( EffectImpact impact ) {
- this.impact = impact;
- }
-
- public EffectImpact getImpact() {
- return impact;
- }
- }
-
- public enum EffectImpact {
- LOW (1),
- MODERATE (2),
- HIGH (3);
-
- private final int severityRating;
-
- EffectImpact ( int severityRating ) {
- this.severityRating = severityRating;
- }
-
- public boolean isHigherImpactThan ( EffectImpact other ) {
- return this.severityRating > other.severityRating;
- }
- }
-
- // The kinds of variants supported by the SnpEff output format:
- public enum ChangeType {
- SNP,
- MNP,
- INS,
- DEL
- }
-
- // Possible zygosities of SnpEff variants:
- public enum Zygosity {
- Hom,
- Het
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
deleted file mode 100644
index 2f120b7d2..000000000
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
+++ /dev/null
@@ -1,423 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.codecs.snpEff;
-
-import org.broad.tribble.Feature;
-
-import java.util.NoSuchElementException;
-
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
-
-/**
- * Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
- *
- * Many fields are optional, and missing values are represented by nulls. You should always call the
- * hasX() method before calling the corresponding getX() method. Required fields can never be null
- * and do not have a hasX() method.
- *
- * @author David Roazen
- */
-public class SnpEffFeature implements Feature {
-
- private String contig; // REQUIRED FIELD
- private long position; // REQUIRED FIELD
- private String reference;
- private String change;
- private ChangeType changeType;
- private Zygosity zygosity;
- private Double quality;
- private Long coverage;
- private String warnings;
- private String geneID;
- private String geneName;
- private String bioType;
- private String transcriptID;
- private String exonID;
- private Integer exonRank;
- private boolean isNonCodingGene; // REQUIRED FIELD
- private EffectType effect; // REQUIRED FIELD
- private String effectExtraInformation;
- private String oldAndNewAA;
- private String oldAndNewCodon;
- private Integer codonNum;
- private Integer cdsSize;
- private String codonsAround;
- private String aasAround;
- private String customIntervalID;
-
- public SnpEffFeature ( String contig,
- long position,
- String reference,
- String change,
- ChangeType changeType,
- Zygosity zygosity,
- Double quality,
- Long coverage,
- String warnings,
- String geneID,
- String geneName,
- String bioType,
- String transcriptID,
- String exonID,
- Integer exonRank,
- boolean isNonCodingGene,
- EffectType effect,
- String effectExtraInformation,
- String oldAndNewAA,
- String oldAndNewCodon,
- Integer codonNum,
- Integer cdsSize,
- String codonsAround,
- String aasAround,
- String customIntervalID ) {
-
- if ( contig == null || effect == null ) {
- throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
- }
-
- this.contig = contig;
- this.position = position;
- this.reference = reference;
- this.change = change;
- this.changeType = changeType;
- this.zygosity = zygosity;
- this.quality = quality;
- this.coverage = coverage;
- this.warnings = warnings;
- this.geneID = geneID;
- this.geneName = geneName;
- this.bioType = bioType;
- this.transcriptID = transcriptID;
- this.exonID = exonID;
- this.exonRank = exonRank;
- this.isNonCodingGene = isNonCodingGene;
- this.effect = effect;
- this.effectExtraInformation = effectExtraInformation;
- this.oldAndNewAA = oldAndNewAA;
- this.oldAndNewCodon = oldAndNewCodon;
- this.codonNum = codonNum;
- this.cdsSize = cdsSize;
- this.codonsAround = codonsAround;
- this.aasAround = aasAround;
- this.customIntervalID = customIntervalID;
- }
-
- public boolean isHigherImpactThan ( SnpEffFeature other ) {
-
- // If one effect is in a non-coding gene and the other is not, the effect NOT in the
- // non-coding gene has higher impact:
-
- if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
- return true;
- }
- else if ( isNonCodingGene() && ! other.isNonCodingGene() ) {
- return false;
- }
-
- // Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
- // of the effects themselves as defined in the SnpEffConstants class:
-
- return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
- }
-
- public String getChr() {
- return contig;
- }
-
- public int getStart() {
- return (int)position;
- }
-
- public int getEnd() {
- return (int)position;
- }
-
- public boolean hasReference() {
- return reference != null;
- }
-
- public String getReference() {
- if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
- return reference;
- }
-
- public boolean hasChange() {
- return change != null;
- }
-
- public String getChange() {
- if ( change == null ) throw new NoSuchElementException("This feature has no change field");
- return change;
- }
-
- public boolean hasChangeType() {
- return changeType != null;
- }
-
- public ChangeType getChangeType() {
- if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
- return changeType;
- }
-
- public boolean hasZygosity() {
- return zygosity != null;
- }
-
- public Zygosity getZygosity() {
- if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
- return zygosity;
- }
-
- public boolean hasQuality() {
- return quality != null;
- }
-
- public Double getQuality() {
- if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
- return quality;
- }
-
- public boolean hasCoverage() {
- return coverage != null;
- }
-
- public Long getCoverage() {
- if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
- return coverage;
- }
-
- public boolean hasWarnings() {
- return warnings != null;
- }
-
- public String getWarnings() {
- if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
- return warnings;
- }
-
- public boolean hasGeneID() {
- return geneID != null;
- }
-
- public String getGeneID() {
- if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
- return geneID;
- }
-
- public boolean hasGeneName() {
- return geneName != null;
- }
-
- public String getGeneName() {
- if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
- return geneName;
- }
-
- public boolean hasBioType() {
- return bioType != null;
- }
-
- public String getBioType() {
- if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
- return bioType;
- }
-
- public boolean hasTranscriptID() {
- return transcriptID != null;
- }
-
- public String getTranscriptID() {
- if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
- return transcriptID;
- }
-
- public boolean hasExonID() {
- return exonID != null;
- }
-
- public String getExonID() {
- if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
- return exonID;
- }
-
- public boolean hasExonRank() {
- return exonRank != null;
- }
-
- public Integer getExonRank() {
- if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
- return exonRank;
- }
-
- public boolean isNonCodingGene() {
- return isNonCodingGene;
- }
-
- public EffectType getEffect() {
- return effect;
- }
-
- public EffectImpact getEffectImpact() {
- return effect.getImpact();
- }
-
- public boolean hasEffectExtraInformation() {
- return effectExtraInformation != null;
- }
-
- public String getEffectExtraInformation() {
- if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
- return effectExtraInformation;
- }
-
- public boolean hasOldAndNewAA() {
- return oldAndNewAA != null;
- }
-
- public String getOldAndNewAA() {
- if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
- return oldAndNewAA;
- }
-
- public boolean hasOldAndNewCodon() {
- return oldAndNewCodon != null;
- }
-
- public String getOldAndNewCodon() {
- if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
- return oldAndNewCodon;
- }
-
- public boolean hasCodonNum() {
- return codonNum != null;
- }
-
- public Integer getCodonNum() {
- if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
- return codonNum;
- }
-
- public boolean hasCdsSize() {
- return cdsSize != null;
- }
-
- public Integer getCdsSize() {
- if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
- return cdsSize;
- }
-
- public boolean hasCodonsAround() {
- return codonsAround != null;
- }
-
- public String getCodonsAround() {
- if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
- return codonsAround;
- }
-
- public boolean hadAasAround() {
- return aasAround != null;
- }
-
- public String getAasAround() {
- if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
- return aasAround;
- }
-
- public boolean hasCustomIntervalID() {
- return customIntervalID != null;
- }
-
- public String getCustomIntervalID() {
- if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
- return customIntervalID;
- }
-
- public boolean equals ( Object o ) {
- if ( o == null || ! (o instanceof SnpEffFeature) ) {
- return false;
- }
-
- SnpEffFeature other = (SnpEffFeature)o;
-
- return contig.equals(other.contig) &&
- position == other.position &&
- (reference == null ? other.reference == null : reference.equals(other.reference)) &&
- (change == null ? other.change == null : change.equals(other.change)) &&
- changeType == other.changeType &&
- zygosity == other.zygosity &&
- (quality == null ? other.quality == null : quality.equals(other.quality)) &&
- (coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
- (warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
- (geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
- (geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
- (bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
- (transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
- (exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
- (exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
- isNonCodingGene == other.isNonCodingGene &&
- effect == other.effect &&
- (effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
- (oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
- (oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
- (codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
- (cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
- (codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
- (aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
- (customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
- }
-
- public String toString() {
- return "[Contig: " + contig +
- " Position: " + position +
- " Reference: " + reference +
- " Change: " + change +
- " Change Type: " + changeType +
- " Zygosity: " + zygosity +
- " Quality: " + quality +
- " Coverage: " + coverage +
- " Warnings: " + warnings +
- " Gene ID: " + geneID +
- " Gene Name: " + geneName +
- " Bio Type: " + bioType +
- " Transcript ID: " + transcriptID +
- " Exon ID: " + exonID +
- " Exon Rank: " + exonRank +
- " Non-Coding Gene: " + isNonCodingGene +
- " Effect: " + effect +
- " Effect Extra Information: " + effectExtraInformation +
- " Old/New AA: " + oldAndNewAA +
- " Old/New Codon: " + oldAndNewCodon +
- " Codon Num: " + codonNum +
- " CDS Size: " + cdsSize +
- " Codons Around: " + codonsAround +
- " AAs Around: " + aasAround +
- " Custom Interval ID: " + customIntervalID +
- "]";
- }
-}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
index eb01e5dca..fd1c74993 100755
--- a/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/vcf/VCFHeader.java
@@ -24,6 +24,7 @@ public class VCFHeader {
private final Set mMetaData;
private final Map mInfoMetaData = new HashMap();
private final Map mFormatMetaData = new HashMap();
+ private final Map mOtherMetaData = new HashMap();
// the list of auxillary tags
private final Set mGenotypeSampleNames = new LinkedHashSet();
@@ -110,6 +111,9 @@ public class VCFHeader {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
mFormatMetaData.put(formatLine.getName(), formatLine);
}
+ else {
+ mOtherMetaData.put(line.getKey(), line);
+ }
}
}
@@ -185,6 +189,14 @@ public class VCFHeader {
public VCFFormatHeaderLine getFormatHeaderLine(String key) {
return mFormatMetaData.get(key);
}
+
+ /**
+ * @param key the header key name
+ * @return the meta data line, or null if there is none
+ */
+ public VCFHeaderLine getOtherHeaderLine(String key) {
+ return mOtherMetaData.get(key);
+ }
}
diff --git a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
index 1c65102ae..cfd59b504 100755
--- a/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
+++ b/public/java/src/org/broadinstitute/sting/utils/variantcontext/VariantContext.java
@@ -817,6 +817,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
}
+ /**
+ * @param other VariantContext whose alternate alleles to compare against
+ * @return true if this VariantContext has the same alternate alleles as other,
+ * regardless of ordering. Otherwise returns false.
+ */
+ public boolean hasSameAlternateAllelesAs ( VariantContext other ) {
+ Set thisAlternateAlleles = getAlternateAlleles();
+ Set otherAlternateAlleles = other.getAlternateAlleles();
+
+ if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) {
+ return false;
+ }
+
+ for ( Allele allele : thisAlternateAlleles ) {
+ if ( ! otherAlternateAlleles.contains(allele) ) {
+ return false;
+ }
+ }
+
+ return true;
+ }
+
// ---------------------------------------------------------------------------------------------------------
//
// Working with genotypes
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java
new file mode 100644
index 000000000..462abeba1
--- /dev/null
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/SnpEffUnitTest.java
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.SnpEffEffect;
+
+public class SnpEffUnitTest {
+
+ @Test
+ public void testParseWellFormedEffect() {
+ String effectName = "NON_SYNONYMOUS_CODING";
+ String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
+ }
+
+ @Test
+ public void testParseInvalidEffectNameEffect() {
+ String effectName = "MADE_UP_EFFECT";
+ String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertFalse(effect.isWellFormed());
+ }
+
+ @Test
+ public void testParseInvalidEffectImpactEffect() {
+ String effectName = "NON_SYNONYMOUS_CODING";
+ String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertFalse(effect.isWellFormed());
+ }
+
+ @Test
+ public void testParseWrongNumberOfMetadataFieldsEffect() {
+ String effectName = "NON_SYNONYMOUS_CODING";
+ String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertFalse(effect.isWellFormed());
+ }
+
+ @Test
+ public void testParseSnpEffWarningEffect() {
+ String effectName = "NON_SYNONYMOUS_CODING";
+ String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
+ }
+
+ @Test
+ public void testParseSnpEffErrorEffect() {
+ String effectName = "NON_SYNONYMOUS_CODING";
+ String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
+
+ SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
+ Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
+ }
+}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
index 832079807..f902ce276 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java
@@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.WalkerTest;
+import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@@ -129,12 +130,24 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testSnpEffAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
- "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
- validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
- "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
+ "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
+ validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
+ "snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1,
- Arrays.asList("03eae1dab19a9358250890594bf53607")
+ Arrays.asList("a1c3ba9efc28ee0606339604095076ea")
);
executeTest("Testing SnpEff annotations", spec);
}
+
+ @Test
+ public void testSnpEffAnnotationsUnsupportedVersion() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ "-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
+ validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
+ "snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000",
+ 1,
+ UserException.class
+ );
+ executeTest("Testing SnpEff annotations (unsupported version)", spec);
+ }
}
diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
index e992684bc..d8f7ad3b6 100755
--- a/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
+++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/varianteval/VariantEvalIntegrationTest.java
@@ -6,7 +6,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class VariantEvalIntegrationTest extends WalkerTest {
- private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
+ private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
@@ -14,6 +14,27 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference;
+ @Test
+ public void testFunctionClassWithSnpeff() {
+ WalkerTestSpec spec = new WalkerTestSpec(
+ buildCommandLine(
+ "-T VariantEval",
+ "-R " + b37KGReference,
+ "--dbsnp " + b37dbSNP132,
+ "--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
+ "-noEV",
+ "-EV TiTvVariantEvaluator",
+ "-noST",
+ "-ST FunctionalClass",
+ "-BTI eval",
+ "-o %s"
+ ),
+ 1,
+ Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
+ );
+ executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
+ }
+
@Test
public void testStratifySamplesAndExcludeMonomorphicSites() {
WalkerTestSpec spec = new WalkerTestSpec(
@@ -21,7 +42,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
- "--eval " + variantEvalTestDataRoot + "/CEU.trio.callsForVE.vcf",
+ "--eval " + variantEvalTestDataRoot + "CEU.trio.callsForVE.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-ST Sample",
diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java
deleted file mode 100644
index 6d492565b..000000000
--- a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2011, The Broad Institute
- *
- * Permission is hereby granted, free of charge, to any person
- * obtaining a copy of this software and associated documentation
- * files (the "Software"), to deal in the Software without
- * restriction, including without limitation the rights to use,
- * copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following
- * conditions:
- *
- * The above copyright notice and this permission notice shall be
- * included in all copies or substantial portions of the Software.
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
- * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
- * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
- * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- */
-
-package org.broadinstitute.sting.utils.codecs.snpEff;
-
-import org.apache.commons.io.input.ReaderInputStream;
-import org.broad.tribble.TribbleException;
-import org.broad.tribble.readers.AsciiLineReader;
-import org.broad.tribble.readers.LineReader;
-import org.testng.Assert;
-import org.testng.annotations.Test;
-
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
-import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
-
-import java.io.StringReader;
-
-public class SnpEffCodecUnitTest {
-
- @Test
- public void testParseWellFormedSnpEffHeaderLine() {
- String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
- "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
- "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
- "AAs around\tCustom_interval_ID";
-
- SnpEffCodec codec = new SnpEffCodec();
- LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
- String headerReturned = (String)codec.readHeader(reader);
-
- Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
- }
-
- @Test(expectedExceptions = TribbleException.InvalidHeader.class)
- public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
- String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
- "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
- "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
- "AAs around";
-
- SnpEffCodec codec = new SnpEffCodec();
- LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
- codec.readHeader(reader);
- }
-
- @Test(expectedExceptions = TribbleException.InvalidHeader.class)
- public void testParseMisnamedColumnSnpEffHeaderLine() {
- String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
- "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
- "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
- "AAs around\tCustom_interval_ID";
-
- SnpEffCodec codec = new SnpEffCodec();
- LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
- codec.readHeader(reader);
- }
-
- @Test
- public void testParseSimpleEffectSnpEffLine() {
- String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
- "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
-
- SnpEffFeature expectedFeature = new SnpEffFeature("1",
- 69428l,
- "T",
- "G",
- ChangeType.SNP,
- Zygosity.Hom,
- 6049.69,
- 61573l,
- null,
- "ENSG00000177693",
- "OR4F5",
- "mRNA",
- "ENST00000326183",
- "exon_1_69055_70108",
- 1,
- false,
- EffectType.NON_SYNONYMOUS_CODING,
- null,
- "F/C",
- "TTT/TGT",
- 113,
- 918,
- null,
- null,
- null
- );
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
-
- Assert.assertEquals(feature, expectedFeature);
- }
-
- @Test
- public void testParseNonCodingRegionSnpEffLine() {
- String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
- "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
- "L/V\tCTA/GTA\t272\t952\t\t\t";
-
- SnpEffFeature expectedFeature = new SnpEffFeature("1",
- 1337592l,
- "G",
- "C",
- ChangeType.SNP,
- Zygosity.Hom,
- 1935.52,
- 21885l,
- null,
- "ENSG00000250188",
- "RP4-758J18.5",
- "mRNA",
- "ENST00000514958",
- "exon_1_1337454_1338076",
- 2,
- true,
- EffectType.NON_SYNONYMOUS_CODING,
- null,
- "L/V",
- "CTA/GTA",
- 272,
- 952,
- null,
- null,
- null
- );
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
-
- Assert.assertEquals(feature, expectedFeature);
- }
-
- @Test
- public void testParseExtraEffectInformationSnpEffLine() {
- String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
- "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
-
- SnpEffFeature expectedFeature = new SnpEffFeature("1",
- 879537l,
- "T",
- "C",
- ChangeType.SNP,
- Zygosity.Hom,
- 341.58,
- 13733l,
- null,
- "ENSG00000187634",
- "SAMD11",
- "mRNA",
- "ENST00000341065",
- null,
- null,
- false,
- EffectType.UTR_3_PRIME,
- "4 bases from transcript end",
- null,
- null,
- null,
- null,
- null,
- null,
- null
- );
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
-
- Assert.assertEquals(feature, expectedFeature);
- }
-
- @Test
- public void testParseMultiEffectSnpEffLine() {
- String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
- "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
-
- SnpEffFeature expectedFeature = new SnpEffFeature("1",
- 901901l,
- "C",
- "T",
- ChangeType.SNP,
- Zygosity.Hom,
- 162.91,
- 4646l,
- null,
- "ENSG00000187583",
- "PLEKHN1",
- "mRNA",
- "ENST00000379410",
- "exon_1_901877_901994",
- 1,
- false,
- EffectType.START_GAINED,
- "ATG, UTR_5_PRIME: 11 bases from TSS",
- null,
- null,
- null,
- null,
- null,
- null,
- null
- );
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
-
- Assert.assertEquals(feature, expectedFeature);
- }
-
- @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
- public void testParseWrongNumberOfFieldsSnpEffLine() {
- String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
- "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
- }
-
- @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
- public void testParseBlankEffectFieldSnpEffLine() {
- String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
- "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
- }
-
- @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
- public void testParseInvalidNumericFieldSnpEffLine() {
- String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
- "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
-
- SnpEffCodec codec = new SnpEffCodec();
- SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
- }
-}