From c1061e994ceb0b0f2b8b6c25193a3ffce202c4c6 Mon Sep 17 00:00:00 2001
From: David Roazen
Date: Thu, 4 Aug 2011 19:36:26 -0400
Subject: [PATCH] Initial support for adding genomic annotations through VariantAnnotator using the output from the SnpEff tool, which replaces the old Genomic Annotator.

---
 .../sting/gatk/walkers/annotator/SnpEff.java 171 ++++++++++
 .../utils/codecs/snpEff/SnpEffCodec.java 202 ++++++++++++
 .../utils/codecs/snpEff/SnpEffConstants.java 107 ++++++
 .../utils/codecs/snpEff/SnpEffFeature.java 306 ++++++++++++++++++
 4 files changed, 786 insertions(+)
 create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
 create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java

diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
new file mode 100644
index 000000000..e834e6324
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.gatk.walkers.annotator;
+
+import org.apache.log4j.Logger;
+import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
+import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
+import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
+import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
+import org.broadinstitute.sting.utils.Utils;
+import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
+import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
+import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
+import org.broadinstitute.sting.utils.variantcontext.VariantContext;
+
+import java.util.*;
+
+/** Annotates variants with the highest-impact effect that SnpEff predicted for their locus. */
+public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation {
+
+    // SnpEff field keys:
+    public static final String GENE_ID_KEY                  = "GENE_ID";
+    public static final String GENE_NAME_KEY                = "GENE_NAME";
+    public static final String TRANSCRIPT_ID_KEY            = "TRANSCRIPT_ID";
+    public static final String EXON_ID_KEY                  = "EXON_ID";
+    public static final String EXON_RANK_KEY                = "EXON_RANK";
+    public static final String WITHIN_NON_CODING_GENE_KEY   = "WITHIN_NON_CODING_GENE";
+    public static final String EFFECT_KEY                   = "EFFECT";
+    public static final String EFFECT_IMPACT_KEY            = "EFFECT_IMPACT";
+    public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
+    public static final String OLD_NEW_AA_KEY               = "OLD_NEW_AA";
+    public static final String OLD_NEW_CODON_KEY            = "OLD_NEW_CODON";
+    public static final String CODON_NUM_KEY                = "CODON_NUM";
+    public static final String CDS_SIZE_KEY                 = "CDS_SIZE";
+
+    private static final String RMD_TRACK_NAME = "SnpEff";
+    private static final Logger logger = Logger.getLogger(SnpEff.class);
+
+    /**
+     * Builds the INFO annotations for the highest-impact SnpEff record at this locus.
+     * @param tracker supplies the records bound to the "SnpEff" reference metadata track
+     * @param ref not used by this annotation
+     * @param stratifiedContexts not used by this annotation
+     * @param vc not used by this annotation
+     * @return INFO key/value pairs; an empty map when the track has no record at this locus
+     */
+    public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) {
+        List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME);
+        // With no records there is no "most significant effect" to dereference: annotate nothing.
+        if ( snpEffFeatures == null || snpEffFeatures.isEmpty() ) {
+            return new LinkedHashMap();
+        }
+
+        sanityCheckSnpEffFeatures(snpEffFeatures);
+        return generateAnnotations(getMostSignificantEffect(snpEffFeatures));
+    }
+
+    /** Logs a single warning if the records disagree about lying within a non-coding gene. */
+    private void sanityCheckSnpEffFeatures( List snpEffFeatures ) {
+        Boolean locusIsNonCodingGene = null;
+        for ( Object feature : snpEffFeatures ) {
+            SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
+            if ( locusIsNonCodingGene == null ) {
+                locusIsNonCodingGene = snpEffFeature.isNonCodingGene();
+            }
+            else if ( ! locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) {
+                logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene",
+                                          snpEffFeature.getChr(), snpEffFeature.getStart()));
+                return;
+            }
+        }
+    }
+
+    /** Returns the earliest record carrying the highest impact in the list, or null for an empty list. */
+    private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) {
+        SnpEffFeature mostSignificantEffect = null;
+        for ( Object feature : snpEffFeatures ) {
+            SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
+            if ( mostSignificantEffect == null ||
+                 snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) {
+                mostSignificantEffect = snpEffFeature;
+            }
+        }
+        return mostSignificantEffect;
+    }
+
+    /** Copies every populated field of the chosen record into an insertion-ordered map. */
+    private Map generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
+        Map annotations = new LinkedHashMap(Utils.optimumHashSize(getKeyNames().size()));
+
+        putIfPresent(annotations, GENE_ID_KEY, mostSignificantEffect.getGeneID());
+        putIfPresent(annotations, GENE_NAME_KEY, mostSignificantEffect.getGeneName());
+        putIfPresent(annotations, TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
+        putIfPresent(annotations, EXON_ID_KEY, mostSignificantEffect.getExonID());
+        putIfPresent(annotations, EXON_RANK_KEY, mostSignificantEffect.getExonRank());
+        if ( mostSignificantEffect.isNonCodingGene() ) {
+            // Declared as a Flag in getDescriptions(): the presence of the key is the information.
+            annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
+        }
+
+        annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
+        annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
+        putIfPresent(annotations, EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
+
+        putIfPresent(annotations, OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
+        putIfPresent(annotations, OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
+        putIfPresent(annotations, CODON_NUM_KEY, mostSignificantEffect.getCodonNum());
+        putIfPresent(annotations, CDS_SIZE_KEY, mostSignificantEffect.getCdsSize());
+
+        return annotations;
+    }
+
+    /** Stores value.toString() under key unless the record left that field empty (null). */
+    private static void putIfPresent ( Map annotations, String key, Object value ) {
+        if ( value != null ) {
+            annotations.put(key, value.toString());
+        }
+    }
+
+    /** Lists the INFO keys this annotation may emit. */
+    public List getKeyNames() {
+        return Arrays.asList(
+            GENE_ID_KEY, GENE_NAME_KEY, TRANSCRIPT_ID_KEY, EXON_ID_KEY, EXON_RANK_KEY,
+            WITHIN_NON_CODING_GENE_KEY, EFFECT_KEY, EFFECT_IMPACT_KEY, EFFECT_EXTRA_INFORMATION_KEY,
+            OLD_NEW_AA_KEY, OLD_NEW_CODON_KEY, CODON_NUM_KEY, CDS_SIZE_KEY
+        );
+    }
+
+    /** Supplies one VCF INFO header line for each key listed by getKeyNames(). */
+    public List getDescriptions() {
+        return Arrays.asList(
+            new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"),
+            new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"),
+            new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"),
+            new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"),
+            new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"),
+            new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"),
+            new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"),
+            new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
+            new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"),
+            new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"),
+            new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"),
+            new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"),
+            new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size")
+        );
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
new file mode 100644
index 000000000..f5d77635a
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+import org.broad.tribble.Feature;
+import org.broad.tribble.FeatureCodec;
+import org.broad.tribble.TribbleException;
+import org.broad.tribble.readers.LineReader;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
+
+import java.io.IOException;
+
+/** Tribble codec for the tab-delimited text output of SnpEff: one header line, then one record per line. */
+public class SnpEffCodec implements FeatureCodec {
+
+    public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
+    public static final String FIELD_DELIMITER_PATTERN = "\\t";
+    public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
+    public static final String HEADER_LINE_START = "# ";
+    public static final String[] HEADER_FIELD_NAMES = {
+        "Chromo", "Position", "Reference", "Change", "Change type", "Homozygous",
+        "Quality", "Coverage", "Warnings", "Gene_ID", "Gene_name", "Bio_type",
+        "Trancript_ID",  // yes, this is how it's spelled in the SnpEff output
+        "Exon_ID", "Exon_Rank", "Effect", "old_AA/new_AA", "Old_codon/New_codon",
+        "Codon_Num(CDS)", "CDS_size", "Codons around", "AAs around", "Custom_interval_ID"
+    };
+    // Indices into HEADER_FIELD_NAMES of the columns that may not be empty.
+    public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
+    public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
+
+    /** Delegates to decode(String); no cheaper location-only parse is implemented. */
+    public Feature decodeLoc ( String line ) {
+        return decode(line);
+    }
+
+    /**
+     * Parses one tab-delimited SnpEff record; empty optional columns are decoded as null.
+     * @param line a data line holding exactly EXPECTED_NUMBER_OF_FIELDS columns
+     * @return the decoded SnpEffFeature
+     * @throws TribbleException.InvalidDecodeLine on a bad column count, empty required column, number or enum name
+     */
+    public Feature decode ( String line ) {
+        String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
+
+        if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
+            throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
+                                                         ") number of fields: found " + tokens.length + " fields.", line);
+        }
+
+        try {
+            checkForRequiredFields(tokens, line);
+
+            // Effect column layout: [WITHIN_NON_CODING_GENE delimiter] EFFECT [delimiter extra information],
+            // so a flagged record splits into at most three pieces and any other record into at most two.
+            boolean isNonCodingGene = isNonCodingGene(tokens[15]);
+            String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, isNonCodingGene ? 3 : 2);
+
+            return new SnpEffFeature(
+                tokens[0],                                                        // contig
+                Long.parseLong(tokens[1]),                                        // position
+                emptyToNull(tokens[2]),                                           // reference
+                emptyToNull(tokens[3]),                                           // change
+                tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]),       // changeType
+                tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]),         // zygosity
+                tokens[6].isEmpty() ? null : Double.valueOf(tokens[6]),           // quality
+                tokens[7].isEmpty() ? null : Long.valueOf(tokens[7]),             // coverage
+                emptyToNull(tokens[8]),                                           // warnings
+                emptyToNull(tokens[9]),                                           // geneID
+                emptyToNull(tokens[10]),                                          // geneName
+                emptyToNull(tokens[11]),                                          // bioType
+                emptyToNull(tokens[12]),                                          // transcriptID
+                emptyToNull(tokens[13]),                                          // exonID
+                parseOptionalInteger(tokens[14]),                                 // exonRank
+                isNonCodingGene,
+                parseEffect(effectFieldTokens, isNonCodingGene),                  // effect
+                parseEffectExtraInformation(effectFieldTokens, isNonCodingGene),  // effectExtraInformation
+                emptyToNull(tokens[16]),                                          // oldAndNewAA
+                emptyToNull(tokens[17]),                                          // oldAndNewCodon
+                parseOptionalInteger(tokens[18]),                                 // codonNum
+                parseOptionalInteger(tokens[19]),                                 // cdsSize
+                emptyToNull(tokens[20]),                                          // codonsAround
+                emptyToNull(tokens[21]),                                          // aasAround
+                emptyToNull(tokens[22]));                                         // customIntervalID
+        }
+        catch ( NumberFormatException e ) {
+            throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
+        }
+        catch ( IllegalArgumentException e ) {
+            throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
+        }
+    }
+
+    /** Returns null for the empty string found where an optional column has no value. */
+    private static String emptyToNull ( String token ) {
+        return token.isEmpty() ? null : token;
+    }
+
+    /** Parses an optional integer column, treating the empty string as absent (null). */
+    private static Integer parseOptionalInteger ( String token ) {
+        return token.isEmpty() ? null : Integer.valueOf(token);
+    }
+
+    /** Rejects the line if any column listed in REQUIRED_FIELDS is empty. */
+    private void checkForRequiredFields ( String[] tokens, String line ) {
+        for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
+            if ( tokens[requiredFieldIndex].isEmpty() ) {
+                throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
+                                                             HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
+                                                             line);
+            }
+        }
+    }
+
+    /** A record is flagged as non-coding when its effect column starts with NON_CODING_GENE_FLAG. */
+    private boolean isNonCodingGene ( String effectField ) {
+        return effectField.startsWith(NON_CODING_GENE_FLAG);
+    }
+
+    /** Picks the effect name: the second piece when the non-coding flag precedes it, else the first. */
+    private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
+        int effectIndex = ( effectFieldTokens.length > 1 && isNonCodingGene ) ? 1 : 0;
+        return EffectType.valueOf(effectFieldTokens[effectIndex].trim());
+    }
+
+    /** Returns the trailing piece of the effect column when one follows the effect name, else null. */
+    private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
+        if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
+            return effectFieldTokens[effectFieldTokens.length - 1];
+        }
+
+        return null;
+    }
+
+    public Class getFeatureType() {
+        return SnpEffFeature.class;
+    }
+
+    /**
+     * Reads the single header line, validates it against HEADER_FIELD_NAMES and returns it.
+     *
+     * @throws TribbleException if the line cannot be read (the IOException is kept as its cause)
+     */
+    public Object readHeader ( LineReader reader ) {
+        String headerLine;
+
+        try {
+            headerLine = reader.readLine();
+        }
+        catch ( IOException e ) {
+            throw new TribbleException("Unable to read header line from input file.", e);
+        }
+
+        validateHeaderLine(headerLine);
+        return headerLine;
+    }
+
+    /** Checks the "# " prefix, then the column count, then every column name. */
+    private void validateHeaderLine ( String headerLine ) {
+        if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
+            throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
+        }
+
+        String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
+
+        if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
+            throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
+                                                     EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
+        }
+
+        for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
+            if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
+                throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
+                                                         HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
+                                                         headerTokens[columnIndex] + "\"");
+            }
+        }
+    }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
new file mode 100644
index 000000000..f226c3523
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+/** Enumerations for the categorical columns of SnpEff output, shared by the codec and the feature. */
+public class SnpEffConstants {
+
+    /** The effect names accepted in the "Effect" column, each tagged with an impact category. */
+    public enum EffectType {
+        // High impact
+        START_GAINED                      (EffectImpact.HIGH),
+        START_LOST                        (EffectImpact.HIGH),
+        EXON_DELETED                      (EffectImpact.HIGH),
+        FRAME_SHIFT                       (EffectImpact.HIGH),
+        STOP_GAINED                       (EffectImpact.HIGH),
+        STOP_LOST                         (EffectImpact.HIGH),
+        SPLICE_SITE_ACCEPTOR              (EffectImpact.HIGH),
+        SPLICE_SITE_DONOR                 (EffectImpact.HIGH),
+
+        // Moderate impact
+        NON_SYNONYMOUS_CODING             (EffectImpact.MODERATE),
+        UTR_5_DELETED                     (EffectImpact.MODERATE),
+        UTR_3_DELETED                     (EffectImpact.MODERATE),
+        CODON_INSERTION                   (EffectImpact.MODERATE),
+        CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
+        CODON_DELETION                    (EffectImpact.MODERATE),
+        CODON_CHANGE_PLUS_CODON_DELETION  (EffectImpact.MODERATE),
+
+        // Low impact
+        NONE                              (EffectImpact.LOW),
+        CHROMOSOME                        (EffectImpact.LOW),
+        INTERGENIC                        (EffectImpact.LOW),
+        UPSTREAM                          (EffectImpact.LOW),
+        UTR_5_PRIME                       (EffectImpact.LOW),
+        SYNONYMOUS_START                  (EffectImpact.LOW),
+        NON_SYNONYMOUS_START              (EffectImpact.LOW),
+        CDS                               (EffectImpact.LOW),
+        GENE                              (EffectImpact.LOW),
+        TRANSCRIPT                        (EffectImpact.LOW),
+        EXON                              (EffectImpact.LOW),
+        SYNONYMOUS_CODING                 (EffectImpact.LOW),
+        CODON_CHANGE                      (EffectImpact.LOW),
+        SYNONYMOUS_STOP                   (EffectImpact.LOW),
+        NON_SYNONYMOUS_STOP               (EffectImpact.LOW),
+        INTRON                            (EffectImpact.LOW),
+        UTR_3_PRIME                       (EffectImpact.LOW),
+        DOWNSTREAM                        (EffectImpact.LOW),
+        INTRON_CONSERVED                  (EffectImpact.LOW),
+        INTERGENIC_CONSERVED              (EffectImpact.LOW),
+        CUSTOM                            (EffectImpact.LOW);
+
+        private final EffectImpact effectImpact;
+
+        EffectType ( EffectImpact effectImpact ) {
+            this.effectImpact = effectImpact;
+        }
+
+        /** @return the impact category attached to this effect */
+        public EffectImpact getImpact() {
+            return effectImpact;
+        }
+    }
+
+    /** Impact categories, declared from least to most severe. */
+    public enum EffectImpact {
+        LOW (1), MODERATE (2), HIGH (3);
+
+        private final int rank;
+
+        EffectImpact ( int rank ) {
+            this.rank = rank;
+        }
+
+        /** @return true only when this category is strictly more severe than {@code other} */
+        public boolean isHigherImpactThan ( EffectImpact other ) {
+            return rank > other.rank;
+        }
+    }
+
+    /** Values accepted in the "Change type" column. */
+    public enum ChangeType { SNP, MNP, INS, DEL }
+
+    /** Values accepted in the "Homozygous" column. */
+    public enum Zygosity { Hom, Het }
+}
diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
new file mode 100644
index 000000000..3b9d6d4d6
--- /dev/null
+++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2011, The Broad Institute
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+package org.broadinstitute.sting.utils.codecs.snpEff;
+
+import org.broad.tribble.Feature;
+
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
+import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
+
+/**
+ * One decoded record of SnpEff output. Optional columns that were empty in the record are
+ * held as null, and each has a has...() method that tests for presence.
+ */
+public class SnpEffFeature implements Feature {
+
+    private final String contig;
+    private final long position;
+    private final String reference;
+    private final String change;
+    private final ChangeType changeType;
+    private final Zygosity zygosity;
+    private final Double quality;
+    private final Long coverage;
+    private final String warnings;
+    private final String geneID;
+    private final String geneName;
+    private final String bioType;
+    private final String transcriptID;
+    private final String exonID;
+    private final Integer exonRank;
+    private final boolean isNonCodingGene;
+    private final EffectType effect;
+    private final String effectExtraInformation;
+    private final String oldAndNewAA;
+    private final String oldAndNewCodon;
+    private final Integer codonNum;
+    private final Integer cdsSize;
+    private final String codonsAround;
+    private final String aasAround;
+    private final String customIntervalID;
+
+    /**
+     * Stores the fields of one record as given. A null argument marks an optional column the
+     * record left empty; effect must not be null because getEffectImpact() dereferences it.
+     */
+    public SnpEffFeature ( String contig, long position, String reference, String change,
+                           ChangeType changeType, Zygosity zygosity, Double quality, Long coverage,
+                           String warnings, String geneID, String geneName, String bioType,
+                           String transcriptID, String exonID, Integer exonRank, boolean isNonCodingGene,
+                           EffectType effect, String effectExtraInformation, String oldAndNewAA,
+                           String oldAndNewCodon, Integer codonNum, Integer cdsSize, String codonsAround,
+                           String aasAround, String customIntervalID ) {
+
+        this.contig = contig;
+        this.position = position;
+        this.reference = reference;
+        this.change = change;
+        this.changeType = changeType;
+        this.zygosity = zygosity;
+        this.quality = quality;
+        this.coverage = coverage;
+        this.warnings = warnings;
+        this.geneID = geneID;
+        this.geneName = geneName;
+        this.bioType = bioType;
+        this.transcriptID = transcriptID;
+        this.exonID = exonID;
+        this.exonRank = exonRank;
+        this.isNonCodingGene = isNonCodingGene;
+        this.effect = effect;
+        this.effectExtraInformation = effectExtraInformation;
+        this.oldAndNewAA = oldAndNewAA;
+        this.oldAndNewCodon = oldAndNewCodon;
+        this.codonNum = codonNum;
+        this.cdsSize = cdsSize;
+        this.codonsAround = codonsAround;
+        this.aasAround = aasAround;
+        this.customIntervalID = customIntervalID;
+    }
+
+    public String getChr() {
+        return contig;
+    }
+
+    /** Returns the position, narrowed from the stored long to int. */
+    public int getStart() {
+        return (int)position;
+    }
+
+    /** Returns the same value as getStart(). */
+    public int getEnd() {
+        return (int)position;
+    }
+
+    public boolean hasReference() {
+        return reference != null;
+    }
+
+    public String getReference() {
+        return reference;
+    }
+
+    public boolean hasChange() {
+        return change != null;
+    }
+
+    public String getChange() {
+        return change;
+    }
+
+    public boolean hasChangeType() {
+        return changeType != null;
+    }
+
+    public ChangeType getChangeType() {
+        return changeType;
+    }
+
+    public boolean hasZygosity() {
+        return zygosity != null;
+    }
+
+    public Zygosity getZygosity() {
+        return zygosity;
+    }
+
+    public boolean hasQuality() {
+        return quality != null;
+    }
+
+    public Double getQuality() {
+        return quality;
+    }
+
+    public boolean hasCoverage() {
+        return coverage != null;
+    }
+
+    public Long getCoverage() {
+        return coverage;
+    }
+
+    public boolean hasWarnings() {
+        return warnings != null;
+    }
+
+    public String getWarnings() {
+        return warnings;
+    }
+
+    public boolean hasGeneID() {
+        return geneID != null;
+    }
+
+    public String getGeneID() {
+        return geneID;
+    }
+
+    public boolean hasGeneName() {
+        return geneName != null;
+    }
+
+    public String getGeneName() {
+        return geneName;
+    }
+
+    public boolean hasBioType() {
+        return bioType != null;
+    }
+
+    public String getBioType() {
+        return bioType;
+    }
+
+    public boolean hasTranscriptID() {
+        return transcriptID != null;
+    }
+
+    public String getTranscriptID() {
+        return transcriptID;
+    }
+
+    public boolean hasExonID() {
+        return exonID != null;
+    }
+
+    public String getExonID() {
+        return exonID;
+    }
+
+    public boolean hasExonRank() {
+        return exonRank != null;
+    }
+
+    public Integer getExonRank() {
+        return exonRank;
+    }
+
+    /** Whether the record was flagged as lying within a non-coding gene. */
+    public boolean isNonCodingGene() {
+        return isNonCodingGene;
+    }
+
+    public EffectType getEffect() {
+        return effect;
+    }
+
+    /** Returns the impact attached to the effect type; it is not stored separately. */
+    public EffectImpact getEffectImpact() {
+        return effect.getImpact();
+    }
+
+    public boolean hasEffectExtraInformation() {
+        return effectExtraInformation != null;
+    }
+
+    public String getEffectExtraInformation() {
+        return effectExtraInformation;
+    }
+
+    public boolean hasOldAndNewAA() {
+        return oldAndNewAA != null;
+    }
+
+    public String getOldAndNewAA() {
+        return oldAndNewAA;
+    }
+
+    public boolean hasOldAndNewCodon() {
+        return oldAndNewCodon != null;
+    }
+
+    public String getOldAndNewCodon() {
+        return oldAndNewCodon;
+    }
+
+    public boolean hasCodonNum() {
+        return codonNum != null;
+    }
+
+    public Integer getCodonNum() {
+        return codonNum;
+    }
+
+    public boolean hasCdsSize() {
+        return cdsSize != null;
+    }
+
+    public Integer getCdsSize() {
+        return cdsSize;
+    }
+
+    public boolean hasCodonsAround() {
+        return codonsAround != null;
+    }
+
+    public String getCodonsAround() {
+        return codonsAround;
+    }
+
+    public boolean hasAasAround() {
+        return aasAround != null;
+    }
+
+    /** @deprecated misspelled name kept for existing callers; use {@link #hasAasAround()} */
+    @Deprecated
+    public boolean hadAasAround() {
+        return hasAasAround();
+    }
+
+    public String getAasAround() {
+        return aasAround;
+    }
+
+    public boolean hasCustomIntervalID() {
+        return customIntervalID != null;
+    }
+
+    public String getCustomIntervalID() {
+        return customIntervalID;
+    }
+}