From c1061e994ceb0b0f2b8b6c25193a3ffce202c4c6 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Thu, 4 Aug 2011 19:36:26 -0400 Subject: [PATCH 1/5] Initial support for adding genomic annotations through VariantAnnotator using the output from the SnpEff tool, which replaces the old Genomic Annotator. --- .../sting/gatk/walkers/annotator/SnpEff.java | 171 ++++++++++ .../utils/codecs/snpEff/SnpEffCodec.java | 202 ++++++++++++ .../utils/codecs/snpEff/SnpEffConstants.java | 107 ++++++ .../utils/codecs/snpEff/SnpEffFeature.java | 306 ++++++++++++++++++ 4 files changed, 786 insertions(+) create mode 100644 public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java create mode 100644 public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java new file mode 100644 index 000000000..e834e6324 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -0,0 +1,171 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. 
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +package org.broadinstitute.sting.gatk.walkers.annotator; + +import org.apache.log4j.Logger; +import org.broadinstitute.sting.gatk.contexts.AlignmentContext; +import org.broadinstitute.sting.gatk.contexts.ReferenceContext; +import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; +import org.broadinstitute.sting.utils.Utils; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants; +import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; +import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType; +import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine; +import org.broadinstitute.sting.utils.variantcontext.VariantContext; + +import java.util.*; + +public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { + + // SnpEff field keys: + public static final String GENE_ID_KEY = "GENE_ID"; + public static final String GENE_NAME_KEY = "GENE_NAME"; + public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID"; + public static final String EXON_ID_KEY = "EXON_ID"; + public static final String EXON_RANK_KEY = "EXON_RANK"; + public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE"; + public static final String EFFECT_KEY = "EFFECT"; + public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT"; + public static 
final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION"; + public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA"; + public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON"; + public static final String CODON_NUM_KEY = "CODON_NUM"; + public static final String CDS_SIZE_KEY = "CDS_SIZE"; + + private static final String RMD_TRACK_NAME = "SnpEff"; + private static final Logger logger = Logger.getLogger(SnpEff.class); + + public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { + List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); + + sanityCheckSnpEffFeatures(snpEffFeatures); + + SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); + return generateAnnotations(mostSignificantEffect); + } + + private void sanityCheckSnpEffFeatures( List snpEffFeatures ) { + Boolean locusIsNonCodingGene = null; + + for ( Object feature : snpEffFeatures ) { + SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + + if ( locusIsNonCodingGene == null ) { + locusIsNonCodingGene = snpEffFeature.isNonCodingGene(); + } + else if ( ! 
locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) { + logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene", + snpEffFeature.getChr(), snpEffFeature.getStart())); + return; + } + } + } + + private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { + SnpEffFeature mostSignificantEffect = null; + + for ( Object feature : snpEffFeatures ) { + SnpEffFeature snpEffFeature = (SnpEffFeature)feature; + + if ( mostSignificantEffect == null || + snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) { + + mostSignificantEffect = snpEffFeature; + } + } + + return mostSignificantEffect; + } + + private Map generateAnnotations ( SnpEffFeature mostSignificantEffect ) { + Map annotations = new LinkedHashMap(Utils.optimumHashSize(getKeyNames().size())); + + if ( mostSignificantEffect.hasGeneID() ) + annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID()); + if ( mostSignificantEffect.hasGeneName() ) + annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName()); + if ( mostSignificantEffect.hasTranscriptID() ) + annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID()); + if ( mostSignificantEffect.hasExonID() ) + annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID()); + if ( mostSignificantEffect.hasExonRank() ) + annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank())); + if ( mostSignificantEffect.isNonCodingGene() ) + annotations.put(WITHIN_NON_CODING_GENE_KEY, null); + + annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString()); + annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString()); + if ( mostSignificantEffect.hasEffectExtraInformation() ) + annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation()); + + if ( mostSignificantEffect.hasOldAndNewAA() ) + annotations.put(OLD_NEW_AA_KEY, 
mostSignificantEffect.getOldAndNewAA()); + if ( mostSignificantEffect.hasOldAndNewCodon() ) + annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon()); + if ( mostSignificantEffect.hasCodonNum() ) + annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum())); + if ( mostSignificantEffect.hasCdsSize() ) + annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize())); + + return annotations; + } + + public List getKeyNames() { + return Arrays.asList( GENE_ID_KEY, + GENE_NAME_KEY, + TRANSCRIPT_ID_KEY, + EXON_ID_KEY, + EXON_RANK_KEY, + WITHIN_NON_CODING_GENE_KEY, + EFFECT_KEY, + EFFECT_IMPACT_KEY, + EFFECT_EXTRA_INFORMATION_KEY, + OLD_NEW_AA_KEY, + OLD_NEW_CODON_KEY, + CODON_NUM_KEY, + CDS_SIZE_KEY + ); + } + + public List getDescriptions() { + return Arrays.asList( + new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), + new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), + new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), + new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), + new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), + new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), + new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), + new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), + new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), + new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), + new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), + new 
VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), + new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") + ); + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java new file mode 100644 index 000000000..f5d77635a --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -0,0 +1,202 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.broad.tribble.Feature; +import org.broad.tribble.FeatureCodec; +import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.LineReader; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +import java.io.IOException; + +public class SnpEffCodec implements FeatureCodec { + + public static final int EXPECTED_NUMBER_OF_FIELDS = 23; + public static final String FIELD_DELIMITER_PATTERN = "\\t"; + public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]"; + public static final String HEADER_LINE_START = "# "; + public static final String[] HEADER_FIELD_NAMES = { "Chromo", + "Position", + "Reference", + "Change", + "Change type", + "Homozygous", + "Quality", + "Coverage", + "Warnings", + "Gene_ID", + "Gene_name", + "Bio_type", + "Trancript_ID", // yes, this is how it's spelled in the SnpEff output + "Exon_ID", + "Exon_Rank", + "Effect", + "old_AA/new_AA", + "Old_codon/New_codon", + "Codon_Num(CDS)", + "CDS_size", + "Codons around", + "AAs around", + "Custom_interval_ID" + }; + public static final int[] REQUIRED_FIELDS = { 0, 1, 15 }; + public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE"; + + public Feature decodeLoc ( String line ) { + return decode(line); + } + + public Feature decode ( String line ) { + String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1); + + if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) { + throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS + + ") number of fields: found " + tokens.length + " fields.", line); + } + + try { + checkForRequiredFields(tokens, line); + + String contig = tokens[0]; + long position = Long.parseLong(tokens[1]); + + String 
reference = tokens[2].isEmpty() ? null : tokens[2]; + String change = tokens[3].isEmpty() ? null : tokens[3]; + ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]); + Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]); + Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]); + Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]); + String warnings = tokens[8].isEmpty() ? null : tokens[8]; + String geneID = tokens[9].isEmpty() ? null : tokens[9]; + String geneName = tokens[10].isEmpty() ? null : tokens[10]; + String bioType = tokens[11].isEmpty() ? null : tokens[11]; + String transcriptID = tokens[12].isEmpty() ? null : tokens[12]; + String exonID = tokens[13].isEmpty() ? null : tokens[13]; + Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]); + + boolean isNonCodingGene = isNonCodingGene(tokens[15]); + int effectFieldTokenLimit = isNonCodingGene ? 3 : 2; + String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit); + EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene); + String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene); + + String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16]; + String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17]; + Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]); + Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]); + String codonsAround = tokens[20].isEmpty() ? null : tokens[20]; + String aasAround = tokens[21].isEmpty() ? null : tokens[21]; + String customIntervalID = tokens[22].isEmpty() ? 
null : tokens[22]; + + return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage, + warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene, + effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize, + codonsAround, aasAround, customIntervalID); + } + catch ( NumberFormatException e ) { + throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line); + } + catch ( IllegalArgumentException e ) { + throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line); + } + } + + private void checkForRequiredFields ( String[] tokens, String line ) { + for ( int requiredFieldIndex : REQUIRED_FIELDS ) { + if ( tokens[requiredFieldIndex].isEmpty() ) { + throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" + + HEADER_FIELD_NAMES[requiredFieldIndex] + "\"", + line); + } + } + } + + private boolean isNonCodingGene ( String effectField ) { + return effectField.startsWith(NON_CODING_GENE_FLAG); + } + + private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) { + String effectName = ""; + + if ( effectFieldTokens.length > 1 && isNonCodingGene ) { + effectName = effectFieldTokens[1].trim(); + } + else { + effectName = effectFieldTokens[0].trim(); + } + + return EffectType.valueOf(effectName); + } + + private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { + if ( (effectFieldTokens.length == 2 && ! 
isNonCodingGene) || effectFieldTokens.length == 3 ) { + return effectFieldTokens[effectFieldTokens.length - 1]; + } + + return null; + } + + public Class getFeatureType() { + return SnpEffFeature.class; + } + + public Object readHeader ( LineReader reader ) { + String headerLine = ""; + + try { + headerLine = reader.readLine(); + } + catch ( IOException e ) { + throw new TribbleException("Unable to read header line from input file."); + } + + validateHeaderLine(headerLine); + return headerLine; + } + + private void validateHeaderLine ( String headerLine ) { + if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) { + throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START); + } + + String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN); + + if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) { + throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" + + EXPECTED_NUMBER_OF_FIELDS + ") of columns."); + } + + for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) { + if ( ! 
HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) { + throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" + + HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" + + headerTokens[columnIndex] + "\""); + } + } + } +} diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java new file mode 100644 index 000000000..f226c3523 --- /dev/null +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
/**
 * Holder for the enumerations used when decoding and ranking SnpEff output.
 */
public class SnpEffConstants {

    /**
     * Kinds of effect, each tagged with the impact level given at its declaration.
     */
    public enum EffectType {
        // High-impact effects
        START_GAINED(EffectImpact.HIGH),
        START_LOST(EffectImpact.HIGH),
        EXON_DELETED(EffectImpact.HIGH),
        FRAME_SHIFT(EffectImpact.HIGH),
        STOP_GAINED(EffectImpact.HIGH),
        STOP_LOST(EffectImpact.HIGH),
        SPLICE_SITE_ACCEPTOR(EffectImpact.HIGH),
        SPLICE_SITE_DONOR(EffectImpact.HIGH),

        // Moderate-impact effects
        NON_SYNONYMOUS_CODING(EffectImpact.MODERATE),
        UTR_5_DELETED(EffectImpact.MODERATE),
        UTR_3_DELETED(EffectImpact.MODERATE),
        CODON_INSERTION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_INSERTION(EffectImpact.MODERATE),
        CODON_DELETION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_DELETION(EffectImpact.MODERATE),

        // Low-impact effects
        NONE(EffectImpact.LOW),
        CHROMOSOME(EffectImpact.LOW),
        INTERGENIC(EffectImpact.LOW),
        UPSTREAM(EffectImpact.LOW),
        UTR_5_PRIME(EffectImpact.LOW),
        SYNONYMOUS_START(EffectImpact.LOW),
        NON_SYNONYMOUS_START(EffectImpact.LOW),
        CDS(EffectImpact.LOW),
        GENE(EffectImpact.LOW),
        TRANSCRIPT(EffectImpact.LOW),
        EXON(EffectImpact.LOW),
        SYNONYMOUS_CODING(EffectImpact.LOW),
        CODON_CHANGE(EffectImpact.LOW),
        SYNONYMOUS_STOP(EffectImpact.LOW),
        NON_SYNONYMOUS_STOP(EffectImpact.LOW),
        INTRON(EffectImpact.LOW),
        UTR_3_PRIME(EffectImpact.LOW),
        DOWNSTREAM(EffectImpact.LOW),
        INTRON_CONSERVED(EffectImpact.LOW),
        INTERGENIC_CONSERVED(EffectImpact.LOW),
        CUSTOM(EffectImpact.LOW);

        private final EffectImpact effectImpact;

        EffectType(EffectImpact effectImpact) {
            this.effectImpact = effectImpact;
        }

        /**
         * @return the impact level assigned to this effect type
         */
        public EffectImpact getImpact() {
            return effectImpact;
        }
    }

    /**
     * Impact levels, each carrying a numeric rating used only for comparison.
     */
    public enum EffectImpact {
        LOW(1),
        MODERATE(2),
        HIGH(3);

        private final int rank;

        EffectImpact(int rank) {
            this.rank = rank;
        }

        /**
         * @param other the impact level to compare against
         * @return true only when this level's rating is strictly greater than the other's
         */
        public boolean isHigherImpactThan(EffectImpact other) {
            return other.rank < rank;
        }
    }

    /** Values accepted in the "Change type" column. */
    public enum ChangeType {
        SNP,
        MNP,
        INS,
        DEL
    }

    /** Values accepted in the "Homozygous" column. */
    public enum Zygosity {
        Hom,
        Het
    }
}
+ */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.broad.tribble.Feature; + +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +public class SnpEffFeature implements Feature { + + private String contig; + private long position; + private String reference; + private String change; + private ChangeType changeType; + private Zygosity zygosity; + private Double quality; + private Long coverage; + private String warnings; + private String geneID; + private String geneName; + private String bioType; + private String transcriptID; + private String exonID; + private Integer exonRank; + private boolean isNonCodingGene; + private EffectType effect; + private String effectExtraInformation; + private String oldAndNewAA; + private String oldAndNewCodon; + private Integer codonNum; + private Integer cdsSize; + private String codonsAround; + private String aasAround; + private String customIntervalID; + + public SnpEffFeature ( String contig, + long position, + String reference, + String change, + ChangeType changeType, + Zygosity zygosity, + Double quality, + Long coverage, + String warnings, + String geneID, + String geneName, + String bioType, + String transcriptID, + String exonID, + Integer exonRank, + boolean isNonCodingGene, + EffectType effect, + String effectExtraInformation, + String oldAndNewAA, + String oldAndNewCodon, + Integer codonNum, + Integer cdsSize, + String codonsAround, + String aasAround, + String customIntervalID ) { + + this.contig = contig; + this.position = position; + this.reference = reference; + this.change = change; + this.changeType = changeType; + this.zygosity = zygosity; + this.quality = quality; + this.coverage = coverage; 
+ this.warnings = warnings; + this.geneID = geneID; + this.geneName = geneName; + this.bioType = bioType; + this.transcriptID = transcriptID; + this.exonID = exonID; + this.exonRank = exonRank; + this.isNonCodingGene = isNonCodingGene; + this.effect = effect; + this.effectExtraInformation = effectExtraInformation; + this.oldAndNewAA = oldAndNewAA; + this.oldAndNewCodon = oldAndNewCodon; + this.codonNum = codonNum; + this.cdsSize = cdsSize; + this.codonsAround = codonsAround; + this.aasAround = aasAround; + this.customIntervalID = customIntervalID; + } + + public String getChr() { + return contig; + } + + public int getStart() { + return (int)position; + } + + public int getEnd() { + return (int)position; + } + + public boolean hasReference() { + return reference != null; + } + + public String getReference() { + return reference; + } + + public boolean hasChange() { + return change != null; + } + + public String getChange() { + return change; + } + + public boolean hasChangeType() { + return changeType != null; + } + + public ChangeType getChangeType() { + return changeType; + } + + public boolean hasZygosity() { + return zygosity != null; + } + + public Zygosity getZygosity() { + return zygosity; + } + + public boolean hasQuality() { + return quality != null; + } + + public Double getQuality() { + return quality; + } + + public boolean hasCoverage() { + return coverage != null; + } + + public Long getCoverage() { + return coverage; + } + + public boolean hasWarnings() { + return warnings != null; + } + + public String getWarnings() { + return warnings; + } + + public boolean hasGeneID() { + return geneID != null; + } + + public String getGeneID() { + return geneID; + } + + public boolean hasGeneName() { + return geneName != null; + } + + public String getGeneName() { + return geneName; + } + + public boolean hasBioType() { + return bioType != null; + } + + public String getBioType() { + return bioType; + } + + public boolean hasTranscriptID() { + return 
transcriptID != null; + } + + public String getTranscriptID() { + return transcriptID; + } + + public boolean hasExonID() { + return exonID != null; + } + + public String getExonID() { + return exonID; + } + + public boolean hasExonRank() { + return exonRank != null; + } + + public Integer getExonRank() { + return exonRank; + } + + public boolean isNonCodingGene() { + return isNonCodingGene; + } + + public EffectType getEffect() { + return effect; + } + + public EffectImpact getEffectImpact() { + return effect.getImpact(); + } + + public boolean hasEffectExtraInformation() { + return effectExtraInformation != null; + } + + public String getEffectExtraInformation() { + return effectExtraInformation; + } + + public boolean hasOldAndNewAA() { + return oldAndNewAA != null; + } + + public String getOldAndNewAA() { + return oldAndNewAA; + } + + public boolean hasOldAndNewCodon() { + return oldAndNewCodon != null; + } + + public String getOldAndNewCodon() { + return oldAndNewCodon; + } + + public boolean hasCodonNum() { + return codonNum != null; + } + + public Integer getCodonNum() { + return codonNum; + } + + public boolean hasCdsSize() { + return cdsSize != null; + } + + public Integer getCdsSize() { + return cdsSize; + } + + public boolean hasCodonsAround() { + return codonsAround != null; + } + + public String getCodonsAround() { + return codonsAround; + } + + public boolean hadAasAround() { + return aasAround != null; + } + + public String getAasAround() { + return aasAround; + } + + public boolean hasCustomIntervalID() { + return customIntervalID != null; + } + + public String getCustomIntervalID() { + return customIntervalID; + } +} From dd974040af16b9d40dc9781b4926931bba24b304 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 10:46:18 -0400 Subject: [PATCH 2/5] When finding the highest-impact effect at a locus, all effects that are not within a non-coding gene are now considered higher impact than all effects that are within a non-coding gene. 
--- .../sting/gatk/walkers/annotator/SnpEff.java | 51 ++++++------------- .../utils/codecs/snpEff/SnpEffFeature.java | 11 ++++ 2 files changed, 26 insertions(+), 36 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index e834e6324..c307d4cc0 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -24,7 +24,6 @@ package org.broadinstitute.sting.gatk.walkers.annotator; -import org.apache.log4j.Logger; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; @@ -56,35 +55,15 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; - private static final String RMD_TRACK_NAME = "SnpEff"; - private static final Logger logger = Logger.getLogger(SnpEff.class); + public static final String RMD_TRACK_NAME = "SnpEff"; public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); - sanityCheckSnpEffFeatures(snpEffFeatures); - SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); return generateAnnotations(mostSignificantEffect); } - private void sanityCheckSnpEffFeatures( List snpEffFeatures ) { - Boolean locusIsNonCodingGene = null; - - for ( Object feature : snpEffFeatures ) { - SnpEffFeature snpEffFeature = (SnpEffFeature)feature; - - if ( locusIsNonCodingGene == null ) { - locusIsNonCodingGene = snpEffFeature.isNonCodingGene(); - } - else if ( ! 
locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) { - logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene", - snpEffFeature.getChr(), snpEffFeature.getStart())); - return; - } - } - } - private SnpEffFeature getMostSignificantEffect ( List snpEffFeatures ) { SnpEffFeature mostSignificantEffect = null; @@ -92,7 +71,7 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { SnpEffFeature snpEffFeature = (SnpEffFeature)feature; if ( mostSignificantEffect == null || - snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) { + snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) { mostSignificantEffect = snpEffFeature; } @@ -153,19 +132,19 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public List getDescriptions() { return Arrays.asList( - new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), - new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), - new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), - new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), - new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), - new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), - new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), - new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), - new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), - new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), - new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, 
VCFHeaderLineType.String, "Old/New codon"), - new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), - new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") + new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"), + new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"), + new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"), + new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"), + new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"), + new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"), + new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"), + new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())), + new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"), + new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"), + new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"), + new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"), + new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size") ); } } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index 3b9d6d4d6..cfa5a91ab 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -112,6 +112,17 @@ public class SnpEffFeature implements Feature { this.customIntervalID = 
customIntervalID; } + public boolean isHigherImpactThan ( SnpEffFeature other ) { + if ( ! isNonCodingGene() && other.isNonCodingGene() ) { + return true; + } + else if ( isNonCodingGene() && ! other.isNonCodingGene() ) { + return false; + } + + return getEffectImpact().isHigherImpactThan(other.getEffectImpact()); + } + public String getChr() { return contig; } From 5e288136e02a0816155f83472e391a7ef9cbbef5 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 16:51:43 -0400 Subject: [PATCH 3/5] Added unit tests for the SnpEff codec, and made minor adjustments to the codec itself. --- .../utils/codecs/snpEff/SnpEffCodec.java | 9 +- .../utils/codecs/snpEff/SnpEffFeature.java | 63 +++++ .../codecs/snpEff/SnpEffCodecUnitTest.java | 259 ++++++++++++++++++ 3 files changed, 330 insertions(+), 1 deletion(-) create mode 100644 public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index f5d77635a..dfe1f5f1a 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -80,6 +80,7 @@ public class SnpEffCodec implements FeatureCodec { } try { + trimAllFields(tokens); checkForRequiredFields(tokens, line); String contig = tokens[0]; @@ -126,6 +127,12 @@ public class SnpEffCodec implements FeatureCodec { } } + private void trimAllFields ( String[] tokens ) { + for ( int i = 0; i < tokens.length; i++ ) { + tokens[i] = tokens[i].trim(); + } + } + private void checkForRequiredFields ( String[] tokens, String line ) { for ( int requiredFieldIndex : REQUIRED_FIELDS ) { if ( tokens[requiredFieldIndex].isEmpty() ) { @@ -155,7 +162,7 @@ public class SnpEffCodec implements FeatureCodec { private String parseEffectExtraInformation ( String[] effectFieldTokens, 
boolean isNonCodingGene ) { if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { - return effectFieldTokens[effectFieldTokens.length - 1]; + return effectFieldTokens[effectFieldTokens.length - 1].trim(); } return null; diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index cfa5a91ab..4a68d7cf1 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -314,4 +314,67 @@ public class SnpEffFeature implements Feature { public String getCustomIntervalID() { return customIntervalID; } + + public boolean equals ( Object o ) { + if ( o == null || ! (o instanceof SnpEffFeature) ) { + return false; + } + + SnpEffFeature other = (SnpEffFeature)o; + + return contig.equals(other.contig) && + position == other.position && + (reference == null ? other.reference == null : reference.equals(other.reference)) && + (change == null ? other.change == null : change.equals(other.change)) && + changeType == other.changeType && + zygosity == other.zygosity && + (quality == null ? other.quality == null : quality.equals(other.quality)) && + (coverage == null ? other.coverage == null : coverage.equals(other.coverage)) && + (warnings == null ? other.warnings == null : warnings.equals(other.warnings)) && + (geneID == null ? other.geneID == null : geneID.equals(other.geneID)) && + (geneName == null ? other.geneName == null : geneName.equals(other.geneName)) && + (bioType == null ? other.bioType == null : bioType.equals(other.bioType)) && + (transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) && + (exonID == null ? other.exonID == null : exonID.equals(other.exonID)) && + (exonRank == null ? 
other.exonRank == null : exonRank.equals(other.exonRank)) && + isNonCodingGene == other.isNonCodingGene && + effect == other.effect && + (effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) && + (oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) && + (oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) && + (codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) && + (cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) && + (codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) && + (aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) && + (customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID)); + } + + public String toString() { + return "[Contig: " + contig + + " Position: " + position + + " Reference: " + reference + + " Change: " + change + + " Change Type: " + changeType + + " Zygosity: " + zygosity + + " Quality: " + quality + + " Coverage: " + coverage + + " Warnings: " + warnings + + " Gene ID: " + geneID + + " Gene Name: " + geneName + + " Bio Type: " + bioType + + " Transcript ID: " + transcriptID + + " Exon ID: " + exonID + + " Exon Rank: " + exonRank + + " Non-Coding Gene: " + isNonCodingGene + + " Effect: " + effect + + " Effect Extra Information: " + effectExtraInformation + + " Old/New AA: " + oldAndNewAA + + " Old/New Codon: " + oldAndNewCodon + + " Codon Num: " + codonNum + + " CDS Size: " + cdsSize + + " Codons Around: " + codonsAround + + " AAs Around: " + aasAround + + " Custom Interval ID: " + customIntervalID + + "]"; + } } diff --git a/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java new 
file mode 100644 index 000000000..6d492565b --- /dev/null +++ b/public/java/test/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodecUnitTest.java @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2011, The Broad Institute + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +package org.broadinstitute.sting.utils.codecs.snpEff; + +import org.apache.commons.io.input.ReaderInputStream; +import org.broad.tribble.TribbleException; +import org.broad.tribble.readers.AsciiLineReader; +import org.broad.tribble.readers.LineReader; +import org.testng.Assert; +import org.testng.annotations.Test; + +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; +import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; + +import java.io.StringReader; + +public class SnpEffCodecUnitTest { + + @Test + public void testParseWellFormedSnpEffHeaderLine() { + String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around\tCustom_interval_ID"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine))); + String headerReturned = (String)codec.readHeader(reader); + + Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine); + } + + @Test(expectedExceptions = TribbleException.InvalidHeader.class) + public void testParseWrongNumberOfFieldsSnpEffHeaderLine() { + String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine))); + codec.readHeader(reader); + } + 
+ @Test(expectedExceptions = TribbleException.InvalidHeader.class) + public void testParseMisnamedColumnSnpEffHeaderLine() { + String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" + + "Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" + + "Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" + + "AAs around\tCustom_interval_ID"; + + SnpEffCodec codec = new SnpEffCodec(); + LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine))); + codec.readHeader(reader); + } + + @Test + public void testParseSimpleEffectSnpEffLine() { + String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 69428l, + "T", + "G", + ChangeType.SNP, + Zygosity.Hom, + 6049.69, + 61573l, + null, + "ENSG00000177693", + "OR4F5", + "mRNA", + "ENST00000326183", + "exon_1_69055_70108", + 1, + false, + EffectType.NON_SYNONYMOUS_CODING, + null, + "F/C", + "TTT/TGT", + 113, + 918, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseNonCodingRegionSnpEffLine() { + String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" + + "RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" + + "L/V\tCTA/GTA\t272\t952\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 1337592l, + "G", + "C", + ChangeType.SNP, + Zygosity.Hom, + 1935.52, + 21885l, + null, + "ENSG00000250188", + "RP4-758J18.5", + "mRNA", + "ENST00000514958", + 
"exon_1_1337454_1338076", + 2, + true, + EffectType.NON_SYNONYMOUS_CODING, + null, + "L/V", + "CTA/GTA", + 272, + 952, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseExtraEffectInformationSnpEffLine() { + String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" + + "mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 879537l, + "T", + "C", + ChangeType.SNP, + Zygosity.Hom, + 341.58, + 13733l, + null, + "ENSG00000187634", + "SAMD11", + "mRNA", + "ENST00000341065", + null, + null, + false, + EffectType.UTR_3_PRIME, + "4 bases from transcript end", + null, + null, + null, + null, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + @Test + public void testParseMultiEffectSnpEffLine() { + String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" + + "ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t"; + + SnpEffFeature expectedFeature = new SnpEffFeature("1", + 901901l, + "C", + "T", + ChangeType.SNP, + Zygosity.Hom, + 162.91, + 4646l, + null, + "ENSG00000187583", + "PLEKHN1", + "mRNA", + "ENST00000379410", + "exon_1_901877_901994", + 1, + false, + EffectType.START_GAINED, + "ATG, UTR_5_PRIME: 11 bases from TSS", + null, + null, + null, + null, + null, + null, + null + ); + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine); + + Assert.assertEquals(feature, expectedFeature); + } + + 
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseWrongNumberOfFieldsSnpEffLine() { + String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t"; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine); + } + + @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseBlankEffectFieldSnpEffLine() { + String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t"; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine); + } + + @Test(expectedExceptions = TribbleException.InvalidDecodeLine.class) + public void testParseInvalidNumericFieldSnpEffLine() { + String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" + + "OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";; + + SnpEffCodec codec = new SnpEffCodec(); + SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine); + } +} From a13bc7b9290a914f788a10e3bd451e3788082c70 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 20:01:24 -0400 Subject: [PATCH 4/5] Added an integration test for the SnpEff annotation support, as well as some extra safety checks and comments. 
--- .../sting/gatk/walkers/annotator/SnpEff.java | 22 ++++++-- .../utils/codecs/snpEff/SnpEffCodec.java | 49 ++++++++++++++++++ .../utils/codecs/snpEff/SnpEffConstants.java | 8 +++ .../utils/codecs/snpEff/SnpEffFeature.java | 51 +++++++++++++++++-- .../VariantAnnotatorIntegrationTest.java | 11 ++++ 5 files changed, 134 insertions(+), 7 deletions(-) diff --git a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java index c307d4cc0..b9b97e154 100644 --- a/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java +++ b/public/java/src/org/broadinstitute/sting/gatk/walkers/annotator/SnpEff.java @@ -27,8 +27,8 @@ package org.broadinstitute.sting.gatk.walkers.annotator; import org.broadinstitute.sting.gatk.contexts.AlignmentContext; import org.broadinstitute.sting.gatk.contexts.ReferenceContext; import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker; +import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation; import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation; -import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation; import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants; import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature; @@ -38,9 +38,22 @@ import org.broadinstitute.sting.utils.variantcontext.VariantContext; import java.util.*; -public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { +/** + * A set of genomic annotations based on the output of the SnpEff variant effect predictor tool + * (http://snpeff.sourceforge.net/). + * + * For each variant, chooses one of the effects of highest biological impact from the SnpEff + * output file (which must be bound to an RMD track named "SnpEff"), and adds annotations + * on that effect. 
+ * + * The possible biological effects and their associated impacts are defined in the class: + * org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants + * + * @author David Roazen + */ +public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation { - // SnpEff field keys: + // SnpEff annotation key names: public static final String GENE_ID_KEY = "GENE_ID"; public static final String GENE_NAME_KEY = "GENE_NAME"; public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID"; @@ -55,11 +68,14 @@ public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation { public static final String CODON_NUM_KEY = "CODON_NUM"; public static final String CDS_SIZE_KEY = "CDS_SIZE"; + // Name of the RMD track bound to the raw SnpEff-generated output file: public static final String RMD_TRACK_NAME = "SnpEff"; public Map annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map stratifiedContexts, VariantContext vc ) { List snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME); + // Add only annotations for one of the most biologically-significant effects as defined in + // the SnpEffConstants class: SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures); return generateAnnotations(mostSignificantEffect); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java index dfe1f5f1a..827df16bb 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffCodec.java @@ -34,6 +34,40 @@ import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygos import java.io.IOException; +/** + * Codec for decoding the output format of the SnpEff variant effect predictor tool + * (http://snpeff.sourceforge.net/). 
+ * + * This format has 23 tab-delimited fields: + * + * Chromosome + * Position + * Reference + * Change + * Change Type: {SNP, MNP, INS, DEL} + * Zygosity: {Hom, Het} + * Quality + * Coverage + * Warnings + * Gene ID + * Gene Name + * Bio Type + * Transcript ID + * Exon ID + * Exon Rank + * Effect + * Old/New Amino Acid + * Old/New Codon + * Codon Num + * CDS Size + * Codons Around + * Amino Acids Around + * Custom Interval ID + * + * We treat all except the Chromosome, Position, and Effect fields as optional. + * + * @author David Roazen + */ public class SnpEffCodec implements FeatureCodec { public static final int EXPECTED_NUMBER_OF_FIELDS = 23; @@ -64,9 +98,13 @@ public class SnpEffCodec implements FeatureCodec { "AAs around", "Custom_interval_ID" }; + + // The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line: public static final int[] REQUIRED_FIELDS = { 0, 1, 15 }; + public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE"; + public Feature decodeLoc ( String line ) { return decode(line); } @@ -101,6 +139,11 @@ public class SnpEffCodec implements FeatureCodec { Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]); boolean isNonCodingGene = isNonCodingGene(tokens[15]); + + // Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present, + // otherwise split it into two subfields. We need this limit to prevent the extra effect-related information + // in the final field (when present) from being inappropriately tokenized: + int effectFieldTokenLimit = isNonCodingGene ? 
3 : 2; String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit); EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene); @@ -150,6 +193,9 @@ public class SnpEffCodec implements FeatureCodec { private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) { String effectName = ""; + // If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield, + // otherwise it will be in the first subfield: + if ( effectFieldTokens.length > 1 && isNonCodingGene ) { effectName = effectFieldTokens[1].trim(); } @@ -161,6 +207,9 @@ public class SnpEffCodec implements FeatureCodec { } private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) { + + // The extra effect-related information, if present, will always be the last subfield: + if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) { return effectFieldTokens[effectFieldTokens.length - 1].trim(); } diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java index f226c3523..270db470f 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffConstants.java @@ -24,8 +24,14 @@ package org.broadinstitute.sting.utils.codecs.snpEff; +/** + * A set of constants associated with the SnpEff codec. 
+ * + * @author David Roazen + */ public class SnpEffConstants { + // Possible SnpEff biological effects and their associated impacts: public enum EffectType { START_GAINED (EffectImpact.HIGH), START_LOST (EffectImpact.HIGH), @@ -93,6 +99,7 @@ public class SnpEffConstants { } } + // The kinds of variants supported by the SnpEff output format: public enum ChangeType { SNP, MNP, @@ -100,6 +107,7 @@ public class SnpEffConstants { DEL } + // Possible zygosities of SnpEff variants: public enum Zygosity { Hom, Het diff --git a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java index 4a68d7cf1..2f120b7d2 100644 --- a/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java +++ b/public/java/src/org/broadinstitute/sting/utils/codecs/snpEff/SnpEffFeature.java @@ -26,15 +26,26 @@ package org.broadinstitute.sting.utils.codecs.snpEff; import org.broad.tribble.Feature; +import java.util.NoSuchElementException; + import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType; import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity; +/** + * Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output. + * + * Many fields are optional, and missing values are represented by nulls. You should always call the + * hasX() method before calling the corresponding getX() method. Required fields can never be null + * and do not have a hasX() method. 
+ * + * @author David Roazen + */ public class SnpEffFeature implements Feature { - private String contig; - private long position; + private String contig; // REQUIRED FIELD + private long position; // REQUIRED FIELD private String reference; private String change; private ChangeType changeType; @@ -48,8 +59,8 @@ public class SnpEffFeature implements Feature { private String transcriptID; private String exonID; private Integer exonRank; - private boolean isNonCodingGene; - private EffectType effect; + private boolean isNonCodingGene; // REQUIRED FIELD + private EffectType effect; // REQUIRED FIELD private String effectExtraInformation; private String oldAndNewAA; private String oldAndNewCodon; @@ -85,6 +96,10 @@ public class SnpEffFeature implements Feature { String aasAround, String customIntervalID ) { + if ( contig == null || effect == null ) { + throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields"); + } + this.contig = contig; this.position = position; this.reference = reference; @@ -113,6 +128,10 @@ public class SnpEffFeature implements Feature { } public boolean isHigherImpactThan ( SnpEffFeature other ) { + + // If one effect is in a non-coding gene and the other is not, the effect NOT in the + // non-coding gene has higher impact: + if ( ! 
isNonCodingGene() && other.isNonCodingGene() ) { return true; } @@ -120,6 +139,9 @@ public class SnpEffFeature implements Feature { return false; } + // Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts + // of the effects themselves as defined in the SnpEffConstants class: + return getEffectImpact().isHigherImpactThan(other.getEffectImpact()); } @@ -140,6 +162,7 @@ public class SnpEffFeature implements Feature { } public String getReference() { + if ( reference == null ) throw new NoSuchElementException("This feature has no reference field"); return reference; } @@ -148,6 +171,7 @@ public class SnpEffFeature implements Feature { } public String getChange() { + if ( change == null ) throw new NoSuchElementException("This feature has no change field"); return change; } @@ -156,6 +180,7 @@ public class SnpEffFeature implements Feature { } public ChangeType getChangeType() { + if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field"); return changeType; } @@ -164,6 +189,7 @@ public class SnpEffFeature implements Feature { } public Zygosity getZygosity() { + if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field"); return zygosity; } @@ -172,6 +198,7 @@ public class SnpEffFeature implements Feature { } public Double getQuality() { + if ( quality == null ) throw new NoSuchElementException("This feature has no quality field"); return quality; } @@ -180,6 +207,7 @@ public class SnpEffFeature implements Feature { } public Long getCoverage() { + if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field"); return coverage; } @@ -188,6 +216,7 @@ public class SnpEffFeature implements Feature { } public String getWarnings() { + if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field"); return warnings; } @@ -196,6 +225,7 @@ public class SnpEffFeature implements Feature { } public String 
getGeneID() { + if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field"); return geneID; } @@ -204,6 +234,7 @@ public class SnpEffFeature implements Feature { } public String getGeneName() { + if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field"); return geneName; } @@ -212,6 +243,7 @@ public class SnpEffFeature implements Feature { } public String getBioType() { + if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field"); return bioType; } @@ -220,6 +252,7 @@ public class SnpEffFeature implements Feature { } public String getTranscriptID() { + if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field"); return transcriptID; } @@ -228,6 +261,7 @@ public class SnpEffFeature implements Feature { } public String getExonID() { + if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field"); return exonID; } @@ -236,6 +270,7 @@ public class SnpEffFeature implements Feature { } public Integer getExonRank() { + if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field"); return exonRank; } @@ -256,6 +291,7 @@ public class SnpEffFeature implements Feature { } public String getEffectExtraInformation() { + if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field"); return effectExtraInformation; } @@ -264,6 +300,7 @@ public class SnpEffFeature implements Feature { } public String getOldAndNewAA() { + if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field"); return oldAndNewAA; } @@ -272,6 +309,7 @@ public class SnpEffFeature implements Feature { } public String getOldAndNewCodon() { + if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field"); return oldAndNewCodon; } @@ -280,6 +318,7 @@ public class 
SnpEffFeature implements Feature { } public Integer getCodonNum() { + if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field"); return codonNum; } @@ -288,6 +327,7 @@ public class SnpEffFeature implements Feature { } public Integer getCdsSize() { + if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field"); return cdsSize; } @@ -296,6 +336,7 @@ public class SnpEffFeature implements Feature { } public String getCodonsAround() { + if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field"); return codonsAround; } @@ -304,6 +345,7 @@ public class SnpEffFeature implements Feature { } public String getAasAround() { + if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field"); return aasAround; } @@ -312,6 +354,7 @@ public class SnpEffFeature implements Feature { } public String getCustomIntervalID() { + if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field"); return customIntervalID; } diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index e6300e6c9..5dc7299a9 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -125,4 +125,15 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { executeTest("Testing lookup vcf tabix vs. 
vcf tribble", spec); } } + + @Test + public void testSnpEffAnnotations() { + WalkerTestSpec spec = new WalkerTestSpec( + "-T VariantAnnotator -R " + b37KGReference + " -o %s -A SnpEff -B:variant,VCF " + validationDataLocation + "/1000G.exomes.vcf " + + "-B:SnpEff,SnpEff " + validationDataLocation + "/snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out" + " -L 1", + 1, + Arrays.asList("5fe3644744d3c084a179c3d204555333") + ); + executeTest("Testing SnpEff annotations", spec); + } } From 28d8c8fcbc5487cd1525118cc38d8a9dc6f26663 Mon Sep 17 00:00:00 2001 From: David Roazen Date: Mon, 8 Aug 2011 21:07:13 -0400 Subject: [PATCH 5/5] Modified the SnpEff integration test to run on a much smaller interval. --- .../walkers/annotator/VariantAnnotatorIntegrationTest.java | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java index 5dc7299a9..173c57a15 100755 --- a/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java +++ b/public/java/test/org/broadinstitute/sting/gatk/walkers/annotator/VariantAnnotatorIntegrationTest.java @@ -129,10 +129,11 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest { @Test public void testSnpEffAnnotations() { WalkerTestSpec spec = new WalkerTestSpec( - "-T VariantAnnotator -R " + b37KGReference + " -o %s -A SnpEff -B:variant,VCF " + validationDataLocation + "/1000G.exomes.vcf " + - "-B:SnpEff,SnpEff " + validationDataLocation + "/snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out" + " -L 1", + "-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff -B:variant,VCF " + + validationDataLocation + "1000G.exomes.vcf -B:SnpEff,SnpEff " + validationDataLocation + + "snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000", 1, - 
Arrays.asList("5fe3644744d3c084a179c3d204555333") + Arrays.asList("c08648a078368c80530bff004b3157f1") ); executeTest("Testing SnpEff annotations", spec); }