Merge branch 'snpEff'
This commit is contained in:
commit
b180a1311a
|
|
@ -0,0 +1,166 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* A set of genomic annotations based on the output of the SnpEff variant effect predictor tool
|
||||
* (http://snpeff.sourceforge.net/).
|
||||
*
|
||||
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
|
||||
* output file (which must be bound to an RMD track named "SnpEff"), and adds annotations
|
||||
* on that effect.
|
||||
*
|
||||
* The possible biological effects and their associated impacts are defined in the class:
|
||||
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
// SnpEff annotation key names:
|
||||
public static final String GENE_ID_KEY = "GENE_ID";
|
||||
public static final String GENE_NAME_KEY = "GENE_NAME";
|
||||
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
|
||||
public static final String EXON_ID_KEY = "EXON_ID";
|
||||
public static final String EXON_RANK_KEY = "EXON_RANK";
|
||||
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
|
||||
public static final String EFFECT_KEY = "EFFECT";
|
||||
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
|
||||
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
|
||||
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
|
||||
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
|
||||
public static final String CODON_NUM_KEY = "CODON_NUM";
|
||||
public static final String CDS_SIZE_KEY = "CDS_SIZE";
|
||||
|
||||
// Name of the RMD track bound to the raw SnpEff-generated output file:
|
||||
public static final String RMD_TRACK_NAME = "SnpEff";
|
||||
|
||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||
List<Object> snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME);
|
||||
|
||||
// Add only annotations for one of the most biologically-significant effects as defined in
|
||||
// the SnpEffConstants class:
|
||||
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures);
|
||||
return generateAnnotations(mostSignificantEffect);
|
||||
}
|
||||
|
||||
private SnpEffFeature getMostSignificantEffect ( List<Object> snpEffFeatures ) {
|
||||
SnpEffFeature mostSignificantEffect = null;
|
||||
|
||||
for ( Object feature : snpEffFeatures ) {
|
||||
SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
|
||||
|
||||
if ( mostSignificantEffect == null ||
|
||||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
|
||||
|
||||
mostSignificantEffect = snpEffFeature;
|
||||
}
|
||||
}
|
||||
|
||||
return mostSignificantEffect;
|
||||
}
|
||||
|
||||
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
|
||||
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
|
||||
|
||||
if ( mostSignificantEffect.hasGeneID() )
|
||||
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
|
||||
if ( mostSignificantEffect.hasGeneName() )
|
||||
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
|
||||
if ( mostSignificantEffect.hasTranscriptID() )
|
||||
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
|
||||
if ( mostSignificantEffect.hasExonID() )
|
||||
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
|
||||
if ( mostSignificantEffect.hasExonRank() )
|
||||
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
|
||||
if ( mostSignificantEffect.isNonCodingGene() )
|
||||
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
|
||||
|
||||
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
|
||||
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
|
||||
if ( mostSignificantEffect.hasEffectExtraInformation() )
|
||||
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
|
||||
|
||||
if ( mostSignificantEffect.hasOldAndNewAA() )
|
||||
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
|
||||
if ( mostSignificantEffect.hasOldAndNewCodon() )
|
||||
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
|
||||
if ( mostSignificantEffect.hasCodonNum() )
|
||||
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
|
||||
if ( mostSignificantEffect.hasCdsSize() )
|
||||
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList( GENE_ID_KEY,
|
||||
GENE_NAME_KEY,
|
||||
TRANSCRIPT_ID_KEY,
|
||||
EXON_ID_KEY,
|
||||
EXON_RANK_KEY,
|
||||
WITHIN_NON_CODING_GENE_KEY,
|
||||
EFFECT_KEY,
|
||||
EFFECT_IMPACT_KEY,
|
||||
EFFECT_EXTRA_INFORMATION_KEY,
|
||||
OLD_NEW_AA_KEY,
|
||||
OLD_NEW_CODON_KEY,
|
||||
CODON_NUM_KEY,
|
||||
CDS_SIZE_KEY
|
||||
);
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"),
|
||||
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"),
|
||||
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"),
|
||||
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"),
|
||||
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"),
|
||||
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"),
|
||||
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"),
|
||||
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
||||
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"),
|
||||
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"),
|
||||
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"),
|
||||
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"),
|
||||
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size")
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,258 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Codec for decoding the output format of the SnpEff variant effect predictor tool
|
||||
* (http://snpeff.sourceforge.net/).
|
||||
*
|
||||
* This format has 23 tab-delimited fields:
|
||||
*
|
||||
* Chromosome
|
||||
* Position
|
||||
* Reference
|
||||
* Change
|
||||
* Change Type: {SNP, MNP, INS, DEL}
|
||||
* Zygosity: {Hom, Het}
|
||||
* Quality
|
||||
* Coverage
|
||||
* Warnings
|
||||
* Gene ID
|
||||
* Gene Name
|
||||
* Bio Type
|
||||
* Transcript ID
|
||||
* Exon ID
|
||||
* Exon Rank
|
||||
* Effect
|
||||
* Old/New Amino Acid
|
||||
* Old/New Codon
|
||||
* Codon Num
|
||||
* CDS Size
|
||||
* Codons Around
|
||||
* Amino Acids Around
|
||||
* Custom Interval ID
|
||||
*
|
||||
* We treat all except the Chromosome, Position, and Effect fields as optional.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffCodec implements FeatureCodec {
|
||||
|
||||
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
|
||||
public static final String FIELD_DELIMITER_PATTERN = "\\t";
|
||||
public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
|
||||
public static final String HEADER_LINE_START = "# ";
|
||||
public static final String[] HEADER_FIELD_NAMES = { "Chromo",
|
||||
"Position",
|
||||
"Reference",
|
||||
"Change",
|
||||
"Change type",
|
||||
"Homozygous",
|
||||
"Quality",
|
||||
"Coverage",
|
||||
"Warnings",
|
||||
"Gene_ID",
|
||||
"Gene_name",
|
||||
"Bio_type",
|
||||
"Trancript_ID", // yes, this is how it's spelled in the SnpEff output
|
||||
"Exon_ID",
|
||||
"Exon_Rank",
|
||||
"Effect",
|
||||
"old_AA/new_AA",
|
||||
"Old_codon/New_codon",
|
||||
"Codon_Num(CDS)",
|
||||
"CDS_size",
|
||||
"Codons around",
|
||||
"AAs around",
|
||||
"Custom_interval_ID"
|
||||
};
|
||||
|
||||
// The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
|
||||
public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
|
||||
|
||||
public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
|
||||
|
||||
|
||||
public Feature decodeLoc ( String line ) {
|
||||
return decode(line);
|
||||
}
|
||||
|
||||
public Feature decode ( String line ) {
|
||||
String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
|
||||
|
||||
if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
|
||||
") number of fields: found " + tokens.length + " fields.", line);
|
||||
}
|
||||
|
||||
try {
|
||||
trimAllFields(tokens);
|
||||
checkForRequiredFields(tokens, line);
|
||||
|
||||
String contig = tokens[0];
|
||||
long position = Long.parseLong(tokens[1]);
|
||||
|
||||
String reference = tokens[2].isEmpty() ? null : tokens[2];
|
||||
String change = tokens[3].isEmpty() ? null : tokens[3];
|
||||
ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
|
||||
Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
|
||||
Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
|
||||
Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
|
||||
String warnings = tokens[8].isEmpty() ? null : tokens[8];
|
||||
String geneID = tokens[9].isEmpty() ? null : tokens[9];
|
||||
String geneName = tokens[10].isEmpty() ? null : tokens[10];
|
||||
String bioType = tokens[11].isEmpty() ? null : tokens[11];
|
||||
String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
|
||||
String exonID = tokens[13].isEmpty() ? null : tokens[13];
|
||||
Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
|
||||
|
||||
boolean isNonCodingGene = isNonCodingGene(tokens[15]);
|
||||
|
||||
// Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
|
||||
// otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
|
||||
// in the final field (when present) from being inappropriately tokenized:
|
||||
|
||||
int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
|
||||
String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
|
||||
EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
|
||||
String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
|
||||
|
||||
String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
|
||||
String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
|
||||
Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
|
||||
Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
|
||||
String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
|
||||
String aasAround = tokens[21].isEmpty() ? null : tokens[21];
|
||||
String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
|
||||
|
||||
return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
|
||||
warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
|
||||
effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
|
||||
codonsAround, aasAround, customIntervalID);
|
||||
}
|
||||
catch ( NumberFormatException e ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
|
||||
}
|
||||
}
|
||||
|
||||
private void trimAllFields ( String[] tokens ) {
|
||||
for ( int i = 0; i < tokens.length; i++ ) {
|
||||
tokens[i] = tokens[i].trim();
|
||||
}
|
||||
}
|
||||
|
||||
private void checkForRequiredFields ( String[] tokens, String line ) {
|
||||
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
||||
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
|
||||
HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
|
||||
line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isNonCodingGene ( String effectField ) {
|
||||
return effectField.startsWith(NON_CODING_GENE_FLAG);
|
||||
}
|
||||
|
||||
private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
String effectName = "";
|
||||
|
||||
// If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
|
||||
// otherwise it will be in the first subfield:
|
||||
|
||||
if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
|
||||
effectName = effectFieldTokens[1].trim();
|
||||
}
|
||||
else {
|
||||
effectName = effectFieldTokens[0].trim();
|
||||
}
|
||||
|
||||
return EffectType.valueOf(effectName);
|
||||
}
|
||||
|
||||
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
|
||||
// The extra effect-related information, if present, will always be the last subfield:
|
||||
|
||||
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||
return effectFieldTokens[effectFieldTokens.length - 1].trim();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Class getFeatureType() {
|
||||
return SnpEffFeature.class;
|
||||
}
|
||||
|
||||
public Object readHeader ( LineReader reader ) {
|
||||
String headerLine = "";
|
||||
|
||||
try {
|
||||
headerLine = reader.readLine();
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new TribbleException("Unable to read header line from input file.");
|
||||
}
|
||||
|
||||
validateHeaderLine(headerLine);
|
||||
return headerLine;
|
||||
}
|
||||
|
||||
private void validateHeaderLine ( String headerLine ) {
|
||||
if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
|
||||
throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
|
||||
}
|
||||
|
||||
String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
|
||||
|
||||
if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||
throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
|
||||
EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
|
||||
}
|
||||
|
||||
for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
|
||||
if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
|
||||
throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
|
||||
HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
|
||||
headerTokens[columnIndex] + "\"");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,115 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
/**
|
||||
* A set of constants associated with the SnpEff codec.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffConstants {
|
||||
|
||||
// Possible SnpEff biological effects and their associated impacts:
|
||||
public enum EffectType {
|
||||
START_GAINED (EffectImpact.HIGH),
|
||||
START_LOST (EffectImpact.HIGH),
|
||||
EXON_DELETED (EffectImpact.HIGH),
|
||||
FRAME_SHIFT (EffectImpact.HIGH),
|
||||
STOP_GAINED (EffectImpact.HIGH),
|
||||
STOP_LOST (EffectImpact.HIGH),
|
||||
SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH),
|
||||
SPLICE_SITE_DONOR (EffectImpact.HIGH),
|
||||
|
||||
NON_SYNONYMOUS_CODING (EffectImpact.MODERATE),
|
||||
UTR_5_DELETED (EffectImpact.MODERATE),
|
||||
UTR_3_DELETED (EffectImpact.MODERATE),
|
||||
CODON_INSERTION (EffectImpact.MODERATE),
|
||||
CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
|
||||
CODON_DELETION (EffectImpact.MODERATE),
|
||||
CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE),
|
||||
|
||||
NONE (EffectImpact.LOW),
|
||||
CHROMOSOME (EffectImpact.LOW),
|
||||
INTERGENIC (EffectImpact.LOW),
|
||||
UPSTREAM (EffectImpact.LOW),
|
||||
UTR_5_PRIME (EffectImpact.LOW),
|
||||
SYNONYMOUS_START (EffectImpact.LOW),
|
||||
NON_SYNONYMOUS_START (EffectImpact.LOW),
|
||||
CDS (EffectImpact.LOW),
|
||||
GENE (EffectImpact.LOW),
|
||||
TRANSCRIPT (EffectImpact.LOW),
|
||||
EXON (EffectImpact.LOW),
|
||||
SYNONYMOUS_CODING (EffectImpact.LOW),
|
||||
CODON_CHANGE (EffectImpact.LOW),
|
||||
SYNONYMOUS_STOP (EffectImpact.LOW),
|
||||
NON_SYNONYMOUS_STOP (EffectImpact.LOW),
|
||||
INTRON (EffectImpact.LOW),
|
||||
UTR_3_PRIME (EffectImpact.LOW),
|
||||
DOWNSTREAM (EffectImpact.LOW),
|
||||
INTRON_CONSERVED (EffectImpact.LOW),
|
||||
INTERGENIC_CONSERVED (EffectImpact.LOW),
|
||||
CUSTOM (EffectImpact.LOW);
|
||||
|
||||
private final EffectImpact impact;
|
||||
|
||||
EffectType ( EffectImpact impact ) {
|
||||
this.impact = impact;
|
||||
}
|
||||
|
||||
public EffectImpact getImpact() {
|
||||
return impact;
|
||||
}
|
||||
}
|
||||
|
||||
public enum EffectImpact {
|
||||
LOW (1),
|
||||
MODERATE (2),
|
||||
HIGH (3);
|
||||
|
||||
private final int severityRating;
|
||||
|
||||
EffectImpact ( int severityRating ) {
|
||||
this.severityRating = severityRating;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( EffectImpact other ) {
|
||||
return this.severityRating > other.severityRating;
|
||||
}
|
||||
}
|
||||
|
||||
// The kinds of variants supported by the SnpEff output format:
|
||||
public enum ChangeType {
|
||||
SNP,
|
||||
MNP,
|
||||
INS,
|
||||
DEL
|
||||
}
|
||||
|
||||
// Possible zygosities of SnpEff variants:
|
||||
public enum Zygosity {
|
||||
Hom,
|
||||
Het
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,423 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
/**
|
||||
* Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
|
||||
*
|
||||
* Many fields are optional, and missing values are represented by nulls. You should always call the
|
||||
* hasX() method before calling the corresponding getX() method. Required fields can never be null
|
||||
* and do not have a hasX() method.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffFeature implements Feature {
|
||||
|
||||
private String contig; // REQUIRED FIELD
|
||||
private long position; // REQUIRED FIELD
|
||||
private String reference;
|
||||
private String change;
|
||||
private ChangeType changeType;
|
||||
private Zygosity zygosity;
|
||||
private Double quality;
|
||||
private Long coverage;
|
||||
private String warnings;
|
||||
private String geneID;
|
||||
private String geneName;
|
||||
private String bioType;
|
||||
private String transcriptID;
|
||||
private String exonID;
|
||||
private Integer exonRank;
|
||||
private boolean isNonCodingGene; // REQUIRED FIELD
|
||||
private EffectType effect; // REQUIRED FIELD
|
||||
private String effectExtraInformation;
|
||||
private String oldAndNewAA;
|
||||
private String oldAndNewCodon;
|
||||
private Integer codonNum;
|
||||
private Integer cdsSize;
|
||||
private String codonsAround;
|
||||
private String aasAround;
|
||||
private String customIntervalID;
|
||||
|
||||
public SnpEffFeature ( String contig,
|
||||
long position,
|
||||
String reference,
|
||||
String change,
|
||||
ChangeType changeType,
|
||||
Zygosity zygosity,
|
||||
Double quality,
|
||||
Long coverage,
|
||||
String warnings,
|
||||
String geneID,
|
||||
String geneName,
|
||||
String bioType,
|
||||
String transcriptID,
|
||||
String exonID,
|
||||
Integer exonRank,
|
||||
boolean isNonCodingGene,
|
||||
EffectType effect,
|
||||
String effectExtraInformation,
|
||||
String oldAndNewAA,
|
||||
String oldAndNewCodon,
|
||||
Integer codonNum,
|
||||
Integer cdsSize,
|
||||
String codonsAround,
|
||||
String aasAround,
|
||||
String customIntervalID ) {
|
||||
|
||||
if ( contig == null || effect == null ) {
|
||||
throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
|
||||
}
|
||||
|
||||
this.contig = contig;
|
||||
this.position = position;
|
||||
this.reference = reference;
|
||||
this.change = change;
|
||||
this.changeType = changeType;
|
||||
this.zygosity = zygosity;
|
||||
this.quality = quality;
|
||||
this.coverage = coverage;
|
||||
this.warnings = warnings;
|
||||
this.geneID = geneID;
|
||||
this.geneName = geneName;
|
||||
this.bioType = bioType;
|
||||
this.transcriptID = transcriptID;
|
||||
this.exonID = exonID;
|
||||
this.exonRank = exonRank;
|
||||
this.isNonCodingGene = isNonCodingGene;
|
||||
this.effect = effect;
|
||||
this.effectExtraInformation = effectExtraInformation;
|
||||
this.oldAndNewAA = oldAndNewAA;
|
||||
this.oldAndNewCodon = oldAndNewCodon;
|
||||
this.codonNum = codonNum;
|
||||
this.cdsSize = cdsSize;
|
||||
this.codonsAround = codonsAround;
|
||||
this.aasAround = aasAround;
|
||||
this.customIntervalID = customIntervalID;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( SnpEffFeature other ) {
|
||||
|
||||
// If one effect is in a non-coding gene and the other is not, the effect NOT in the
|
||||
// non-coding gene has higher impact:
|
||||
|
||||
if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
|
||||
return true;
|
||||
}
|
||||
else if ( isNonCodingGene() && ! other.isNonCodingGene() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
|
||||
// of the effects themselves as defined in the SnpEffConstants class:
|
||||
|
||||
return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
|
||||
}
|
||||
|
||||
public String getChr() {
|
||||
return contig;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return (int)position;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return (int)position;
|
||||
}
|
||||
|
||||
public boolean hasReference() {
|
||||
return reference != null;
|
||||
}
|
||||
|
||||
public String getReference() {
|
||||
if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
|
||||
return reference;
|
||||
}
|
||||
|
||||
public boolean hasChange() {
|
||||
return change != null;
|
||||
}
|
||||
|
||||
public String getChange() {
|
||||
if ( change == null ) throw new NoSuchElementException("This feature has no change field");
|
||||
return change;
|
||||
}
|
||||
|
||||
public boolean hasChangeType() {
|
||||
return changeType != null;
|
||||
}
|
||||
|
||||
public ChangeType getChangeType() {
|
||||
if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
|
||||
return changeType;
|
||||
}
|
||||
|
||||
public boolean hasZygosity() {
|
||||
return zygosity != null;
|
||||
}
|
||||
|
||||
public Zygosity getZygosity() {
|
||||
if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
|
||||
return zygosity;
|
||||
}
|
||||
|
||||
public boolean hasQuality() {
|
||||
return quality != null;
|
||||
}
|
||||
|
||||
public Double getQuality() {
|
||||
if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
|
||||
return quality;
|
||||
}
|
||||
|
||||
public boolean hasCoverage() {
|
||||
return coverage != null;
|
||||
}
|
||||
|
||||
public Long getCoverage() {
|
||||
if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public boolean hasWarnings() {
|
||||
return warnings != null;
|
||||
}
|
||||
|
||||
public String getWarnings() {
|
||||
if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
|
||||
return warnings;
|
||||
}
|
||||
|
||||
public boolean hasGeneID() {
|
||||
return geneID != null;
|
||||
}
|
||||
|
||||
public String getGeneID() {
|
||||
if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
|
||||
return geneID;
|
||||
}
|
||||
|
||||
public boolean hasGeneName() {
|
||||
return geneName != null;
|
||||
}
|
||||
|
||||
public String getGeneName() {
|
||||
if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
|
||||
return geneName;
|
||||
}
|
||||
|
||||
public boolean hasBioType() {
|
||||
return bioType != null;
|
||||
}
|
||||
|
||||
public String getBioType() {
|
||||
if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
|
||||
return bioType;
|
||||
}
|
||||
|
||||
public boolean hasTranscriptID() {
|
||||
return transcriptID != null;
|
||||
}
|
||||
|
||||
public String getTranscriptID() {
|
||||
if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
|
||||
return transcriptID;
|
||||
}
|
||||
|
||||
public boolean hasExonID() {
|
||||
return exonID != null;
|
||||
}
|
||||
|
||||
public String getExonID() {
|
||||
if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
|
||||
return exonID;
|
||||
}
|
||||
|
||||
public boolean hasExonRank() {
|
||||
return exonRank != null;
|
||||
}
|
||||
|
||||
public Integer getExonRank() {
|
||||
if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
|
||||
return exonRank;
|
||||
}
|
||||
|
||||
public boolean isNonCodingGene() {
|
||||
return isNonCodingGene;
|
||||
}
|
||||
|
||||
public EffectType getEffect() {
|
||||
return effect;
|
||||
}
|
||||
|
||||
public EffectImpact getEffectImpact() {
|
||||
return effect.getImpact();
|
||||
}
|
||||
|
||||
public boolean hasEffectExtraInformation() {
|
||||
return effectExtraInformation != null;
|
||||
}
|
||||
|
||||
public String getEffectExtraInformation() {
|
||||
if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
|
||||
return effectExtraInformation;
|
||||
}
|
||||
|
||||
public boolean hasOldAndNewAA() {
|
||||
return oldAndNewAA != null;
|
||||
}
|
||||
|
||||
public String getOldAndNewAA() {
|
||||
if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
|
||||
return oldAndNewAA;
|
||||
}
|
||||
|
||||
public boolean hasOldAndNewCodon() {
|
||||
return oldAndNewCodon != null;
|
||||
}
|
||||
|
||||
public String getOldAndNewCodon() {
|
||||
if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
|
||||
return oldAndNewCodon;
|
||||
}
|
||||
|
||||
public boolean hasCodonNum() {
|
||||
return codonNum != null;
|
||||
}
|
||||
|
||||
public Integer getCodonNum() {
|
||||
if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
|
||||
return codonNum;
|
||||
}
|
||||
|
||||
public boolean hasCdsSize() {
|
||||
return cdsSize != null;
|
||||
}
|
||||
|
||||
public Integer getCdsSize() {
|
||||
if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
|
||||
return cdsSize;
|
||||
}
|
||||
|
||||
public boolean hasCodonsAround() {
|
||||
return codonsAround != null;
|
||||
}
|
||||
|
||||
public String getCodonsAround() {
|
||||
if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
|
||||
return codonsAround;
|
||||
}
|
||||
|
||||
public boolean hadAasAround() {
|
||||
return aasAround != null;
|
||||
}
|
||||
|
||||
public String getAasAround() {
|
||||
if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
|
||||
return aasAround;
|
||||
}
|
||||
|
||||
public boolean hasCustomIntervalID() {
|
||||
return customIntervalID != null;
|
||||
}
|
||||
|
||||
public String getCustomIntervalID() {
|
||||
if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
|
||||
return customIntervalID;
|
||||
}
|
||||
|
||||
public boolean equals ( Object o ) {
|
||||
if ( o == null || ! (o instanceof SnpEffFeature) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SnpEffFeature other = (SnpEffFeature)o;
|
||||
|
||||
return contig.equals(other.contig) &&
|
||||
position == other.position &&
|
||||
(reference == null ? other.reference == null : reference.equals(other.reference)) &&
|
||||
(change == null ? other.change == null : change.equals(other.change)) &&
|
||||
changeType == other.changeType &&
|
||||
zygosity == other.zygosity &&
|
||||
(quality == null ? other.quality == null : quality.equals(other.quality)) &&
|
||||
(coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
|
||||
(warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
|
||||
(geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
|
||||
(geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
|
||||
(bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
|
||||
(transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
|
||||
(exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
|
||||
(exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
|
||||
isNonCodingGene == other.isNonCodingGene &&
|
||||
effect == other.effect &&
|
||||
(effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
|
||||
(oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
|
||||
(oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
|
||||
(codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
|
||||
(cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
|
||||
(codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
|
||||
(aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
|
||||
(customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "[Contig: " + contig +
|
||||
" Position: " + position +
|
||||
" Reference: " + reference +
|
||||
" Change: " + change +
|
||||
" Change Type: " + changeType +
|
||||
" Zygosity: " + zygosity +
|
||||
" Quality: " + quality +
|
||||
" Coverage: " + coverage +
|
||||
" Warnings: " + warnings +
|
||||
" Gene ID: " + geneID +
|
||||
" Gene Name: " + geneName +
|
||||
" Bio Type: " + bioType +
|
||||
" Transcript ID: " + transcriptID +
|
||||
" Exon ID: " + exonID +
|
||||
" Exon Rank: " + exonRank +
|
||||
" Non-Coding Gene: " + isNonCodingGene +
|
||||
" Effect: " + effect +
|
||||
" Effect Extra Information: " + effectExtraInformation +
|
||||
" Old/New AA: " + oldAndNewAA +
|
||||
" Old/New Codon: " + oldAndNewCodon +
|
||||
" Codon Num: " + codonNum +
|
||||
" CDS Size: " + cdsSize +
|
||||
" Codons Around: " + codonsAround +
|
||||
" AAs Around: " + aasAround +
|
||||
" Custom Interval ID: " + customIntervalID +
|
||||
"]";
|
||||
}
|
||||
}
|
||||
|
|
@ -125,4 +125,16 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
executeTest("Testing lookup vcf tabix vs. vcf tribble", spec);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSnpEffAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff -B:variant,VCF " +
|
||||
validationDataLocation + "1000G.exomes.vcf -B:SnpEff,SnpEff " + validationDataLocation +
|
||||
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
|
||||
1,
|
||||
Arrays.asList("c08648a078368c80530bff004b3157f1")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,259 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.apache.commons.io.input.ReaderInputStream;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
public class SnpEffCodecUnitTest {
|
||||
|
||||
@Test
|
||||
public void testParseWellFormedSnpEffHeaderLine() {
|
||||
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
|
||||
String headerReturned = (String)codec.readHeader(reader);
|
||||
|
||||
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
|
||||
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseMisnamedColumnSnpEffHeaderLine() {
|
||||
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSimpleEffectSnpEffLine() {
|
||||
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
69428l,
|
||||
"T",
|
||||
"G",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
6049.69,
|
||||
61573l,
|
||||
null,
|
||||
"ENSG00000177693",
|
||||
"OR4F5",
|
||||
"mRNA",
|
||||
"ENST00000326183",
|
||||
"exon_1_69055_70108",
|
||||
1,
|
||||
false,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"F/C",
|
||||
"TTT/TGT",
|
||||
113,
|
||||
918,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseNonCodingRegionSnpEffLine() {
|
||||
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
|
||||
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
|
||||
"L/V\tCTA/GTA\t272\t952\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
1337592l,
|
||||
"G",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
1935.52,
|
||||
21885l,
|
||||
null,
|
||||
"ENSG00000250188",
|
||||
"RP4-758J18.5",
|
||||
"mRNA",
|
||||
"ENST00000514958",
|
||||
"exon_1_1337454_1338076",
|
||||
2,
|
||||
true,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"L/V",
|
||||
"CTA/GTA",
|
||||
272,
|
||||
952,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseExtraEffectInformationSnpEffLine() {
|
||||
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
|
||||
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
879537l,
|
||||
"T",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
341.58,
|
||||
13733l,
|
||||
null,
|
||||
"ENSG00000187634",
|
||||
"SAMD11",
|
||||
"mRNA",
|
||||
"ENST00000341065",
|
||||
null,
|
||||
null,
|
||||
false,
|
||||
EffectType.UTR_3_PRIME,
|
||||
"4 bases from transcript end",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseMultiEffectSnpEffLine() {
|
||||
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
|
||||
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
901901l,
|
||||
"C",
|
||||
"T",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
162.91,
|
||||
4646l,
|
||||
null,
|
||||
"ENSG00000187583",
|
||||
"PLEKHN1",
|
||||
"mRNA",
|
||||
"ENST00000379410",
|
||||
"exon_1_901877_901994",
|
||||
1,
|
||||
false,
|
||||
EffectType.START_GAINED,
|
||||
"ATG, UTR_5_PRIME: 11 bases from TSS",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffLine() {
|
||||
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseBlankEffectFieldSnpEffLine() {
|
||||
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseInvalidNumericFieldSnpEffLine() {
|
||||
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue