Initial support for adding genomic annotations through VariantAnnotator using
the output from the SnpEff tool, which replaces the old Genomic Annotator.
This commit is contained in:
parent
99e3a72343
commit
c1061e994c
|
|
@ -0,0 +1,171 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||||
|
|
||||||
|
import org.apache.log4j.Logger;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||||
|
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||||
|
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||||
|
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.StandardAnnotation;
|
||||||
|
import org.broadinstitute.sting.utils.Utils;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||||
|
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||||
|
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||||
|
|
||||||
|
import java.util.*;
|
||||||
|
|
||||||
|
public class SnpEff extends InfoFieldAnnotation implements StandardAnnotation {
|
||||||
|
|
||||||
|
// SnpEff field keys:
|
||||||
|
public static final String GENE_ID_KEY = "GENE_ID";
|
||||||
|
public static final String GENE_NAME_KEY = "GENE_NAME";
|
||||||
|
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
|
||||||
|
public static final String EXON_ID_KEY = "EXON_ID";
|
||||||
|
public static final String EXON_RANK_KEY = "EXON_RANK";
|
||||||
|
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
|
||||||
|
public static final String EFFECT_KEY = "EFFECT";
|
||||||
|
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
|
||||||
|
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
|
||||||
|
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
|
||||||
|
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
|
||||||
|
public static final String CODON_NUM_KEY = "CODON_NUM";
|
||||||
|
public static final String CDS_SIZE_KEY = "CDS_SIZE";
|
||||||
|
|
||||||
|
private static final String RMD_TRACK_NAME = "SnpEff";
|
||||||
|
private static final Logger logger = Logger.getLogger(SnpEff.class);
|
||||||
|
|
||||||
|
public Map<String, Object> annotate ( RefMetaDataTracker tracker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||||
|
List<Object> snpEffFeatures = tracker.getReferenceMetaData(RMD_TRACK_NAME);
|
||||||
|
|
||||||
|
sanityCheckSnpEffFeatures(snpEffFeatures);
|
||||||
|
|
||||||
|
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(snpEffFeatures);
|
||||||
|
return generateAnnotations(mostSignificantEffect);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void sanityCheckSnpEffFeatures( List<Object> snpEffFeatures ) {
|
||||||
|
Boolean locusIsNonCodingGene = null;
|
||||||
|
|
||||||
|
for ( Object feature : snpEffFeatures ) {
|
||||||
|
SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
|
||||||
|
|
||||||
|
if ( locusIsNonCodingGene == null ) {
|
||||||
|
locusIsNonCodingGene = snpEffFeature.isNonCodingGene();
|
||||||
|
}
|
||||||
|
else if ( ! locusIsNonCodingGene.equals(snpEffFeature.isNonCodingGene()) ) {
|
||||||
|
logger.warn(String.format("Locus %s:%d is marked as both within and not within a non-coding gene",
|
||||||
|
snpEffFeature.getChr(), snpEffFeature.getStart()));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private SnpEffFeature getMostSignificantEffect ( List<Object> snpEffFeatures ) {
|
||||||
|
SnpEffFeature mostSignificantEffect = null;
|
||||||
|
|
||||||
|
for ( Object feature : snpEffFeatures ) {
|
||||||
|
SnpEffFeature snpEffFeature = (SnpEffFeature)feature;
|
||||||
|
|
||||||
|
if ( mostSignificantEffect == null ||
|
||||||
|
snpEffFeature.getEffectImpact().isHigherImpactThan(mostSignificantEffect.getEffectImpact()) ) {
|
||||||
|
|
||||||
|
mostSignificantEffect = snpEffFeature;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return mostSignificantEffect;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
|
||||||
|
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
|
||||||
|
|
||||||
|
if ( mostSignificantEffect.hasGeneID() )
|
||||||
|
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
|
||||||
|
if ( mostSignificantEffect.hasGeneName() )
|
||||||
|
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
|
||||||
|
if ( mostSignificantEffect.hasTranscriptID() )
|
||||||
|
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
|
||||||
|
if ( mostSignificantEffect.hasExonID() )
|
||||||
|
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
|
||||||
|
if ( mostSignificantEffect.hasExonRank() )
|
||||||
|
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
|
||||||
|
if ( mostSignificantEffect.isNonCodingGene() )
|
||||||
|
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
|
||||||
|
|
||||||
|
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
|
||||||
|
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
|
||||||
|
if ( mostSignificantEffect.hasEffectExtraInformation() )
|
||||||
|
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
|
||||||
|
|
||||||
|
if ( mostSignificantEffect.hasOldAndNewAA() )
|
||||||
|
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
|
||||||
|
if ( mostSignificantEffect.hasOldAndNewCodon() )
|
||||||
|
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
|
||||||
|
if ( mostSignificantEffect.hasCodonNum() )
|
||||||
|
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
|
||||||
|
if ( mostSignificantEffect.hasCdsSize() )
|
||||||
|
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
|
||||||
|
|
||||||
|
return annotations;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getKeyNames() {
|
||||||
|
return Arrays.asList( GENE_ID_KEY,
|
||||||
|
GENE_NAME_KEY,
|
||||||
|
TRANSCRIPT_ID_KEY,
|
||||||
|
EXON_ID_KEY,
|
||||||
|
EXON_RANK_KEY,
|
||||||
|
WITHIN_NON_CODING_GENE_KEY,
|
||||||
|
EFFECT_KEY,
|
||||||
|
EFFECT_IMPACT_KEY,
|
||||||
|
EFFECT_EXTRA_INFORMATION_KEY,
|
||||||
|
OLD_NEW_AA_KEY,
|
||||||
|
OLD_NEW_CODON_KEY,
|
||||||
|
CODON_NUM_KEY,
|
||||||
|
CDS_SIZE_KEY
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||||
|
return Arrays.asList(
|
||||||
|
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID"),
|
||||||
|
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name"),
|
||||||
|
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID"),
|
||||||
|
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID"),
|
||||||
|
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank"),
|
||||||
|
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If present, gene is non-coding"),
|
||||||
|
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "One of the most high-impact effects across all transcripts at this site"),
|
||||||
|
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the effect " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
||||||
|
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the effect"),
|
||||||
|
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid"),
|
||||||
|
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon"),
|
||||||
|
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number"),
|
||||||
|
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size")
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,202 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broad.tribble.FeatureCodec;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class SnpEffCodec implements FeatureCodec {
|
||||||
|
|
||||||
|
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
|
||||||
|
public static final String FIELD_DELIMITER_PATTERN = "\\t";
|
||||||
|
public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
|
||||||
|
public static final String HEADER_LINE_START = "# ";
|
||||||
|
public static final String[] HEADER_FIELD_NAMES = { "Chromo",
|
||||||
|
"Position",
|
||||||
|
"Reference",
|
||||||
|
"Change",
|
||||||
|
"Change type",
|
||||||
|
"Homozygous",
|
||||||
|
"Quality",
|
||||||
|
"Coverage",
|
||||||
|
"Warnings",
|
||||||
|
"Gene_ID",
|
||||||
|
"Gene_name",
|
||||||
|
"Bio_type",
|
||||||
|
"Trancript_ID", // yes, this is how it's spelled in the SnpEff output
|
||||||
|
"Exon_ID",
|
||||||
|
"Exon_Rank",
|
||||||
|
"Effect",
|
||||||
|
"old_AA/new_AA",
|
||||||
|
"Old_codon/New_codon",
|
||||||
|
"Codon_Num(CDS)",
|
||||||
|
"CDS_size",
|
||||||
|
"Codons around",
|
||||||
|
"AAs around",
|
||||||
|
"Custom_interval_ID"
|
||||||
|
};
|
||||||
|
public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
|
||||||
|
public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
|
||||||
|
|
||||||
|
public Feature decodeLoc ( String line ) {
|
||||||
|
return decode(line);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Feature decode ( String line ) {
|
||||||
|
String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
|
||||||
|
|
||||||
|
if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||||
|
throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
|
||||||
|
") number of fields: found " + tokens.length + " fields.", line);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
checkForRequiredFields(tokens, line);
|
||||||
|
|
||||||
|
String contig = tokens[0];
|
||||||
|
long position = Long.parseLong(tokens[1]);
|
||||||
|
|
||||||
|
String reference = tokens[2].isEmpty() ? null : tokens[2];
|
||||||
|
String change = tokens[3].isEmpty() ? null : tokens[3];
|
||||||
|
ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
|
||||||
|
Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
|
||||||
|
Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
|
||||||
|
Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
|
||||||
|
String warnings = tokens[8].isEmpty() ? null : tokens[8];
|
||||||
|
String geneID = tokens[9].isEmpty() ? null : tokens[9];
|
||||||
|
String geneName = tokens[10].isEmpty() ? null : tokens[10];
|
||||||
|
String bioType = tokens[11].isEmpty() ? null : tokens[11];
|
||||||
|
String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
|
||||||
|
String exonID = tokens[13].isEmpty() ? null : tokens[13];
|
||||||
|
Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
|
||||||
|
|
||||||
|
boolean isNonCodingGene = isNonCodingGene(tokens[15]);
|
||||||
|
int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
|
||||||
|
String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
|
||||||
|
EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
|
||||||
|
String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
|
||||||
|
|
||||||
|
String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
|
||||||
|
String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
|
||||||
|
Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
|
||||||
|
Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
|
||||||
|
String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
|
||||||
|
String aasAround = tokens[21].isEmpty() ? null : tokens[21];
|
||||||
|
String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
|
||||||
|
|
||||||
|
return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
|
||||||
|
warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
|
||||||
|
effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
|
||||||
|
codonsAround, aasAround, customIntervalID);
|
||||||
|
}
|
||||||
|
catch ( NumberFormatException e ) {
|
||||||
|
throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
|
||||||
|
}
|
||||||
|
catch ( IllegalArgumentException e ) {
|
||||||
|
throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkForRequiredFields ( String[] tokens, String line ) {
|
||||||
|
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
||||||
|
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
||||||
|
throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
|
||||||
|
HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
|
||||||
|
line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isNonCodingGene ( String effectField ) {
|
||||||
|
return effectField.startsWith(NON_CODING_GENE_FLAG);
|
||||||
|
}
|
||||||
|
|
||||||
|
private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||||
|
String effectName = "";
|
||||||
|
|
||||||
|
if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
|
||||||
|
effectName = effectFieldTokens[1].trim();
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
effectName = effectFieldTokens[0].trim();
|
||||||
|
}
|
||||||
|
|
||||||
|
return EffectType.valueOf(effectName);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||||
|
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||||
|
return effectFieldTokens[effectFieldTokens.length - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Class getFeatureType() {
|
||||||
|
return SnpEffFeature.class;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Object readHeader ( LineReader reader ) {
|
||||||
|
String headerLine = "";
|
||||||
|
|
||||||
|
try {
|
||||||
|
headerLine = reader.readLine();
|
||||||
|
}
|
||||||
|
catch ( IOException e ) {
|
||||||
|
throw new TribbleException("Unable to read header line from input file.");
|
||||||
|
}
|
||||||
|
|
||||||
|
validateHeaderLine(headerLine);
|
||||||
|
return headerLine;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void validateHeaderLine ( String headerLine ) {
|
||||||
|
if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
|
||||||
|
throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
|
||||||
|
}
|
||||||
|
|
||||||
|
String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
|
||||||
|
|
||||||
|
if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||||
|
throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
|
||||||
|
EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
|
||||||
|
}
|
||||||
|
|
||||||
|
for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
|
||||||
|
if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
|
||||||
|
throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
|
||||||
|
HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
|
||||||
|
headerTokens[columnIndex] + "\"");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,107 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||||
|
|
||||||
|
/**
 * Enumerations describing the values that appear in SnpEff output.
 */
public class SnpEffConstants {

    /**
     * The kinds of effect a variant can have. Each effect type carries a fixed
     * {@link EffectImpact}.
     */
    public enum EffectType {
        // High-impact effects
        START_GAINED(EffectImpact.HIGH),
        START_LOST(EffectImpact.HIGH),
        EXON_DELETED(EffectImpact.HIGH),
        FRAME_SHIFT(EffectImpact.HIGH),
        STOP_GAINED(EffectImpact.HIGH),
        STOP_LOST(EffectImpact.HIGH),
        SPLICE_SITE_ACCEPTOR(EffectImpact.HIGH),
        SPLICE_SITE_DONOR(EffectImpact.HIGH),

        // Moderate-impact effects
        NON_SYNONYMOUS_CODING(EffectImpact.MODERATE),
        UTR_5_DELETED(EffectImpact.MODERATE),
        UTR_3_DELETED(EffectImpact.MODERATE),
        CODON_INSERTION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_INSERTION(EffectImpact.MODERATE),
        CODON_DELETION(EffectImpact.MODERATE),
        CODON_CHANGE_PLUS_CODON_DELETION(EffectImpact.MODERATE),

        // Low-impact effects
        NONE(EffectImpact.LOW),
        CHROMOSOME(EffectImpact.LOW),
        INTERGENIC(EffectImpact.LOW),
        UPSTREAM(EffectImpact.LOW),
        UTR_5_PRIME(EffectImpact.LOW),
        SYNONYMOUS_START(EffectImpact.LOW),
        NON_SYNONYMOUS_START(EffectImpact.LOW),
        CDS(EffectImpact.LOW),
        GENE(EffectImpact.LOW),
        TRANSCRIPT(EffectImpact.LOW),
        EXON(EffectImpact.LOW),
        SYNONYMOUS_CODING(EffectImpact.LOW),
        CODON_CHANGE(EffectImpact.LOW),
        SYNONYMOUS_STOP(EffectImpact.LOW),
        NON_SYNONYMOUS_STOP(EffectImpact.LOW),
        INTRON(EffectImpact.LOW),
        UTR_3_PRIME(EffectImpact.LOW),
        DOWNSTREAM(EffectImpact.LOW),
        INTRON_CONSERVED(EffectImpact.LOW),
        INTERGENIC_CONSERVED(EffectImpact.LOW),
        CUSTOM(EffectImpact.LOW);

        private final EffectImpact effectImpact;

        EffectType ( EffectImpact effectImpact ) {
            this.effectImpact = effectImpact;
        }

        /** @return the impact category assigned to this effect type */
        public EffectImpact getImpact() {
            return effectImpact;
        }
    }

    /**
     * Impact categories, each with a numeric rating used to compare them.
     */
    public enum EffectImpact {
        LOW(1),
        MODERATE(2),
        HIGH(3);

        private final int rating;

        EffectImpact ( int rating ) {
            this.rating = rating;
        }

        /**
         * @param other the impact to compare against
         * @return true only if this impact is rated strictly higher than {@code other};
         *         equal impacts yield false
         */
        public boolean isHigherImpactThan ( EffectImpact other ) {
            return other.rating < rating;
        }
    }

    /** Values of the "Change type" column. */
    public enum ChangeType {
        SNP,
        MNP,
        INS,
        DEL
    }

    /** Values of the "Homozygous" column. */
    public enum Zygosity {
        Hom,
        Het
    }
}
|
||||||
|
|
@ -0,0 +1,306 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2011, The Broad Institute
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||||
|
|
||||||
|
import org.broad.tribble.Feature;
|
||||||
|
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||||
|
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||||
|
|
||||||
|
public class SnpEffFeature implements Feature {
|
||||||
|
|
||||||
|
private String contig;
|
||||||
|
private long position;
|
||||||
|
private String reference;
|
||||||
|
private String change;
|
||||||
|
private ChangeType changeType;
|
||||||
|
private Zygosity zygosity;
|
||||||
|
private Double quality;
|
||||||
|
private Long coverage;
|
||||||
|
private String warnings;
|
||||||
|
private String geneID;
|
||||||
|
private String geneName;
|
||||||
|
private String bioType;
|
||||||
|
private String transcriptID;
|
||||||
|
private String exonID;
|
||||||
|
private Integer exonRank;
|
||||||
|
private boolean isNonCodingGene;
|
||||||
|
private EffectType effect;
|
||||||
|
private String effectExtraInformation;
|
||||||
|
private String oldAndNewAA;
|
||||||
|
private String oldAndNewCodon;
|
||||||
|
private Integer codonNum;
|
||||||
|
private Integer cdsSize;
|
||||||
|
private String codonsAround;
|
||||||
|
private String aasAround;
|
||||||
|
private String customIntervalID;
|
||||||
|
|
||||||
|
public SnpEffFeature ( String contig,
|
||||||
|
long position,
|
||||||
|
String reference,
|
||||||
|
String change,
|
||||||
|
ChangeType changeType,
|
||||||
|
Zygosity zygosity,
|
||||||
|
Double quality,
|
||||||
|
Long coverage,
|
||||||
|
String warnings,
|
||||||
|
String geneID,
|
||||||
|
String geneName,
|
||||||
|
String bioType,
|
||||||
|
String transcriptID,
|
||||||
|
String exonID,
|
||||||
|
Integer exonRank,
|
||||||
|
boolean isNonCodingGene,
|
||||||
|
EffectType effect,
|
||||||
|
String effectExtraInformation,
|
||||||
|
String oldAndNewAA,
|
||||||
|
String oldAndNewCodon,
|
||||||
|
Integer codonNum,
|
||||||
|
Integer cdsSize,
|
||||||
|
String codonsAround,
|
||||||
|
String aasAround,
|
||||||
|
String customIntervalID ) {
|
||||||
|
|
||||||
|
this.contig = contig;
|
||||||
|
this.position = position;
|
||||||
|
this.reference = reference;
|
||||||
|
this.change = change;
|
||||||
|
this.changeType = changeType;
|
||||||
|
this.zygosity = zygosity;
|
||||||
|
this.quality = quality;
|
||||||
|
this.coverage = coverage;
|
||||||
|
this.warnings = warnings;
|
||||||
|
this.geneID = geneID;
|
||||||
|
this.geneName = geneName;
|
||||||
|
this.bioType = bioType;
|
||||||
|
this.transcriptID = transcriptID;
|
||||||
|
this.exonID = exonID;
|
||||||
|
this.exonRank = exonRank;
|
||||||
|
this.isNonCodingGene = isNonCodingGene;
|
||||||
|
this.effect = effect;
|
||||||
|
this.effectExtraInformation = effectExtraInformation;
|
||||||
|
this.oldAndNewAA = oldAndNewAA;
|
||||||
|
this.oldAndNewCodon = oldAndNewCodon;
|
||||||
|
this.codonNum = codonNum;
|
||||||
|
this.cdsSize = cdsSize;
|
||||||
|
this.codonsAround = codonsAround;
|
||||||
|
this.aasAround = aasAround;
|
||||||
|
this.customIntervalID = customIntervalID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getChr() {
|
||||||
|
return contig;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getStart() {
|
||||||
|
return (int)position;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getEnd() {
|
||||||
|
return (int)position;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasReference() {
|
||||||
|
return reference != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getReference() {
|
||||||
|
return reference;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasChange() {
|
||||||
|
return change != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getChange() {
|
||||||
|
return change;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasChangeType() {
|
||||||
|
return changeType != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ChangeType getChangeType() {
|
||||||
|
return changeType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasZygosity() {
|
||||||
|
return zygosity != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Zygosity getZygosity() {
|
||||||
|
return zygosity;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasQuality() {
|
||||||
|
return quality != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Double getQuality() {
|
||||||
|
return quality;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasCoverage() {
|
||||||
|
return coverage != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Long getCoverage() {
|
||||||
|
return coverage;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasWarnings() {
|
||||||
|
return warnings != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getWarnings() {
|
||||||
|
return warnings;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasGeneID() {
|
||||||
|
return geneID != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getGeneID() {
|
||||||
|
return geneID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasGeneName() {
|
||||||
|
return geneName != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getGeneName() {
|
||||||
|
return geneName;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasBioType() {
|
||||||
|
return bioType != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getBioType() {
|
||||||
|
return bioType;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasTranscriptID() {
|
||||||
|
return transcriptID != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getTranscriptID() {
|
||||||
|
return transcriptID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasExonID() {
|
||||||
|
return exonID != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getExonID() {
|
||||||
|
return exonID;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasExonRank() {
|
||||||
|
return exonRank != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getExonRank() {
|
||||||
|
return exonRank;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isNonCodingGene() {
|
||||||
|
return isNonCodingGene;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EffectType getEffect() {
|
||||||
|
return effect;
|
||||||
|
}
|
||||||
|
|
||||||
|
public EffectImpact getEffectImpact() {
|
||||||
|
return effect.getImpact();
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasEffectExtraInformation() {
|
||||||
|
return effectExtraInformation != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getEffectExtraInformation() {
|
||||||
|
return effectExtraInformation;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasOldAndNewAA() {
|
||||||
|
return oldAndNewAA != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOldAndNewAA() {
|
||||||
|
return oldAndNewAA;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasOldAndNewCodon() {
|
||||||
|
return oldAndNewCodon != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getOldAndNewCodon() {
|
||||||
|
return oldAndNewCodon;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasCodonNum() {
|
||||||
|
return codonNum != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getCodonNum() {
|
||||||
|
return codonNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasCdsSize() {
|
||||||
|
return cdsSize != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Integer getCdsSize() {
|
||||||
|
return cdsSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasCodonsAround() {
|
||||||
|
return codonsAround != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCodonsAround() {
|
||||||
|
return codonsAround;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hadAasAround() {
|
||||||
|
return aasAround != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getAasAround() {
|
||||||
|
return aasAround;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean hasCustomIntervalID() {
|
||||||
|
return customIntervalID != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCustomIntervalID() {
|
||||||
|
return customIntervalID;
|
||||||
|
}
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue