Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable
This commit is contained in:
commit
86480b2e13
|
|
@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
|
|||
* @version 0.1
|
||||
*/
|
||||
public class PlatformFilter extends ReadFilter {
|
||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
|
||||
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
|
||||
protected String[] PLFilterNames;
|
||||
|
||||
public boolean filterOut(SAMRecord rec) {
|
||||
|
|
|
|||
|
|
@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
|
|||
* -I input1.bam \
|
||||
* -I input2.bam \
|
||||
* --read_filter MappingQualityZero
|
||||
*
|
||||
* java -Xmx2g -jar GenomeAnalysisTK.jar \
|
||||
* -R ref.fasta \
|
||||
* -T PrintReads \
|
||||
* -o output.bam \
|
||||
* -I input.bam \
|
||||
* -n 2000
|
||||
* </pre>
|
||||
*
|
||||
*/
|
||||
|
|
|
|||
|
|
@ -24,7 +24,9 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
|
||||
import org.broadinstitute.sting.utils.Utils;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -46,134 +45,421 @@ import java.util.*;
|
|||
* (http://snpeff.sourceforge.net/).
|
||||
*
|
||||
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
|
||||
* output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
|
||||
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
|
||||
* and adds annotations on that effect.
|
||||
*
|
||||
* The possible biological effects and their associated impacts are defined in the class:
|
||||
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
|
||||
|
||||
// SnpEff annotation key names:
|
||||
public static final String GENE_ID_KEY = "GENE_ID";
|
||||
public static final String GENE_NAME_KEY = "GENE_NAME";
|
||||
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
|
||||
public static final String EXON_ID_KEY = "EXON_ID";
|
||||
public static final String EXON_RANK_KEY = "EXON_RANK";
|
||||
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
|
||||
public static final String EFFECT_KEY = "EFFECT";
|
||||
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
|
||||
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
|
||||
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
|
||||
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
|
||||
public static final String CODON_NUM_KEY = "CODON_NUM";
|
||||
public static final String CDS_SIZE_KEY = "CDS_SIZE";
|
||||
private static Logger logger = Logger.getLogger(SnpEff.class);
|
||||
|
||||
// We refuse to parse SnpEff output files generated by unsupported versions, or
|
||||
// lacking a SnpEff version number in the VCF header:
|
||||
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
|
||||
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
|
||||
|
||||
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
|
||||
// field annotation with the key EFF:
|
||||
public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
|
||||
public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
|
||||
public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
|
||||
|
||||
// Key names for the INFO field annotations we will add to each record, along
|
||||
// with parsing-related information:
|
||||
public enum InfoFieldKey {
|
||||
EFF (-1),
|
||||
EFF_IMPACT (0),
|
||||
EFF_CODON_CHANGE (1),
|
||||
EFF_AMINO_ACID_CHANGE (2),
|
||||
EFF_GENE_NAME (3),
|
||||
EFF_GENE_BIOTYPE (4),
|
||||
EFF_TRANSCRIPT_ID (6),
|
||||
EFF_EXON_ID (7);
|
||||
|
||||
// Index within the effect metadata subfields from the SnpEff EFF annotation
|
||||
// where each key's associated value can be found during parsing.
|
||||
private final int fieldIndex;
|
||||
|
||||
InfoFieldKey ( int fieldIndex ) {
|
||||
this.fieldIndex = fieldIndex;
|
||||
}
|
||||
|
||||
public int getFieldIndex() {
|
||||
return fieldIndex;
|
||||
}
|
||||
}
|
||||
|
||||
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
|
||||
// are validated against this list.
|
||||
public enum EffectType {
|
||||
NONE,
|
||||
CHROMOSOME,
|
||||
INTERGENIC,
|
||||
UPSTREAM,
|
||||
UTR_5_PRIME,
|
||||
UTR_5_DELETED,
|
||||
START_GAINED,
|
||||
SPLICE_SITE_ACCEPTOR,
|
||||
SPLICE_SITE_DONOR,
|
||||
START_LOST,
|
||||
SYNONYMOUS_START,
|
||||
NON_SYNONYMOUS_START,
|
||||
CDS,
|
||||
GENE,
|
||||
TRANSCRIPT,
|
||||
EXON,
|
||||
EXON_DELETED,
|
||||
NON_SYNONYMOUS_CODING,
|
||||
SYNONYMOUS_CODING,
|
||||
FRAME_SHIFT,
|
||||
CODON_CHANGE,
|
||||
CODON_INSERTION,
|
||||
CODON_CHANGE_PLUS_CODON_INSERTION,
|
||||
CODON_DELETION,
|
||||
CODON_CHANGE_PLUS_CODON_DELETION,
|
||||
STOP_GAINED,
|
||||
SYNONYMOUS_STOP,
|
||||
NON_SYNONYMOUS_STOP,
|
||||
STOP_LOST,
|
||||
INTRON,
|
||||
UTR_3_PRIME,
|
||||
UTR_3_DELETED,
|
||||
DOWNSTREAM,
|
||||
INTRON_CONSERVED,
|
||||
INTERGENIC_CONSERVED,
|
||||
REGULATION,
|
||||
CUSTOM,
|
||||
WITHIN_NON_CODING_GENE
|
||||
}
|
||||
|
||||
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
|
||||
public enum EffectImpact {
|
||||
LOW (1),
|
||||
MODERATE (2),
|
||||
HIGH (3);
|
||||
|
||||
private final int severityRating;
|
||||
|
||||
EffectImpact ( int severityRating ) {
|
||||
this.severityRating = severityRating;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( EffectImpact other ) {
|
||||
return this.severityRating > other.severityRating;
|
||||
}
|
||||
}
|
||||
|
||||
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
|
||||
public enum EffectCoding {
|
||||
CODING,
|
||||
NON_CODING,
|
||||
UNKNOWN
|
||||
}
|
||||
|
||||
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
||||
validateRodBinding(walker.getSnpEffRodBinding());
|
||||
checkSnpEffVersion(walker, toolkit);
|
||||
}
|
||||
|
||||
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
|
||||
RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
validateRodBinding(snpEffRodBinding);
|
||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
|
||||
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
|
||||
// Get only SnpEff records that start at this locus, not merely span it:
|
||||
List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
|
||||
|
||||
// Add only annotations for one of the most biologically-significant effects as defined in
|
||||
// the SnpEffConstants class:
|
||||
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
|
||||
|
||||
if ( mostSignificantEffect == null ) {
|
||||
// Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
|
||||
// If there is more than one such record, we only need to pick the first one, since the biological
|
||||
// effects will be the same across all such records:
|
||||
VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
|
||||
if ( matchingRecord == null ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return generateAnnotations(mostSignificantEffect);
|
||||
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
|
||||
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
|
||||
if ( effects.size() == 0 ) {
|
||||
return null;
|
||||
}
|
||||
|
||||
// Add only annotations for one of the most biologically-significant effects from this set:
|
||||
SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
|
||||
return mostSignificantEffect.getAnnotations();
|
||||
}
|
||||
|
||||
private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
|
||||
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
|
||||
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
|
||||
throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
|
||||
"as a rodbinding on the command line, but no SnpEff rodbinding was found.");
|
||||
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
|
||||
"as a rodbinding on the command line via the --snpEffFile option, but " +
|
||||
"no SnpEff rodbinding was found.");
|
||||
}
|
||||
}
|
||||
|
||||
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
|
||||
SnpEffFeature mostSignificantEffect = null;
|
||||
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
|
||||
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
|
||||
|
||||
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
|
||||
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
|
||||
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
|
||||
|
||||
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
|
||||
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
|
||||
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
|
||||
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
|
||||
}
|
||||
|
||||
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
|
||||
|
||||
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
|
||||
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
|
||||
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isSupportedSnpEffVersion ( String versionString ) {
|
||||
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
|
||||
if ( supportedVersion.equals(versionString) ) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
|
||||
for ( VariantContext snpEffRecord : snpEffRecords ) {
|
||||
if ( snpEffRecord.hasSameAlternateAllelesAs(vc) && snpEffRecord.getReference().equals(vc.getReference()) ) {
|
||||
return snpEffRecord;
|
||||
}
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
|
||||
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
|
||||
|
||||
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
|
||||
List<String> individualEffects;
|
||||
|
||||
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
|
||||
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
|
||||
if ( effectFieldValue instanceof List ) {
|
||||
individualEffects = (List<String>)effectFieldValue;
|
||||
}
|
||||
else {
|
||||
individualEffects = Arrays.asList((String)effectFieldValue);
|
||||
}
|
||||
|
||||
for ( String effectString : individualEffects ) {
|
||||
String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);
|
||||
|
||||
if ( effectNameAndMetadata.length != 2 ) {
|
||||
logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
|
||||
snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
|
||||
continue;
|
||||
}
|
||||
|
||||
String effectName = effectNameAndMetadata[0];
|
||||
String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
|
||||
|
||||
SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);
|
||||
|
||||
if ( parsedEffect.isWellFormed() ) {
|
||||
parsedEffects.add(parsedEffect);
|
||||
}
|
||||
else {
|
||||
logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
|
||||
snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString));
|
||||
}
|
||||
}
|
||||
|
||||
return parsedEffects;
|
||||
}
|
||||
|
||||
private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
|
||||
SnpEffEffect mostSignificantEffect = null;
|
||||
|
||||
for ( SnpEffEffect effect : effects ) {
|
||||
if ( mostSignificantEffect == null ||
|
||||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
|
||||
effect.isHigherImpactThan(mostSignificantEffect) ) {
|
||||
|
||||
mostSignificantEffect = snpEffFeature;
|
||||
mostSignificantEffect = effect;
|
||||
}
|
||||
}
|
||||
|
||||
return mostSignificantEffect;
|
||||
}
|
||||
|
||||
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
|
||||
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
|
||||
|
||||
if ( mostSignificantEffect.hasGeneID() )
|
||||
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
|
||||
if ( mostSignificantEffect.hasGeneName() )
|
||||
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
|
||||
if ( mostSignificantEffect.hasTranscriptID() )
|
||||
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
|
||||
if ( mostSignificantEffect.hasExonID() )
|
||||
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
|
||||
if ( mostSignificantEffect.hasExonRank() )
|
||||
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
|
||||
if ( mostSignificantEffect.isNonCodingGene() )
|
||||
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
|
||||
|
||||
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
|
||||
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
|
||||
if ( mostSignificantEffect.hasEffectExtraInformation() )
|
||||
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
|
||||
|
||||
if ( mostSignificantEffect.hasOldAndNewAA() )
|
||||
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
|
||||
if ( mostSignificantEffect.hasOldAndNewCodon() )
|
||||
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
|
||||
if ( mostSignificantEffect.hasCodonNum() )
|
||||
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
|
||||
if ( mostSignificantEffect.hasCdsSize() )
|
||||
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
||||
public List<String> getKeyNames() {
|
||||
return Arrays.asList( GENE_ID_KEY,
|
||||
GENE_NAME_KEY,
|
||||
TRANSCRIPT_ID_KEY,
|
||||
EXON_ID_KEY,
|
||||
EXON_RANK_KEY,
|
||||
WITHIN_NON_CODING_GENE_KEY,
|
||||
EFFECT_KEY,
|
||||
EFFECT_IMPACT_KEY,
|
||||
EFFECT_EXTRA_INFORMATION_KEY,
|
||||
OLD_NEW_AA_KEY,
|
||||
OLD_NEW_CODON_KEY,
|
||||
CODON_NUM_KEY,
|
||||
CDS_SIZE_KEY
|
||||
return Arrays.asList( InfoFieldKey.EFF.toString(),
|
||||
InfoFieldKey.EFF_IMPACT.toString(),
|
||||
InfoFieldKey.EFF_CODON_CHANGE.toString(),
|
||||
InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(),
|
||||
InfoFieldKey.EFF_GENE_NAME.toString(),
|
||||
InfoFieldKey.EFF_GENE_BIOTYPE.toString(),
|
||||
InfoFieldKey.EFF_TRANSCRIPT_ID.toString(),
|
||||
InfoFieldKey.EFF_EXON_ID.toString()
|
||||
);
|
||||
}
|
||||
|
||||
public List<VCFInfoHeaderLine> getDescriptions() {
|
||||
return Arrays.asList(
|
||||
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
|
||||
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
|
||||
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF.toString(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_IMPACT.toString(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_CODON_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_NAME.toString(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_BIOTYPE.toString(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
|
||||
new VCFInfoHeaderLine(InfoFieldKey.EFF_EXON_ID.toString(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper class to parse, validate, and store a single SnpEff effect and its metadata.
|
||||
*/
|
||||
protected static class SnpEffEffect {
|
||||
private EffectType effect;
|
||||
private EffectImpact impact;
|
||||
private String codonChange;
|
||||
private String aminoAcidChange;
|
||||
private String geneName;
|
||||
private String geneBiotype;
|
||||
private EffectCoding coding;
|
||||
private String transcriptID;
|
||||
private String exonID;
|
||||
|
||||
private String parseError = null;
|
||||
private boolean isWellFormed = true;
|
||||
|
||||
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
|
||||
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
|
||||
|
||||
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
|
||||
// errors come after warnings, not vice versa:
|
||||
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
|
||||
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
|
||||
|
||||
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
|
||||
|
||||
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
|
||||
parseEffectName(effectName);
|
||||
parseEffectMetadata(effectMetadata);
|
||||
}
|
||||
|
||||
private void parseEffectName ( String effectName ) {
|
||||
try {
|
||||
effect = EffectType.valueOf(effectName);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
parseError(String.format("%s is not a recognized effect type", effectName));
|
||||
}
|
||||
}
|
||||
|
||||
private void parseEffectMetadata ( String[] effectMetadata ) {
|
||||
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
|
||||
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
|
||||
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
|
||||
}
|
||||
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
|
||||
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
|
||||
}
|
||||
else {
|
||||
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
|
||||
EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
|
||||
}
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]));
|
||||
}
|
||||
|
||||
codonChange = effectMetadata[InfoFieldKey.EFF_CODON_CHANGE.getFieldIndex()];
|
||||
aminoAcidChange = effectMetadata[InfoFieldKey.EFF_AMINO_ACID_CHANGE.getFieldIndex()];
|
||||
geneName = effectMetadata[InfoFieldKey.EFF_GENE_NAME.getFieldIndex()];
|
||||
geneBiotype = effectMetadata[InfoFieldKey.EFF_GENE_BIOTYPE.getFieldIndex()];
|
||||
|
||||
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
|
||||
try {
|
||||
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
|
||||
}
|
||||
}
|
||||
else {
|
||||
coding = EffectCoding.UNKNOWN;
|
||||
}
|
||||
|
||||
transcriptID = effectMetadata[InfoFieldKey.EFF_TRANSCRIPT_ID.getFieldIndex()];
|
||||
exonID = effectMetadata[InfoFieldKey.EFF_EXON_ID.getFieldIndex()];
|
||||
}
|
||||
|
||||
private void parseError ( String message ) {
|
||||
isWellFormed = false;
|
||||
|
||||
// Cache only the first error encountered:
|
||||
if ( parseError == null ) {
|
||||
parseError = message;
|
||||
}
|
||||
}
|
||||
|
||||
public boolean isWellFormed() {
|
||||
return isWellFormed;
|
||||
}
|
||||
|
||||
public String getParseError() {
|
||||
return parseError == null ? "" : parseError;
|
||||
}
|
||||
|
||||
public boolean isCoding() {
|
||||
return coding == EffectCoding.CODING;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( SnpEffEffect other ) {
|
||||
// If one effect is within a coding gene and the other is not, the effect that is
|
||||
// within the coding gene has higher impact:
|
||||
|
||||
if ( isCoding() && ! other.isCoding() ) {
|
||||
return true;
|
||||
}
|
||||
else if ( ! isCoding() && other.isCoding() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
|
||||
// of the effects themselves:
|
||||
|
||||
return impact.isHigherImpactThan(other.impact);
|
||||
}
|
||||
|
||||
public Map<String, Object> getAnnotations() {
|
||||
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));
|
||||
|
||||
addAnnotation(annotations, InfoFieldKey.EFF.toString(), effect.toString());
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_IMPACT.toString(), impact.toString());
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_CODON_CHANGE.toString(), codonChange);
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), aminoAcidChange);
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_GENE_NAME.toString(), geneName);
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_GENE_BIOTYPE.toString(), geneBiotype);
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), transcriptID);
|
||||
addAnnotation(annotations, InfoFieldKey.EFF_EXON_ID.toString(), exonID);
|
||||
|
||||
return annotations;
|
||||
}
|
||||
|
||||
private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
|
||||
// Only add annotations for keys associated with non-empty values:
|
||||
if ( keyValue != null && keyValue.trim().length() > 0 ) {
|
||||
annotations.put(keyName, keyValue);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
|
|||
import org.broadinstitute.sting.utils.BaseUtils;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.classloader.PluginManager;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
|
||||
|
|
@ -93,8 +92,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
* listed in the SnpEff output file for each variant.
|
||||
*/
|
||||
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
|
||||
public RodBinding<SnpEffFeature> snpEffFile;
|
||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
|
||||
public RodBinding<VariantContext> snpEffFile;
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
|
||||
|
||||
/**
|
||||
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
|
||||
|
|
@ -204,9 +203,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
|
|||
}
|
||||
|
||||
if ( USE_ALL_ANNOTATIONS )
|
||||
engine = new VariantAnnotatorEngine(this);
|
||||
engine = new VariantAnnotatorEngine(this, getToolkit());
|
||||
else
|
||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
|
||||
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
|
||||
engine.initializeExpressions(expressionsToUse);
|
||||
|
||||
engine.invokeAnnotationInitializationMethods();
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
|
|
@ -46,6 +47,7 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
|
||||
private AnnotatorCompatibleWalker walker;
|
||||
private GenomeAnalysisEngine toolkit;
|
||||
|
||||
private static class VAExpression {
|
||||
|
||||
|
|
@ -71,16 +73,18 @@ public class VariantAnnotatorEngine {
|
|||
}
|
||||
|
||||
// use this constructor if you want all possible annotations
|
||||
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
|
||||
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
|
||||
this.walker = walker;
|
||||
this.toolkit = toolkit;
|
||||
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
|
||||
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
|
||||
initializeDBs();
|
||||
}
|
||||
|
||||
// use this constructor if you want to select specific annotations (and/or interfaces)
|
||||
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
|
||||
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
|
||||
this.walker = walker;
|
||||
this.toolkit = toolkit;
|
||||
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
|
||||
initializeDBs();
|
||||
}
|
||||
|
|
@ -112,11 +116,11 @@ public class VariantAnnotatorEngine {
|
|||
|
||||
public void invokeAnnotationInitializationMethods() {
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
|
||||
annotation.initialize(walker);
|
||||
annotation.initialize(walker, toolkit);
|
||||
}
|
||||
|
||||
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
|
||||
annotation.initialize(walker);
|
||||
annotation.initialize(walker, toolkit);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.commandline.RodBinding;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -10,8 +9,8 @@ public interface AnnotatorCompatibleWalker {
|
|||
|
||||
// getter methods for various used bindings
|
||||
public abstract RodBinding<VariantContext> getVariantRodBinding();
|
||||
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
|
||||
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
|
||||
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
|
||||
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
|
||||
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -24,6 +24,7 @@
|
|||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
|
||||
|
||||
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
|
||||
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
|
||||
|
||||
import java.util.List;
|
||||
|
|
@ -34,5 +35,5 @@ public abstract class VariantAnnotatorAnnotation {
|
|||
public abstract List<String> getKeyNames();
|
||||
|
||||
// initialization method (optional for subclasses, and therefore non-abstract)
|
||||
public void initialize ( AnnotatorCompatibleWalker walker ) { }
|
||||
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
|
||||
}
|
||||
|
|
@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
|
|||
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
|
||||
import org.broadinstitute.sting.utils.SampleUtils;
|
||||
import org.broadinstitute.sting.utils.baq.BAQ;
|
||||
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
|
||||
import org.broadinstitute.sting.utils.codecs.vcf.*;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
|
|
@ -128,7 +127,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
|
||||
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
|
||||
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
|
||||
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
|
||||
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
|
||||
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
|
||||
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
|
||||
|
||||
|
|
@ -211,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
|
|||
if ( verboseWriter != null )
|
||||
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
|
||||
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
|
||||
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
|
||||
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
|
||||
|
||||
// initialize the header
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
|
|||
|
||||
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
|
||||
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
|
||||
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
|
@ -11,12 +12,19 @@ import java.util.List;
|
|||
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
|
||||
*/
|
||||
public class FunctionalClass extends VariantStratifier {
|
||||
|
||||
public enum FunctionalType {
|
||||
silent,
|
||||
missense,
|
||||
nonsense
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public void initialize() {
|
||||
states.add("all");
|
||||
states.add("silent");
|
||||
states.add("missense");
|
||||
states.add("nonsense");
|
||||
for ( FunctionalType type : FunctionalType.values() )
|
||||
states.add(type.name());
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -26,10 +34,12 @@ public class FunctionalClass extends VariantStratifier {
|
|||
relevantStates.add("all");
|
||||
|
||||
if (eval != null && eval.isVariant()) {
|
||||
String type = null;
|
||||
FunctionalType type = null;
|
||||
|
||||
if (eval.hasAttribute("refseq.functionalClass")) {
|
||||
type = eval.getAttributeAsString("refseq.functionalClass");
|
||||
try {
|
||||
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
|
||||
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
|
||||
int annotationId = 1;
|
||||
String key;
|
||||
|
|
@ -37,24 +47,33 @@ public class FunctionalClass extends VariantStratifier {
|
|||
do {
|
||||
key = String.format("refseq.functionalClass_%d", annotationId);
|
||||
|
||||
String newtype = eval.getAttributeAsString(key);
|
||||
|
||||
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
|
||||
( type == null ||
|
||||
( type.equals("silent") && !newtype.equals("silent") ) ||
|
||||
( type.equals("missense") && newtype.equals("nonsense") ) )
|
||||
) {
|
||||
type = newtype;
|
||||
String newtypeStr = eval.getAttributeAsString(key);
|
||||
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
|
||||
try {
|
||||
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
|
||||
if ( type == null ||
|
||||
( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
|
||||
( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
|
||||
type = newType;
|
||||
}
|
||||
} catch ( Exception e ) {} // don't error out if the type isn't supported
|
||||
}
|
||||
|
||||
annotationId++;
|
||||
} while (eval.hasAttribute(key));
|
||||
|
||||
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFF.name() ) ) {
|
||||
SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFF.name()).toString());
|
||||
if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
|
||||
type = FunctionalType.nonsense;
|
||||
else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
|
||||
type = FunctionalType.missense;
|
||||
else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING )
|
||||
type = FunctionalType.silent;
|
||||
}
|
||||
|
||||
if (type != null) {
|
||||
if (type.equals("silent")) { relevantStates.add("silent"); }
|
||||
else if (type.equals("missense")) { relevantStates.add("missense"); }
|
||||
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
|
||||
if ( type != null ) {
|
||||
relevantStates.add(type.name());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,282 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
import org.broad.tribble.FeatureCodec;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Codec for decoding the output format of the SnpEff variant effect predictor tool
|
||||
*
|
||||
* <p>
|
||||
* This format has 23 tab-delimited fields:
|
||||
*
|
||||
* <pre>
|
||||
* Chromosome
|
||||
* Position
|
||||
* Reference
|
||||
* Change
|
||||
* Change Type: {SNP, MNP, INS, DEL}
|
||||
* Zygosity: {Hom, Het}
|
||||
* Quality
|
||||
* Coverage
|
||||
* Warnings
|
||||
* Gene ID
|
||||
* Gene Name
|
||||
* Bio Type
|
||||
* Transcript ID
|
||||
* Exon ID
|
||||
* Exon Rank
|
||||
* Effect
|
||||
* Old/New Amino Acid
|
||||
* Old/New Codon
|
||||
* Codon Num
|
||||
* CDS Size
|
||||
* Codons Around
|
||||
* Amino Acids Around
|
||||
* Custom Interval ID
|
||||
* </pre>
|
||||
* Note that we treat all except the Chromosome, Position, and Effect fields as optional.
|
||||
* </p>
|
||||
*
|
||||
* <p>
|
||||
* See also: @see <a href="http://snpeff.sourceforge.net/">SNPEff project page</a>
|
||||
* </p>
|
||||
*
|
||||
* @author David Roazen
|
||||
* @since 2011
|
||||
*/
|
||||
public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {
|
||||
|
||||
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
|
||||
public static final String FIELD_DELIMITER_PATTERN = "\\t";
|
||||
public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
|
||||
public static final String HEADER_LINE_START = "# ";
|
||||
public static final String[] HEADER_FIELD_NAMES = { "Chromo",
|
||||
"Position",
|
||||
"Reference",
|
||||
"Change",
|
||||
"Change type",
|
||||
"Homozygous",
|
||||
"Quality",
|
||||
"Coverage",
|
||||
"Warnings",
|
||||
"Gene_ID",
|
||||
"Gene_name",
|
||||
"Bio_type",
|
||||
"Trancript_ID", // yes, this is how it's spelled in the SnpEff output
|
||||
"Exon_ID",
|
||||
"Exon_Rank",
|
||||
"Effect",
|
||||
"old_AA/new_AA",
|
||||
"Old_codon/New_codon",
|
||||
"Codon_Num(CDS)",
|
||||
"CDS_size",
|
||||
"Codons around",
|
||||
"AAs around",
|
||||
"Custom_interval_ID"
|
||||
};
|
||||
|
||||
// The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
|
||||
public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
|
||||
|
||||
public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
|
||||
|
||||
|
||||
public Feature decodeLoc ( String line ) {
|
||||
return decode(line);
|
||||
}
|
||||
|
||||
public Feature decode ( String line ) {
|
||||
String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
|
||||
|
||||
if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
|
||||
") number of fields: found " + tokens.length + " fields.", line);
|
||||
}
|
||||
|
||||
try {
|
||||
trimAllFields(tokens);
|
||||
checkForRequiredFields(tokens, line);
|
||||
|
||||
String contig = tokens[0];
|
||||
long position = Long.parseLong(tokens[1]);
|
||||
|
||||
String reference = tokens[2].isEmpty() ? null : tokens[2];
|
||||
String change = tokens[3].isEmpty() ? null : tokens[3];
|
||||
ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
|
||||
Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
|
||||
Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
|
||||
Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
|
||||
String warnings = tokens[8].isEmpty() ? null : tokens[8];
|
||||
String geneID = tokens[9].isEmpty() ? null : tokens[9];
|
||||
String geneName = tokens[10].isEmpty() ? null : tokens[10];
|
||||
String bioType = tokens[11].isEmpty() ? null : tokens[11];
|
||||
String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
|
||||
String exonID = tokens[13].isEmpty() ? null : tokens[13];
|
||||
Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
|
||||
|
||||
boolean isNonCodingGene = isNonCodingGene(tokens[15]);
|
||||
|
||||
// Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
|
||||
// otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
|
||||
// in the final field (when present) from being inappropriately tokenized:
|
||||
|
||||
int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
|
||||
String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
|
||||
EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
|
||||
String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
|
||||
|
||||
String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
|
||||
String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
|
||||
Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
|
||||
Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
|
||||
String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
|
||||
String aasAround = tokens[21].isEmpty() ? null : tokens[21];
|
||||
String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
|
||||
|
||||
return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
|
||||
warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
|
||||
effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
|
||||
codonsAround, aasAround, customIntervalID);
|
||||
}
|
||||
catch ( NumberFormatException e ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
|
||||
}
|
||||
catch ( IllegalArgumentException e ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
|
||||
}
|
||||
}
|
||||
|
||||
private void trimAllFields ( String[] tokens ) {
|
||||
for ( int i = 0; i < tokens.length; i++ ) {
|
||||
tokens[i] = tokens[i].trim();
|
||||
}
|
||||
}
|
||||
|
||||
private void checkForRequiredFields ( String[] tokens, String line ) {
|
||||
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
|
||||
if ( tokens[requiredFieldIndex].isEmpty() ) {
|
||||
throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
|
||||
HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
|
||||
line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isNonCodingGene ( String effectField ) {
|
||||
return effectField.startsWith(NON_CODING_GENE_FLAG);
|
||||
}
|
||||
|
||||
private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
String effectName = "";
|
||||
|
||||
// If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
|
||||
// otherwise it will be in the first subfield:
|
||||
|
||||
if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
|
||||
effectName = effectFieldTokens[1].trim();
|
||||
}
|
||||
else {
|
||||
effectName = effectFieldTokens[0].trim();
|
||||
}
|
||||
|
||||
return EffectType.valueOf(effectName);
|
||||
}
|
||||
|
||||
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
|
||||
|
||||
// The extra effect-related information, if present, will always be the last subfield:
|
||||
|
||||
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
|
||||
return effectFieldTokens[effectFieldTokens.length - 1].trim();
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
public Class<SnpEffFeature> getFeatureType() {
|
||||
return SnpEffFeature.class;
|
||||
}
|
||||
|
||||
public Object readHeader ( LineReader reader ) {
|
||||
String headerLine = "";
|
||||
|
||||
try {
|
||||
headerLine = reader.readLine();
|
||||
}
|
||||
catch ( IOException e ) {
|
||||
throw new TribbleException("Unable to read header line from input file.");
|
||||
}
|
||||
|
||||
validateHeaderLine(headerLine);
|
||||
return headerLine;
|
||||
}
|
||||
|
||||
private void validateHeaderLine ( String headerLine ) {
|
||||
if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
|
||||
throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
|
||||
}
|
||||
|
||||
String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
|
||||
|
||||
if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
|
||||
throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
|
||||
EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
|
||||
}
|
||||
|
||||
for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
|
||||
if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
|
||||
throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
|
||||
HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
|
||||
headerTokens[columnIndex] + "\"");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public boolean canDecode ( final File potentialInput ) {
|
||||
try {
|
||||
LineReader reader = new AsciiLineReader(new FileInputStream(potentialInput));
|
||||
readHeader(reader);
|
||||
}
|
||||
catch ( Exception e ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,115 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
/**
|
||||
* A set of constants associated with the SnpEff codec.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffConstants {
|
||||
|
||||
// Possible SnpEff biological effects and their associated impacts:
|
||||
public enum EffectType {
|
||||
START_GAINED (EffectImpact.HIGH),
|
||||
START_LOST (EffectImpact.HIGH),
|
||||
EXON_DELETED (EffectImpact.HIGH),
|
||||
FRAME_SHIFT (EffectImpact.HIGH),
|
||||
STOP_GAINED (EffectImpact.HIGH),
|
||||
STOP_LOST (EffectImpact.HIGH),
|
||||
SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH),
|
||||
SPLICE_SITE_DONOR (EffectImpact.HIGH),
|
||||
|
||||
NON_SYNONYMOUS_CODING (EffectImpact.MODERATE),
|
||||
UTR_5_DELETED (EffectImpact.MODERATE),
|
||||
UTR_3_DELETED (EffectImpact.MODERATE),
|
||||
CODON_INSERTION (EffectImpact.MODERATE),
|
||||
CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
|
||||
CODON_DELETION (EffectImpact.MODERATE),
|
||||
CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE),
|
||||
|
||||
NONE (EffectImpact.LOW),
|
||||
CHROMOSOME (EffectImpact.LOW),
|
||||
INTERGENIC (EffectImpact.LOW),
|
||||
UPSTREAM (EffectImpact.LOW),
|
||||
UTR_5_PRIME (EffectImpact.LOW),
|
||||
SYNONYMOUS_START (EffectImpact.LOW),
|
||||
NON_SYNONYMOUS_START (EffectImpact.LOW),
|
||||
CDS (EffectImpact.LOW),
|
||||
GENE (EffectImpact.LOW),
|
||||
TRANSCRIPT (EffectImpact.LOW),
|
||||
EXON (EffectImpact.LOW),
|
||||
SYNONYMOUS_CODING (EffectImpact.LOW),
|
||||
CODON_CHANGE (EffectImpact.LOW),
|
||||
SYNONYMOUS_STOP (EffectImpact.LOW),
|
||||
NON_SYNONYMOUS_STOP (EffectImpact.LOW),
|
||||
INTRON (EffectImpact.LOW),
|
||||
UTR_3_PRIME (EffectImpact.LOW),
|
||||
DOWNSTREAM (EffectImpact.LOW),
|
||||
INTRON_CONSERVED (EffectImpact.LOW),
|
||||
INTERGENIC_CONSERVED (EffectImpact.LOW),
|
||||
CUSTOM (EffectImpact.LOW);
|
||||
|
||||
private final EffectImpact impact;
|
||||
|
||||
EffectType ( EffectImpact impact ) {
|
||||
this.impact = impact;
|
||||
}
|
||||
|
||||
public EffectImpact getImpact() {
|
||||
return impact;
|
||||
}
|
||||
}
|
||||
|
||||
public enum EffectImpact {
|
||||
LOW (1),
|
||||
MODERATE (2),
|
||||
HIGH (3);
|
||||
|
||||
private final int severityRating;
|
||||
|
||||
EffectImpact ( int severityRating ) {
|
||||
this.severityRating = severityRating;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( EffectImpact other ) {
|
||||
return this.severityRating > other.severityRating;
|
||||
}
|
||||
}
|
||||
|
||||
// The kinds of variants supported by the SnpEff output format:
|
||||
public enum ChangeType {
|
||||
SNP,
|
||||
MNP,
|
||||
INS,
|
||||
DEL
|
||||
}
|
||||
|
||||
// Possible zygosities of SnpEff variants:
|
||||
public enum Zygosity {
|
||||
Hom,
|
||||
Het
|
||||
}
|
||||
}
|
||||
|
|
@ -1,423 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.broad.tribble.Feature;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
/**
|
||||
* Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
|
||||
*
|
||||
* Many fields are optional, and missing values are represented by nulls. You should always call the
|
||||
* hasX() method before calling the corresponding getX() method. Required fields can never be null
|
||||
* and do not have a hasX() method.
|
||||
*
|
||||
* @author David Roazen
|
||||
*/
|
||||
public class SnpEffFeature implements Feature {
|
||||
|
||||
private String contig; // REQUIRED FIELD
|
||||
private long position; // REQUIRED FIELD
|
||||
private String reference;
|
||||
private String change;
|
||||
private ChangeType changeType;
|
||||
private Zygosity zygosity;
|
||||
private Double quality;
|
||||
private Long coverage;
|
||||
private String warnings;
|
||||
private String geneID;
|
||||
private String geneName;
|
||||
private String bioType;
|
||||
private String transcriptID;
|
||||
private String exonID;
|
||||
private Integer exonRank;
|
||||
private boolean isNonCodingGene; // REQUIRED FIELD
|
||||
private EffectType effect; // REQUIRED FIELD
|
||||
private String effectExtraInformation;
|
||||
private String oldAndNewAA;
|
||||
private String oldAndNewCodon;
|
||||
private Integer codonNum;
|
||||
private Integer cdsSize;
|
||||
private String codonsAround;
|
||||
private String aasAround;
|
||||
private String customIntervalID;
|
||||
|
||||
public SnpEffFeature ( String contig,
|
||||
long position,
|
||||
String reference,
|
||||
String change,
|
||||
ChangeType changeType,
|
||||
Zygosity zygosity,
|
||||
Double quality,
|
||||
Long coverage,
|
||||
String warnings,
|
||||
String geneID,
|
||||
String geneName,
|
||||
String bioType,
|
||||
String transcriptID,
|
||||
String exonID,
|
||||
Integer exonRank,
|
||||
boolean isNonCodingGene,
|
||||
EffectType effect,
|
||||
String effectExtraInformation,
|
||||
String oldAndNewAA,
|
||||
String oldAndNewCodon,
|
||||
Integer codonNum,
|
||||
Integer cdsSize,
|
||||
String codonsAround,
|
||||
String aasAround,
|
||||
String customIntervalID ) {
|
||||
|
||||
if ( contig == null || effect == null ) {
|
||||
throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
|
||||
}
|
||||
|
||||
this.contig = contig;
|
||||
this.position = position;
|
||||
this.reference = reference;
|
||||
this.change = change;
|
||||
this.changeType = changeType;
|
||||
this.zygosity = zygosity;
|
||||
this.quality = quality;
|
||||
this.coverage = coverage;
|
||||
this.warnings = warnings;
|
||||
this.geneID = geneID;
|
||||
this.geneName = geneName;
|
||||
this.bioType = bioType;
|
||||
this.transcriptID = transcriptID;
|
||||
this.exonID = exonID;
|
||||
this.exonRank = exonRank;
|
||||
this.isNonCodingGene = isNonCodingGene;
|
||||
this.effect = effect;
|
||||
this.effectExtraInformation = effectExtraInformation;
|
||||
this.oldAndNewAA = oldAndNewAA;
|
||||
this.oldAndNewCodon = oldAndNewCodon;
|
||||
this.codonNum = codonNum;
|
||||
this.cdsSize = cdsSize;
|
||||
this.codonsAround = codonsAround;
|
||||
this.aasAround = aasAround;
|
||||
this.customIntervalID = customIntervalID;
|
||||
}
|
||||
|
||||
public boolean isHigherImpactThan ( SnpEffFeature other ) {
|
||||
|
||||
// If one effect is in a non-coding gene and the other is not, the effect NOT in the
|
||||
// non-coding gene has higher impact:
|
||||
|
||||
if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
|
||||
return true;
|
||||
}
|
||||
else if ( isNonCodingGene() && ! other.isNonCodingGene() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
|
||||
// of the effects themselves as defined in the SnpEffConstants class:
|
||||
|
||||
return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
|
||||
}
|
||||
|
||||
public String getChr() {
|
||||
return contig;
|
||||
}
|
||||
|
||||
public int getStart() {
|
||||
return (int)position;
|
||||
}
|
||||
|
||||
public int getEnd() {
|
||||
return (int)position;
|
||||
}
|
||||
|
||||
public boolean hasReference() {
|
||||
return reference != null;
|
||||
}
|
||||
|
||||
public String getReference() {
|
||||
if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
|
||||
return reference;
|
||||
}
|
||||
|
||||
public boolean hasChange() {
|
||||
return change != null;
|
||||
}
|
||||
|
||||
public String getChange() {
|
||||
if ( change == null ) throw new NoSuchElementException("This feature has no change field");
|
||||
return change;
|
||||
}
|
||||
|
||||
public boolean hasChangeType() {
|
||||
return changeType != null;
|
||||
}
|
||||
|
||||
public ChangeType getChangeType() {
|
||||
if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
|
||||
return changeType;
|
||||
}
|
||||
|
||||
public boolean hasZygosity() {
|
||||
return zygosity != null;
|
||||
}
|
||||
|
||||
public Zygosity getZygosity() {
|
||||
if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
|
||||
return zygosity;
|
||||
}
|
||||
|
||||
public boolean hasQuality() {
|
||||
return quality != null;
|
||||
}
|
||||
|
||||
public Double getQuality() {
|
||||
if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
|
||||
return quality;
|
||||
}
|
||||
|
||||
public boolean hasCoverage() {
|
||||
return coverage != null;
|
||||
}
|
||||
|
||||
public Long getCoverage() {
|
||||
if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
|
||||
return coverage;
|
||||
}
|
||||
|
||||
public boolean hasWarnings() {
|
||||
return warnings != null;
|
||||
}
|
||||
|
||||
public String getWarnings() {
|
||||
if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
|
||||
return warnings;
|
||||
}
|
||||
|
||||
public boolean hasGeneID() {
|
||||
return geneID != null;
|
||||
}
|
||||
|
||||
public String getGeneID() {
|
||||
if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
|
||||
return geneID;
|
||||
}
|
||||
|
||||
public boolean hasGeneName() {
|
||||
return geneName != null;
|
||||
}
|
||||
|
||||
public String getGeneName() {
|
||||
if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
|
||||
return geneName;
|
||||
}
|
||||
|
||||
public boolean hasBioType() {
|
||||
return bioType != null;
|
||||
}
|
||||
|
||||
public String getBioType() {
|
||||
if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
|
||||
return bioType;
|
||||
}
|
||||
|
||||
public boolean hasTranscriptID() {
|
||||
return transcriptID != null;
|
||||
}
|
||||
|
||||
public String getTranscriptID() {
|
||||
if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
|
||||
return transcriptID;
|
||||
}
|
||||
|
||||
public boolean hasExonID() {
|
||||
return exonID != null;
|
||||
}
|
||||
|
||||
public String getExonID() {
|
||||
if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
|
||||
return exonID;
|
||||
}
|
||||
|
||||
public boolean hasExonRank() {
|
||||
return exonRank != null;
|
||||
}
|
||||
|
||||
public Integer getExonRank() {
|
||||
if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
|
||||
return exonRank;
|
||||
}
|
||||
|
||||
public boolean isNonCodingGene() {
|
||||
return isNonCodingGene;
|
||||
}
|
||||
|
||||
public EffectType getEffect() {
|
||||
return effect;
|
||||
}
|
||||
|
||||
public EffectImpact getEffectImpact() {
|
||||
return effect.getImpact();
|
||||
}
|
||||
|
||||
public boolean hasEffectExtraInformation() {
|
||||
return effectExtraInformation != null;
|
||||
}
|
||||
|
||||
public String getEffectExtraInformation() {
|
||||
if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
|
||||
return effectExtraInformation;
|
||||
}
|
||||
|
||||
public boolean hasOldAndNewAA() {
|
||||
return oldAndNewAA != null;
|
||||
}
|
||||
|
||||
public String getOldAndNewAA() {
|
||||
if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
|
||||
return oldAndNewAA;
|
||||
}
|
||||
|
||||
public boolean hasOldAndNewCodon() {
|
||||
return oldAndNewCodon != null;
|
||||
}
|
||||
|
||||
public String getOldAndNewCodon() {
|
||||
if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
|
||||
return oldAndNewCodon;
|
||||
}
|
||||
|
||||
public boolean hasCodonNum() {
|
||||
return codonNum != null;
|
||||
}
|
||||
|
||||
public Integer getCodonNum() {
|
||||
if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
|
||||
return codonNum;
|
||||
}
|
||||
|
||||
public boolean hasCdsSize() {
|
||||
return cdsSize != null;
|
||||
}
|
||||
|
||||
public Integer getCdsSize() {
|
||||
if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
|
||||
return cdsSize;
|
||||
}
|
||||
|
||||
public boolean hasCodonsAround() {
|
||||
return codonsAround != null;
|
||||
}
|
||||
|
||||
public String getCodonsAround() {
|
||||
if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
|
||||
return codonsAround;
|
||||
}
|
||||
|
||||
public boolean hadAasAround() {
|
||||
return aasAround != null;
|
||||
}
|
||||
|
||||
public String getAasAround() {
|
||||
if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
|
||||
return aasAround;
|
||||
}
|
||||
|
||||
public boolean hasCustomIntervalID() {
|
||||
return customIntervalID != null;
|
||||
}
|
||||
|
||||
public String getCustomIntervalID() {
|
||||
if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
|
||||
return customIntervalID;
|
||||
}
|
||||
|
||||
public boolean equals ( Object o ) {
|
||||
if ( o == null || ! (o instanceof SnpEffFeature) ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
SnpEffFeature other = (SnpEffFeature)o;
|
||||
|
||||
return contig.equals(other.contig) &&
|
||||
position == other.position &&
|
||||
(reference == null ? other.reference == null : reference.equals(other.reference)) &&
|
||||
(change == null ? other.change == null : change.equals(other.change)) &&
|
||||
changeType == other.changeType &&
|
||||
zygosity == other.zygosity &&
|
||||
(quality == null ? other.quality == null : quality.equals(other.quality)) &&
|
||||
(coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
|
||||
(warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
|
||||
(geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
|
||||
(geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
|
||||
(bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
|
||||
(transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
|
||||
(exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
|
||||
(exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
|
||||
isNonCodingGene == other.isNonCodingGene &&
|
||||
effect == other.effect &&
|
||||
(effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
|
||||
(oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
|
||||
(oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
|
||||
(codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
|
||||
(cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
|
||||
(codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
|
||||
(aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
|
||||
(customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
|
||||
}
|
||||
|
||||
public String toString() {
|
||||
return "[Contig: " + contig +
|
||||
" Position: " + position +
|
||||
" Reference: " + reference +
|
||||
" Change: " + change +
|
||||
" Change Type: " + changeType +
|
||||
" Zygosity: " + zygosity +
|
||||
" Quality: " + quality +
|
||||
" Coverage: " + coverage +
|
||||
" Warnings: " + warnings +
|
||||
" Gene ID: " + geneID +
|
||||
" Gene Name: " + geneName +
|
||||
" Bio Type: " + bioType +
|
||||
" Transcript ID: " + transcriptID +
|
||||
" Exon ID: " + exonID +
|
||||
" Exon Rank: " + exonRank +
|
||||
" Non-Coding Gene: " + isNonCodingGene +
|
||||
" Effect: " + effect +
|
||||
" Effect Extra Information: " + effectExtraInformation +
|
||||
" Old/New AA: " + oldAndNewAA +
|
||||
" Old/New Codon: " + oldAndNewCodon +
|
||||
" Codon Num: " + codonNum +
|
||||
" CDS Size: " + cdsSize +
|
||||
" Codons Around: " + codonsAround +
|
||||
" AAs Around: " + aasAround +
|
||||
" Custom Interval ID: " + customIntervalID +
|
||||
"]";
|
||||
}
|
||||
}
|
||||
|
|
@ -24,6 +24,7 @@ public class VCFHeader {
|
|||
private final Set<VCFHeaderLine> mMetaData;
|
||||
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
|
||||
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
|
||||
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
|
||||
|
||||
// the list of auxillary tags
|
||||
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
|
||||
|
|
@ -110,6 +111,9 @@ public class VCFHeader {
|
|||
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
|
||||
mFormatMetaData.put(formatLine.getName(), formatLine);
|
||||
}
|
||||
else {
|
||||
mOtherMetaData.put(line.getKey(), line);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -185,6 +189,14 @@ public class VCFHeader {
|
|||
public VCFFormatHeaderLine getFormatHeaderLine(String key) {
|
||||
return mFormatMetaData.get(key);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param key the header key name
|
||||
* @return the meta data line, or null if there is none
|
||||
*/
|
||||
public VCFHeaderLine getOtherHeaderLine(String key) {
|
||||
return mOtherMetaData.get(key);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -817,6 +817,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati
|
|||
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
|
||||
}
|
||||
|
||||
/**
|
||||
* @param other VariantContext whose alternate alleles to compare against
|
||||
* @return true if this VariantContext has the same alternate alleles as other,
|
||||
* regardless of ordering. Otherwise returns false.
|
||||
*/
|
||||
public boolean hasSameAlternateAllelesAs ( VariantContext other ) {
|
||||
Set<Allele> thisAlternateAlleles = getAlternateAlleles();
|
||||
Set<Allele> otherAlternateAlleles = other.getAlternateAlleles();
|
||||
|
||||
if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) {
|
||||
return false;
|
||||
}
|
||||
|
||||
for ( Allele allele : thisAlternateAlleles ) {
|
||||
if ( ! otherAlternateAlleles.contains(allele) ) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------------------------------------
|
||||
//
|
||||
// Working with genotypes
|
||||
|
|
|
|||
|
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.SnpEffEffect;
|
||||
|
||||
public class SnpEffUnitTest {
|
||||
|
||||
@Test
|
||||
public void testParseWellFormedEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInvalidEffectNameEffect() {
|
||||
String effectName = "MADE_UP_EFFECT";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseInvalidEffectImpactEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseWrongNumberOfMetadataFieldsEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertFalse(effect.isWellFormed());
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSnpEffWarningEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSnpEffErrorEffect() {
|
||||
String effectName = "NON_SYNONYMOUS_CODING";
|
||||
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
|
||||
|
||||
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
|
||||
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
|
||||
}
|
||||
}
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
package org.broadinstitute.sting.gatk.walkers.annotator;
|
||||
|
||||
import org.broadinstitute.sting.WalkerTest;
|
||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
|
@ -129,12 +130,24 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
|
|||
@Test
|
||||
public void testSnpEffAnnotations() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
||||
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
|
||||
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
||||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
|
||||
1,
|
||||
Arrays.asList("03eae1dab19a9358250890594bf53607")
|
||||
Arrays.asList("a1c3ba9efc28ee0606339604095076ea")
|
||||
);
|
||||
executeTest("Testing SnpEff annotations", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSnpEffAnnotationsUnsupportedVersion() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
|
||||
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
|
||||
"snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000",
|
||||
1,
|
||||
UserException.class
|
||||
);
|
||||
executeTest("Testing SnpEff annotations (unsupported version)", spec);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@ import org.testng.annotations.Test;
|
|||
import java.util.Arrays;
|
||||
|
||||
public class VariantEvalIntegrationTest extends WalkerTest {
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
|
||||
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
|
||||
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
|
||||
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
|
||||
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
|
||||
|
|
@ -14,6 +14,27 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
private static String cmdRoot = "-T VariantEval" +
|
||||
" -R " + b36KGReference;
|
||||
|
||||
@Test
|
||||
public void testFunctionClassWithSnpeff() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
buildCommandLine(
|
||||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"--dbsnp " + b37dbSNP132,
|
||||
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
|
||||
"-noEV",
|
||||
"-EV TiTvVariantEvaluator",
|
||||
"-noST",
|
||||
"-ST FunctionalClass",
|
||||
"-BTI eval",
|
||||
"-o %s"
|
||||
),
|
||||
1,
|
||||
Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
|
||||
);
|
||||
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testStratifySamplesAndExcludeMonomorphicSites() {
|
||||
WalkerTestSpec spec = new WalkerTestSpec(
|
||||
|
|
@ -21,7 +42,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
|
|||
"-T VariantEval",
|
||||
"-R " + b37KGReference,
|
||||
"--dbsnp " + b37dbSNP132,
|
||||
"--eval " + variantEvalTestDataRoot + "/CEU.trio.callsForVE.vcf",
|
||||
"--eval " + variantEvalTestDataRoot + "CEU.trio.callsForVE.vcf",
|
||||
"-noEV",
|
||||
"-EV TiTvVariantEvaluator",
|
||||
"-ST Sample",
|
||||
|
|
|
|||
|
|
@ -1,259 +0,0 @@
|
|||
/*
|
||||
* Copyright (c) 2011, The Broad Institute
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
package org.broadinstitute.sting.utils.codecs.snpEff;
|
||||
|
||||
import org.apache.commons.io.input.ReaderInputStream;
|
||||
import org.broad.tribble.TribbleException;
|
||||
import org.broad.tribble.readers.AsciiLineReader;
|
||||
import org.broad.tribble.readers.LineReader;
|
||||
import org.testng.Assert;
|
||||
import org.testng.annotations.Test;
|
||||
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
|
||||
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
|
||||
|
||||
import java.io.StringReader;
|
||||
|
||||
public class SnpEffCodecUnitTest {
|
||||
|
||||
@Test
|
||||
public void testParseWellFormedSnpEffHeaderLine() {
|
||||
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
|
||||
String headerReturned = (String)codec.readHeader(reader);
|
||||
|
||||
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
|
||||
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
|
||||
public void testParseMisnamedColumnSnpEffHeaderLine() {
|
||||
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
|
||||
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
|
||||
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
|
||||
"AAs around\tCustom_interval_ID";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
|
||||
codec.readHeader(reader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseSimpleEffectSnpEffLine() {
|
||||
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
69428l,
|
||||
"T",
|
||||
"G",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
6049.69,
|
||||
61573l,
|
||||
null,
|
||||
"ENSG00000177693",
|
||||
"OR4F5",
|
||||
"mRNA",
|
||||
"ENST00000326183",
|
||||
"exon_1_69055_70108",
|
||||
1,
|
||||
false,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"F/C",
|
||||
"TTT/TGT",
|
||||
113,
|
||||
918,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseNonCodingRegionSnpEffLine() {
|
||||
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
|
||||
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
|
||||
"L/V\tCTA/GTA\t272\t952\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
1337592l,
|
||||
"G",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
1935.52,
|
||||
21885l,
|
||||
null,
|
||||
"ENSG00000250188",
|
||||
"RP4-758J18.5",
|
||||
"mRNA",
|
||||
"ENST00000514958",
|
||||
"exon_1_1337454_1338076",
|
||||
2,
|
||||
true,
|
||||
EffectType.NON_SYNONYMOUS_CODING,
|
||||
null,
|
||||
"L/V",
|
||||
"CTA/GTA",
|
||||
272,
|
||||
952,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseExtraEffectInformationSnpEffLine() {
|
||||
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
|
||||
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
879537l,
|
||||
"T",
|
||||
"C",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
341.58,
|
||||
13733l,
|
||||
null,
|
||||
"ENSG00000187634",
|
||||
"SAMD11",
|
||||
"mRNA",
|
||||
"ENST00000341065",
|
||||
null,
|
||||
null,
|
||||
false,
|
||||
EffectType.UTR_3_PRIME,
|
||||
"4 bases from transcript end",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testParseMultiEffectSnpEffLine() {
|
||||
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
|
||||
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
|
||||
|
||||
SnpEffFeature expectedFeature = new SnpEffFeature("1",
|
||||
901901l,
|
||||
"C",
|
||||
"T",
|
||||
ChangeType.SNP,
|
||||
Zygosity.Hom,
|
||||
162.91,
|
||||
4646l,
|
||||
null,
|
||||
"ENSG00000187583",
|
||||
"PLEKHN1",
|
||||
"mRNA",
|
||||
"ENST00000379410",
|
||||
"exon_1_901877_901994",
|
||||
1,
|
||||
false,
|
||||
EffectType.START_GAINED,
|
||||
"ATG, UTR_5_PRIME: 11 bases from TSS",
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null,
|
||||
null
|
||||
);
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
|
||||
|
||||
Assert.assertEquals(feature, expectedFeature);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseWrongNumberOfFieldsSnpEffLine() {
|
||||
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseBlankEffectFieldSnpEffLine() {
|
||||
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
|
||||
}
|
||||
|
||||
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
|
||||
public void testParseInvalidNumericFieldSnpEffLine() {
|
||||
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
|
||||
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
|
||||
|
||||
SnpEffCodec codec = new SnpEffCodec();
|
||||
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
|
||||
}
|
||||
}
|
||||
Loading…
Reference in New Issue