Merge branch 'master' of ssh://nickel.broadinstitute.org/humgen/gsa-scr1/gsa-engineering/git/unstable

This commit is contained in:
Guillermo del Angel 2011-09-15 15:31:07 -04:00
commit 86480b2e13
18 changed files with 606 additions and 1217 deletions

View File

@ -36,7 +36,7 @@ import org.broadinstitute.sting.utils.sam.ReadUtils;
* @version 0.1
*/
public class PlatformFilter extends ReadFilter {
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this strign", required=false)
@Argument(fullName = "PLFilterName", shortName = "PLFilterName", doc="Discard reads with RG:PL attribute containing this string", required=false)
protected String[] PLFilterNames;
public boolean filterOut(SAMRecord rec) {

View File

@ -68,6 +68,13 @@ import org.broadinstitute.sting.gatk.refdata.ReadMetaDataTracker;
* -I input1.bam \
* -I input2.bam \
* --read_filter MappingQualityZero
*
* java -Xmx2g -jar GenomeAnalysisTK.jar \
* -R ref.fasta \
* -T PrintReads \
* -o output.bam \
* -I input.bam \
* -n 2000
* </pre>
*
*/

View File

@ -24,7 +24,9 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.apache.log4j.Logger;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -32,10 +34,7 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompa
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.ExperimentalAnnotation;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;
import org.broadinstitute.sting.utils.Utils;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.VCFHeaderLineType;
import org.broadinstitute.sting.utils.codecs.vcf.VCFInfoHeaderLine;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -46,134 +45,421 @@ import java.util.*;
* (http://snpeff.sourceforge.net/).
*
* For each variant, chooses one of the effects of highest biological impact from the SnpEff
* output file (which must be provided on the command line via --snpEffFile:SnpEff <filename>),
* output file (which must be provided on the command line via --snpEffFile filename.vcf),
* and adds annotations on that effect.
*
* The possible biological effects and their associated impacts are defined in the class:
* org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants
*
* @author David Roazen
*/
public class SnpEff extends InfoFieldAnnotation implements ExperimentalAnnotation {
// SnpEff annotation key names:
public static final String GENE_ID_KEY = "GENE_ID";
public static final String GENE_NAME_KEY = "GENE_NAME";
public static final String TRANSCRIPT_ID_KEY = "TRANSCRIPT_ID";
public static final String EXON_ID_KEY = "EXON_ID";
public static final String EXON_RANK_KEY = "EXON_RANK";
public static final String WITHIN_NON_CODING_GENE_KEY = "WITHIN_NON_CODING_GENE";
public static final String EFFECT_KEY = "EFFECT";
public static final String EFFECT_IMPACT_KEY = "EFFECT_IMPACT";
public static final String EFFECT_EXTRA_INFORMATION_KEY = "EFFECT_EXTRA_INFORMATION";
public static final String OLD_NEW_AA_KEY = "OLD_NEW_AA";
public static final String OLD_NEW_CODON_KEY = "OLD_NEW_CODON";
public static final String CODON_NUM_KEY = "CODON_NUM";
public static final String CDS_SIZE_KEY = "CDS_SIZE";
private static Logger logger = Logger.getLogger(SnpEff.class);
// We refuse to parse SnpEff output files generated by unsupported versions, or
// lacking a SnpEff version number in the VCF header:
public static final String[] SUPPORTED_SNPEFF_VERSIONS = { "2.0.2" };
public static final String SNPEFF_VCF_HEADER_VERSION_LINE_KEY = "SnpEffVersion";
// SnpEff aggregates all effects (and effect metadata) together into a single INFO
// field annotation with the key EFF:
public static final String SNPEFF_INFO_FIELD_KEY = "EFF";
public static final String SNPEFF_EFFECT_METADATA_DELIMITER = "[()]";
public static final String SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER = "\\|";
// Key names for the INFO field annotations we will add to each record, along
// with parsing-related information:
public enum InfoFieldKey {
EFF (-1),
EFF_IMPACT (0),
EFF_CODON_CHANGE (1),
EFF_AMINO_ACID_CHANGE (2),
EFF_GENE_NAME (3),
EFF_GENE_BIOTYPE (4),
EFF_TRANSCRIPT_ID (6),
EFF_EXON_ID (7);
// Index within the effect metadata subfields from the SnpEff EFF annotation
// where each key's associated value can be found during parsing.
private final int fieldIndex;
InfoFieldKey ( int fieldIndex ) {
this.fieldIndex = fieldIndex;
}
public int getFieldIndex() {
return fieldIndex;
}
}
// Possible SnpEff biological effects. All effect names found in the SnpEff input file
// are validated against this list.
public enum EffectType {
NONE,
CHROMOSOME,
INTERGENIC,
UPSTREAM,
UTR_5_PRIME,
UTR_5_DELETED,
START_GAINED,
SPLICE_SITE_ACCEPTOR,
SPLICE_SITE_DONOR,
START_LOST,
SYNONYMOUS_START,
NON_SYNONYMOUS_START,
CDS,
GENE,
TRANSCRIPT,
EXON,
EXON_DELETED,
NON_SYNONYMOUS_CODING,
SYNONYMOUS_CODING,
FRAME_SHIFT,
CODON_CHANGE,
CODON_INSERTION,
CODON_CHANGE_PLUS_CODON_INSERTION,
CODON_DELETION,
CODON_CHANGE_PLUS_CODON_DELETION,
STOP_GAINED,
SYNONYMOUS_STOP,
NON_SYNONYMOUS_STOP,
STOP_LOST,
INTRON,
UTR_3_PRIME,
UTR_3_DELETED,
DOWNSTREAM,
INTRON_CONSERVED,
INTERGENIC_CONSERVED,
REGULATION,
CUSTOM,
WITHIN_NON_CODING_GENE
}
// SnpEff labels each effect as either LOW, MODERATE, or HIGH impact.
public enum EffectImpact {
LOW (1),
MODERATE (2),
HIGH (3);
private final int severityRating;
EffectImpact ( int severityRating ) {
this.severityRating = severityRating;
}
public boolean isHigherImpactThan ( EffectImpact other ) {
return this.severityRating > other.severityRating;
}
}
// SnpEff labels most effects as either CODING or NON_CODING, but sometimes omits this information.
public enum EffectCoding {
CODING,
NON_CODING,
UNKNOWN
}
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
validateRodBinding(walker.getSnpEffRodBinding());
checkSnpEffVersion(walker, toolkit);
}
public Map<String, Object> annotate ( RefMetaDataTracker tracker, AnnotatorCompatibleWalker walker, ReferenceContext ref, Map<String, AlignmentContext> stratifiedContexts, VariantContext vc ) {
RodBinding<SnpEffFeature> snpEffRodBinding = walker.getSnpEffRodBinding();
validateRodBinding(snpEffRodBinding);
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
List<SnpEffFeature> features = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Get only SnpEff records that start at this locus, not merely span it:
List<VariantContext> snpEffRecords = tracker.getValues(snpEffRodBinding, ref.getLocus());
// Add only annotations for one of the most biologically-significant effects as defined in
// the SnpEffConstants class:
SnpEffFeature mostSignificantEffect = getMostSignificantEffect(features);
if ( mostSignificantEffect == null ) {
// Within this set, look for a SnpEff record whose ref/alt alleles match the record to annotate.
// If there is more than one such record, we only need to pick the first one, since the biological
// effects will be the same across all such records:
VariantContext matchingRecord = getMatchingSnpEffRecord(snpEffRecords, vc);
if ( matchingRecord == null ) {
return null;
}
return generateAnnotations(mostSignificantEffect);
// Parse the SnpEff INFO field annotation from the matching record into individual effect objects:
List<SnpEffEffect> effects = parseSnpEffRecord(matchingRecord);
if ( effects.size() == 0 ) {
return null;
}
// Add only annotations for one of the most biologically-significant effects from this set:
SnpEffEffect mostSignificantEffect = getMostSignificantEffect(effects);
return mostSignificantEffect.getAnnotations();
}
private void validateRodBinding ( RodBinding<SnpEffFeature> snpEffRodBinding ) {
private void validateRodBinding ( RodBinding<VariantContext> snpEffRodBinding ) {
if ( snpEffRodBinding == null || ! snpEffRodBinding.isBound() ) {
throw new UserException("The SnpEff annotator requires that a SnpEff output file be provided " +
"as a rodbinding on the command line, but no SnpEff rodbinding was found.");
throw new UserException("The SnpEff annotator requires that a SnpEff VCF output file be provided " +
"as a rodbinding on the command line via the --snpEffFile option, but " +
"no SnpEff rodbinding was found.");
}
}
private SnpEffFeature getMostSignificantEffect ( List<SnpEffFeature> snpEffFeatures ) {
SnpEffFeature mostSignificantEffect = null;
private void checkSnpEffVersion ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) {
RodBinding<VariantContext> snpEffRodBinding = walker.getSnpEffRodBinding();
for ( SnpEffFeature snpEffFeature : snpEffFeatures ) {
VCFHeader snpEffVCFHeader = VCFUtils.getVCFHeadersFromRods(toolkit, Arrays.asList(snpEffRodBinding.getName())).get(snpEffRodBinding.getName());
VCFHeaderLine snpEffVersionLine = snpEffVCFHeader.getOtherHeaderLine(SNPEFF_VCF_HEADER_VERSION_LINE_KEY);
if ( snpEffVersionLine == null || snpEffVersionLine.getValue() == null || snpEffVersionLine.getValue().trim().length() == 0 ) {
throw new UserException("Could not find a " + SNPEFF_VCF_HEADER_VERSION_LINE_KEY + " entry in the VCF header for the SnpEff " +
"input file, and so could not verify that the file was generated by a supported version of SnpEff (" +
Arrays.toString(SUPPORTED_SNPEFF_VERSIONS) + ")");
}
String snpEffVersionString = snpEffVersionLine.getValue().replaceAll("\"", "").split(" ")[0];
if ( ! isSupportedSnpEffVersion(snpEffVersionString) ) {
throw new UserException("The version of SnpEff used to generate the SnpEff input file (" + snpEffVersionString + ") " +
"is not currently supported by the GATK. Supported versions are: " + Arrays.toString(SUPPORTED_SNPEFF_VERSIONS));
}
}
private boolean isSupportedSnpEffVersion ( String versionString ) {
for ( String supportedVersion : SUPPORTED_SNPEFF_VERSIONS ) {
if ( supportedVersion.equals(versionString) ) {
return true;
}
}
return false;
}
private VariantContext getMatchingSnpEffRecord ( List<VariantContext> snpEffRecords, VariantContext vc ) {
for ( VariantContext snpEffRecord : snpEffRecords ) {
if ( snpEffRecord.hasSameAlternateAllelesAs(vc) && snpEffRecord.getReference().equals(vc.getReference()) ) {
return snpEffRecord;
}
}
return null;
}
private List<SnpEffEffect> parseSnpEffRecord ( VariantContext snpEffRecord ) {
List<SnpEffEffect> parsedEffects = new ArrayList<SnpEffEffect>();
Object effectFieldValue = snpEffRecord.getAttribute(SNPEFF_INFO_FIELD_KEY);
List<String> individualEffects;
// The VCF codec stores multi-valued fields as a List<String>, and single-valued fields as a String.
// We can have either in the case of SnpEff, since there may be one or more than one effect in this record.
if ( effectFieldValue instanceof List ) {
individualEffects = (List<String>)effectFieldValue;
}
else {
individualEffects = Arrays.asList((String)effectFieldValue);
}
for ( String effectString : individualEffects ) {
String[] effectNameAndMetadata = effectString.split(SNPEFF_EFFECT_METADATA_DELIMITER);
if ( effectNameAndMetadata.length != 2 ) {
logger.warn(String.format("Malformed SnpEff effect field at %s:%d, skipping: %s",
snpEffRecord.getChr(), snpEffRecord.getStart(), effectString));
continue;
}
String effectName = effectNameAndMetadata[0];
String[] effectMetadata = effectNameAndMetadata[1].split(SNPEFF_EFFECT_METADATA_SUBFIELD_DELIMITER, -1);
SnpEffEffect parsedEffect = new SnpEffEffect(effectName, effectMetadata);
if ( parsedEffect.isWellFormed() ) {
parsedEffects.add(parsedEffect);
}
else {
logger.warn(String.format("Skipping malformed SnpEff effect field at %s:%d. Error was: \"%s\". Field was: \"%s\"",
snpEffRecord.getChr(), snpEffRecord.getStart(), parsedEffect.getParseError(), effectString));
}
}
return parsedEffects;
}
private SnpEffEffect getMostSignificantEffect ( List<SnpEffEffect> effects ) {
SnpEffEffect mostSignificantEffect = null;
for ( SnpEffEffect effect : effects ) {
if ( mostSignificantEffect == null ||
snpEffFeature.isHigherImpactThan(mostSignificantEffect) ) {
effect.isHigherImpactThan(mostSignificantEffect) ) {
mostSignificantEffect = snpEffFeature;
mostSignificantEffect = effect;
}
}
return mostSignificantEffect;
}
private Map<String, Object> generateAnnotations ( SnpEffFeature mostSignificantEffect ) {
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(getKeyNames().size()));
if ( mostSignificantEffect.hasGeneID() )
annotations.put(GENE_ID_KEY, mostSignificantEffect.getGeneID());
if ( mostSignificantEffect.hasGeneName() )
annotations.put(GENE_NAME_KEY, mostSignificantEffect.getGeneName());
if ( mostSignificantEffect.hasTranscriptID() )
annotations.put(TRANSCRIPT_ID_KEY, mostSignificantEffect.getTranscriptID());
if ( mostSignificantEffect.hasExonID() )
annotations.put(EXON_ID_KEY, mostSignificantEffect.getExonID());
if ( mostSignificantEffect.hasExonRank() )
annotations.put(EXON_RANK_KEY, Integer.toString(mostSignificantEffect.getExonRank()));
if ( mostSignificantEffect.isNonCodingGene() )
annotations.put(WITHIN_NON_CODING_GENE_KEY, null);
annotations.put(EFFECT_KEY, mostSignificantEffect.getEffect().toString());
annotations.put(EFFECT_IMPACT_KEY, mostSignificantEffect.getEffectImpact().toString());
if ( mostSignificantEffect.hasEffectExtraInformation() )
annotations.put(EFFECT_EXTRA_INFORMATION_KEY, mostSignificantEffect.getEffectExtraInformation());
if ( mostSignificantEffect.hasOldAndNewAA() )
annotations.put(OLD_NEW_AA_KEY, mostSignificantEffect.getOldAndNewAA());
if ( mostSignificantEffect.hasOldAndNewCodon() )
annotations.put(OLD_NEW_CODON_KEY, mostSignificantEffect.getOldAndNewCodon());
if ( mostSignificantEffect.hasCodonNum() )
annotations.put(CODON_NUM_KEY, Integer.toString(mostSignificantEffect.getCodonNum()));
if ( mostSignificantEffect.hasCdsSize() )
annotations.put(CDS_SIZE_KEY, Integer.toString(mostSignificantEffect.getCdsSize()));
return annotations;
}
public List<String> getKeyNames() {
return Arrays.asList( GENE_ID_KEY,
GENE_NAME_KEY,
TRANSCRIPT_ID_KEY,
EXON_ID_KEY,
EXON_RANK_KEY,
WITHIN_NON_CODING_GENE_KEY,
EFFECT_KEY,
EFFECT_IMPACT_KEY,
EFFECT_EXTRA_INFORMATION_KEY,
OLD_NEW_AA_KEY,
OLD_NEW_CODON_KEY,
CODON_NUM_KEY,
CDS_SIZE_KEY
return Arrays.asList( InfoFieldKey.EFF.toString(),
InfoFieldKey.EFF_IMPACT.toString(),
InfoFieldKey.EFF_CODON_CHANGE.toString(),
InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(),
InfoFieldKey.EFF_GENE_NAME.toString(),
InfoFieldKey.EFF_GENE_BIOTYPE.toString(),
InfoFieldKey.EFF_TRANSCRIPT_ID.toString(),
InfoFieldKey.EFF_EXON_ID.toString()
);
}
public List<VCFInfoHeaderLine> getDescriptions() {
return Arrays.asList(
new VCFInfoHeaderLine(GENE_ID_KEY, 1, VCFHeaderLineType.String, "Gene ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(GENE_NAME_KEY, 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(TRANSCRIPT_ID_KEY, 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_ID_KEY, 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(EXON_RANK_KEY, 1, VCFHeaderLineType.Integer, "Exon rank for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(WITHIN_NON_CODING_GENE_KEY, 0, VCFHeaderLineType.Flag, "If this flag is present, the highest-impact effect resulting from the current variant is within a non-coding gene"),
new VCFInfoHeaderLine(EFFECT_KEY, 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(EFFECT_IMPACT_KEY, 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(SnpEffConstants.EffectImpact.values())),
new VCFInfoHeaderLine(EFFECT_EXTRA_INFORMATION_KEY, 1, VCFHeaderLineType.String, "Additional information about the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_AA_KEY, 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(OLD_NEW_CODON_KEY, 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CODON_NUM_KEY, 1, VCFHeaderLineType.Integer, "Codon number for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(CDS_SIZE_KEY, 1, VCFHeaderLineType.Integer, "CDS size for the highest-impact effect resulting from the current variant")
new VCFInfoHeaderLine(InfoFieldKey.EFF.toString(), 1, VCFHeaderLineType.String, "The highest-impact effect resulting from the current variant (or one of the highest-impact effects, if there is a tie)"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_IMPACT.toString(), 1, VCFHeaderLineType.String, "Impact of the highest-impact effect resulting from the current variant " + Arrays.toString(EffectImpact.values())),
new VCFInfoHeaderLine(InfoFieldKey.EFF_CODON_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New codon for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), 1, VCFHeaderLineType.String, "Old/New amino acid for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_NAME.toString(), 1, VCFHeaderLineType.String, "Gene name for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_GENE_BIOTYPE.toString(), 1, VCFHeaderLineType.String, "Gene biotype for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), 1, VCFHeaderLineType.String, "Transcript ID for the highest-impact effect resulting from the current variant"),
new VCFInfoHeaderLine(InfoFieldKey.EFF_EXON_ID.toString(), 1, VCFHeaderLineType.String, "Exon ID for the highest-impact effect resulting from the current variant")
);
}
/**
* Helper class to parse, validate, and store a single SnpEff effect and its metadata.
*/
protected static class SnpEffEffect {
private EffectType effect;
private EffectImpact impact;
private String codonChange;
private String aminoAcidChange;
private String geneName;
private String geneBiotype;
private EffectCoding coding;
private String transcriptID;
private String exonID;
private String parseError = null;
private boolean isWellFormed = true;
private static final int EXPECTED_NUMBER_OF_METADATA_FIELDS = 8;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_WARNING = 9;
private static final int NUMBER_OF_METADATA_FIELDS_UPON_ERROR = 10;
// Note that contrary to the description for the EFF field layout that SnpEff adds to the VCF header,
// errors come after warnings, not vice versa:
private static final int SNPEFF_WARNING_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_WARNING - 1;
private static final int SNPEFF_ERROR_FIELD_INDEX = NUMBER_OF_METADATA_FIELDS_UPON_ERROR - 1;
private static final int SNPEFF_CODING_FIELD_INDEX = 5;
public SnpEffEffect ( String effectName, String[] effectMetadata ) {
parseEffectName(effectName);
parseEffectMetadata(effectMetadata);
}
private void parseEffectName ( String effectName ) {
try {
effect = EffectType.valueOf(effectName);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("%s is not a recognized effect type", effectName));
}
}
private void parseEffectMetadata ( String[] effectMetadata ) {
if ( effectMetadata.length != EXPECTED_NUMBER_OF_METADATA_FIELDS ) {
if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_WARNING ) {
parseError(String.format("SnpEff issued the following warning: %s", effectMetadata[SNPEFF_WARNING_FIELD_INDEX]));
}
else if ( effectMetadata.length == NUMBER_OF_METADATA_FIELDS_UPON_ERROR ) {
parseError(String.format("SnpEff issued the following error: %s", effectMetadata[SNPEFF_ERROR_FIELD_INDEX]));
}
else {
parseError(String.format("Wrong number of effect metadata fields. Expected %d but found %d",
EXPECTED_NUMBER_OF_METADATA_FIELDS, effectMetadata.length));
}
return;
}
try {
impact = EffectImpact.valueOf(effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect impact: %s", effectMetadata[InfoFieldKey.EFF_IMPACT.getFieldIndex()]));
}
codonChange = effectMetadata[InfoFieldKey.EFF_CODON_CHANGE.getFieldIndex()];
aminoAcidChange = effectMetadata[InfoFieldKey.EFF_AMINO_ACID_CHANGE.getFieldIndex()];
geneName = effectMetadata[InfoFieldKey.EFF_GENE_NAME.getFieldIndex()];
geneBiotype = effectMetadata[InfoFieldKey.EFF_GENE_BIOTYPE.getFieldIndex()];
if ( effectMetadata[SNPEFF_CODING_FIELD_INDEX].trim().length() > 0 ) {
try {
coding = EffectCoding.valueOf(effectMetadata[SNPEFF_CODING_FIELD_INDEX]);
}
catch ( IllegalArgumentException e ) {
parseError(String.format("Unrecognized value for effect coding: %s", effectMetadata[SNPEFF_CODING_FIELD_INDEX]));
}
}
else {
coding = EffectCoding.UNKNOWN;
}
transcriptID = effectMetadata[InfoFieldKey.EFF_TRANSCRIPT_ID.getFieldIndex()];
exonID = effectMetadata[InfoFieldKey.EFF_EXON_ID.getFieldIndex()];
}
private void parseError ( String message ) {
isWellFormed = false;
// Cache only the first error encountered:
if ( parseError == null ) {
parseError = message;
}
}
public boolean isWellFormed() {
return isWellFormed;
}
public String getParseError() {
return parseError == null ? "" : parseError;
}
public boolean isCoding() {
return coding == EffectCoding.CODING;
}
public boolean isHigherImpactThan ( SnpEffEffect other ) {
// If one effect is within a coding gene and the other is not, the effect that is
// within the coding gene has higher impact:
if ( isCoding() && ! other.isCoding() ) {
return true;
}
else if ( ! isCoding() && other.isCoding() ) {
return false;
}
// Otherwise, both effects are either in or not in a coding gene, so we compare the impacts
// of the effects themselves:
return impact.isHigherImpactThan(other.impact);
}
public Map<String, Object> getAnnotations() {
Map<String, Object> annotations = new LinkedHashMap<String, Object>(Utils.optimumHashSize(InfoFieldKey.values().length));
addAnnotation(annotations, InfoFieldKey.EFF.toString(), effect.toString());
addAnnotation(annotations, InfoFieldKey.EFF_IMPACT.toString(), impact.toString());
addAnnotation(annotations, InfoFieldKey.EFF_CODON_CHANGE.toString(), codonChange);
addAnnotation(annotations, InfoFieldKey.EFF_AMINO_ACID_CHANGE.toString(), aminoAcidChange);
addAnnotation(annotations, InfoFieldKey.EFF_GENE_NAME.toString(), geneName);
addAnnotation(annotations, InfoFieldKey.EFF_GENE_BIOTYPE.toString(), geneBiotype);
addAnnotation(annotations, InfoFieldKey.EFF_TRANSCRIPT_ID.toString(), transcriptID);
addAnnotation(annotations, InfoFieldKey.EFF_EXON_ID.toString(), exonID);
return annotations;
}
private void addAnnotation ( Map<String, Object> annotations, String keyName, String keyValue ) {
// Only add annotations for keys associated with non-empty values:
if ( keyValue != null && keyValue.trim().length() > 0 ) {
annotations.put(keyName, keyValue);
}
}
}
}

View File

@ -40,7 +40,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnot
import org.broadinstitute.sting.utils.BaseUtils;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.classloader.PluginManager;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import org.broadinstitute.sting.utils.variantcontext.VariantContextUtils;
@ -93,8 +92,8 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
* listed in the SnpEff output file for each variant.
*/
@Input(fullName="snpEffFile", shortName = "snpEffFile", doc="A SnpEff output file from which to add annotations", required=false)
public RodBinding<SnpEffFeature> snpEffFile;
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return snpEffFile; }
public RodBinding<VariantContext> snpEffFile;
public RodBinding<VariantContext> getSnpEffRodBinding() { return snpEffFile; }
/**
* rsIDs from this file are used to populate the ID column of the output. Also, the DB INFO flag will be set when appropriate.
@ -204,9 +203,9 @@ public class VariantAnnotator extends RodWalker<Integer, Integer> implements Ann
}
if ( USE_ALL_ANNOTATIONS )
engine = new VariantAnnotatorEngine(this);
engine = new VariantAnnotatorEngine(this, getToolkit());
else
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this);
engine = new VariantAnnotatorEngine(annotationGroupsToUse, annotationsToUse, this, getToolkit());
engine.initializeExpressions(expressionsToUse);
engine.invokeAnnotationInitializationMethods();

View File

@ -26,6 +26,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.gatk.contexts.AlignmentContext;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
@ -46,6 +47,7 @@ public class VariantAnnotatorEngine {
private HashMap<RodBinding<VariantContext>, String> dbAnnotations = new HashMap<RodBinding<VariantContext>, String>();
private AnnotatorCompatibleWalker walker;
private GenomeAnalysisEngine toolkit;
private static class VAExpression {
@ -71,16 +73,18 @@ public class VariantAnnotatorEngine {
}
// use this constructor if you want all possible annotations
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
requestedInfoAnnotations = AnnotationInterfaceManager.createAllInfoFieldAnnotations();
requestedGenotypeAnnotations = AnnotationInterfaceManager.createAllGenotypeAnnotations();
initializeDBs();
}
// use this constructor if you want to select specific annotations (and/or interfaces)
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker) {
public VariantAnnotatorEngine(List<String> annotationGroupsToUse, List<String> annotationsToUse, AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit) {
this.walker = walker;
this.toolkit = toolkit;
initializeAnnotations(annotationGroupsToUse, annotationsToUse);
initializeDBs();
}
@ -112,11 +116,11 @@ public class VariantAnnotatorEngine {
public void invokeAnnotationInitializationMethods() {
for ( VariantAnnotatorAnnotation annotation : requestedInfoAnnotations ) {
annotation.initialize(walker);
annotation.initialize(walker, toolkit);
}
for ( VariantAnnotatorAnnotation annotation : requestedGenotypeAnnotations ) {
annotation.initialize(walker);
annotation.initialize(walker, toolkit);
}
}

View File

@ -1,7 +1,6 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.commandline.RodBinding;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.List;
@ -10,8 +9,8 @@ public interface AnnotatorCompatibleWalker {
// getter methods for various used bindings
public abstract RodBinding<VariantContext> getVariantRodBinding();
public abstract RodBinding<SnpEffFeature> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getSnpEffRodBinding();
public abstract RodBinding<VariantContext> getDbsnpRodBinding();
public abstract List<RodBinding<VariantContext>> getCompRodBindings();
public abstract List<RodBinding<VariantContext>> getResourceRodBindings();
}
}

View File

@ -24,6 +24,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator.interfaces;
import org.broadinstitute.sting.gatk.GenomeAnalysisEngine;
import org.broadinstitute.sting.utils.help.DocumentedGATKFeature;
import java.util.List;
@ -34,5 +35,5 @@ public abstract class VariantAnnotatorAnnotation {
public abstract List<String> getKeyNames();
// initialization method (optional for subclasses, and therefore non-abstract)
public void initialize ( AnnotatorCompatibleWalker walker ) { }
public void initialize ( AnnotatorCompatibleWalker walker, GenomeAnalysisEngine toolkit ) { }
}

View File

@ -38,7 +38,6 @@ import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.AnnotatorCompatibleWalker;
import org.broadinstitute.sting.utils.SampleUtils;
import org.broadinstitute.sting.utils.baq.BAQ;
import org.broadinstitute.sting.utils.codecs.snpEff.SnpEffFeature;
import org.broadinstitute.sting.utils.codecs.vcf.*;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
@ -128,7 +127,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
protected DbsnpArgumentCollection dbsnp = new DbsnpArgumentCollection();
public RodBinding<VariantContext> getDbsnpRodBinding() { return dbsnp.dbsnp; }
public RodBinding<VariantContext> getVariantRodBinding() { return null; }
public RodBinding<SnpEffFeature> getSnpEffRodBinding() { return null; }
public RodBinding<VariantContext> getSnpEffRodBinding() { return null; }
public List<RodBinding<VariantContext>> getCompRodBindings() { return Collections.emptyList(); }
public List<RodBinding<VariantContext>> getResourceRodBindings() { return Collections.emptyList(); }
@ -211,7 +210,7 @@ public class UnifiedGenotyper extends LocusWalker<VariantCallContext, UnifiedGen
if ( verboseWriter != null )
verboseWriter.println("AFINFO\tLOC\tREF\tALT\tMAF\tF\tAFprior\tAFposterior\tNormalizedPosterior");
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this);
annotationEngine = new VariantAnnotatorEngine(Arrays.asList(annotationClassesToUse), annotationsToUse, this, getToolkit());
UG_engine = new UnifiedGenotyperEngine(getToolkit(), UAC, logger, verboseWriter, annotationEngine, samples);
// initialize the header

View File

@ -2,6 +2,7 @@ package org.broadinstitute.sting.gatk.walkers.varianteval.stratifications;
import org.broadinstitute.sting.gatk.contexts.ReferenceContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff;
import org.broadinstitute.sting.utils.variantcontext.VariantContext;
import java.util.ArrayList;
@ -11,12 +12,19 @@ import java.util.List;
* Stratifies by nonsense, missense, silent, and all annotations in the input ROD, from the INFO field annotation.
*/
public class FunctionalClass extends VariantStratifier {
public enum FunctionalType {
silent,
missense,
nonsense
}
@Override
public void initialize() {
states.add("all");
states.add("silent");
states.add("missense");
states.add("nonsense");
for ( FunctionalType type : FunctionalType.values() )
states.add(type.name());
}
@ -26,10 +34,12 @@ public class FunctionalClass extends VariantStratifier {
relevantStates.add("all");
if (eval != null && eval.isVariant()) {
String type = null;
FunctionalType type = null;
if (eval.hasAttribute("refseq.functionalClass")) {
type = eval.getAttributeAsString("refseq.functionalClass");
try {
type = FunctionalType.valueOf(eval.getAttributeAsString("refseq.functionalClass"));
} catch ( Exception e ) {} // don't error out if the type isn't supported
} else if (eval.hasAttribute("refseq.functionalClass_1")) {
int annotationId = 1;
String key;
@ -37,24 +47,33 @@ public class FunctionalClass extends VariantStratifier {
do {
key = String.format("refseq.functionalClass_%d", annotationId);
String newtype = eval.getAttributeAsString(key);
if ( newtype != null && !newtype.equalsIgnoreCase("null") &&
( type == null ||
( type.equals("silent") && !newtype.equals("silent") ) ||
( type.equals("missense") && newtype.equals("nonsense") ) )
) {
type = newtype;
String newtypeStr = eval.getAttributeAsString(key);
if ( newtypeStr != null && !newtypeStr.equalsIgnoreCase("null") ) {
try {
FunctionalType newType = FunctionalType.valueOf(newtypeStr);
if ( type == null ||
( type == FunctionalType.silent && newType != FunctionalType.silent ) ||
( type == FunctionalType.missense && newType == FunctionalType.nonsense ) ) {
type = newType;
}
} catch ( Exception e ) {} // don't error out if the type isn't supported
}
annotationId++;
} while (eval.hasAttribute(key));
} else if ( eval.hasAttribute(SnpEff.InfoFieldKey.EFF.name() ) ) {
SnpEff.EffectType snpEffType = SnpEff.EffectType.valueOf(eval.getAttribute(SnpEff.InfoFieldKey.EFF.name()).toString());
if ( snpEffType == SnpEff.EffectType.STOP_GAINED )
type = FunctionalType.nonsense;
else if ( snpEffType == SnpEff.EffectType.NON_SYNONYMOUS_CODING )
type = FunctionalType.missense;
else if ( snpEffType == SnpEff.EffectType.SYNONYMOUS_CODING )
type = FunctionalType.silent;
}
if (type != null) {
if (type.equals("silent")) { relevantStates.add("silent"); }
else if (type.equals("missense")) { relevantStates.add("missense"); }
else if (type.equals("nonsense")) { relevantStates.add("nonsense"); }
if ( type != null ) {
relevantStates.add(type.name());
}
}

View File

@ -1,282 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.broad.tribble.Feature;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.broadinstitute.sting.gatk.refdata.SelfScopingFeatureCodec;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
/**
* Codec for decoding the output format of the SnpEff variant effect predictor tool
*
* <p>
* This format has 23 tab-delimited fields:
*
* <pre>
* Chromosome
* Position
* Reference
* Change
* Change Type: {SNP, MNP, INS, DEL}
* Zygosity: {Hom, Het}
* Quality
* Coverage
* Warnings
* Gene ID
* Gene Name
* Bio Type
* Transcript ID
* Exon ID
* Exon Rank
* Effect
* Old/New Amino Acid
* Old/New Codon
* Codon Num
* CDS Size
* Codons Around
* Amino Acids Around
* Custom Interval ID
* </pre>
* Note that we treat all except the Chromosome, Position, and Effect fields as optional.
* </p>
*
* <p>
* See also: @see <a href="http://snpeff.sourceforge.net/">SNPEff project page</a>
* </p>
*
* @author David Roazen
* @since 2011
*/
public class SnpEffCodec implements FeatureCodec, SelfScopingFeatureCodec {
public static final int EXPECTED_NUMBER_OF_FIELDS = 23;
public static final String FIELD_DELIMITER_PATTERN = "\\t";
public static final String EFFECT_FIELD_DELIMITER_PATTERN = "[,:]";
public static final String HEADER_LINE_START = "# ";
public static final String[] HEADER_FIELD_NAMES = { "Chromo",
"Position",
"Reference",
"Change",
"Change type",
"Homozygous",
"Quality",
"Coverage",
"Warnings",
"Gene_ID",
"Gene_name",
"Bio_type",
"Trancript_ID", // yes, this is how it's spelled in the SnpEff output
"Exon_ID",
"Exon_Rank",
"Effect",
"old_AA/new_AA",
"Old_codon/New_codon",
"Codon_Num(CDS)",
"CDS_size",
"Codons around",
"AAs around",
"Custom_interval_ID"
};
// The "Chromo", "Position", and "Effect" fields are required to be non-empty in every SnpEff output line:
public static final int[] REQUIRED_FIELDS = { 0, 1, 15 };
public static final String NON_CODING_GENE_FLAG = "WITHIN_NON_CODING_GENE";
public Feature decodeLoc ( String line ) {
return decode(line);
}
public Feature decode ( String line ) {
String[] tokens = line.split(FIELD_DELIMITER_PATTERN, -1);
if ( tokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
throw new TribbleException.InvalidDecodeLine("Line does not have the expected (" + EXPECTED_NUMBER_OF_FIELDS +
") number of fields: found " + tokens.length + " fields.", line);
}
try {
trimAllFields(tokens);
checkForRequiredFields(tokens, line);
String contig = tokens[0];
long position = Long.parseLong(tokens[1]);
String reference = tokens[2].isEmpty() ? null : tokens[2];
String change = tokens[3].isEmpty() ? null : tokens[3];
ChangeType changeType = tokens[4].isEmpty() ? null : ChangeType.valueOf(tokens[4]);
Zygosity zygosity = tokens[5].isEmpty() ? null : Zygosity.valueOf(tokens[5]);
Double quality = tokens[6].isEmpty() ? null : Double.parseDouble(tokens[6]);
Long coverage = tokens[7].isEmpty() ? null : Long.parseLong(tokens[7]);
String warnings = tokens[8].isEmpty() ? null : tokens[8];
String geneID = tokens[9].isEmpty() ? null : tokens[9];
String geneName = tokens[10].isEmpty() ? null : tokens[10];
String bioType = tokens[11].isEmpty() ? null : tokens[11];
String transcriptID = tokens[12].isEmpty() ? null : tokens[12];
String exonID = tokens[13].isEmpty() ? null : tokens[13];
Integer exonRank = tokens[14].isEmpty() ? null : Integer.parseInt(tokens[14]);
boolean isNonCodingGene = isNonCodingGene(tokens[15]);
// Split the effect field into three subfields if the WITHIN_NON_CODING_GENE flag is present,
// otherwise split it into two subfields. We need this limit to prevent the extra effect-related information
// in the final field (when present) from being inappropriately tokenized:
int effectFieldTokenLimit = isNonCodingGene ? 3 : 2;
String[] effectFieldTokens = tokens[15].split(EFFECT_FIELD_DELIMITER_PATTERN, effectFieldTokenLimit);
EffectType effect = parseEffect(effectFieldTokens, isNonCodingGene);
String effectExtraInformation = parseEffectExtraInformation(effectFieldTokens, isNonCodingGene);
String oldAndNewAA = tokens[16].isEmpty() ? null : tokens[16];
String oldAndNewCodon = tokens[17].isEmpty() ? null : tokens[17];
Integer codonNum = tokens[18].isEmpty() ? null : Integer.parseInt(tokens[18]);
Integer cdsSize = tokens[19].isEmpty() ? null : Integer.parseInt(tokens[19]);
String codonsAround = tokens[20].isEmpty() ? null : tokens[20];
String aasAround = tokens[21].isEmpty() ? null : tokens[21];
String customIntervalID = tokens[22].isEmpty() ? null : tokens[22];
return new SnpEffFeature(contig, position, reference, change, changeType, zygosity, quality, coverage,
warnings, geneID, geneName, bioType, transcriptID, exonID, exonRank, isNonCodingGene,
effect, effectExtraInformation, oldAndNewAA, oldAndNewCodon, codonNum, cdsSize,
codonsAround, aasAround, customIntervalID);
}
catch ( NumberFormatException e ) {
throw new TribbleException.InvalidDecodeLine("Error parsing a numeric field : " + e.getMessage(), line);
}
catch ( IllegalArgumentException e ) {
throw new TribbleException.InvalidDecodeLine("Illegal value in field: " + e.getMessage(), line);
}
}
private void trimAllFields ( String[] tokens ) {
for ( int i = 0; i < tokens.length; i++ ) {
tokens[i] = tokens[i].trim();
}
}
private void checkForRequiredFields ( String[] tokens, String line ) {
for ( int requiredFieldIndex : REQUIRED_FIELDS ) {
if ( tokens[requiredFieldIndex].isEmpty() ) {
throw new TribbleException.InvalidDecodeLine("Line is missing required field \"" +
HEADER_FIELD_NAMES[requiredFieldIndex] + "\"",
line);
}
}
}
private boolean isNonCodingGene ( String effectField ) {
return effectField.startsWith(NON_CODING_GENE_FLAG);
}
private EffectType parseEffect ( String[] effectFieldTokens, boolean isNonCodingGene ) {
String effectName = "";
// If there's a WITHIN_NON_CODING_GENE flag, the effect name will be in the second subfield,
// otherwise it will be in the first subfield:
if ( effectFieldTokens.length > 1 && isNonCodingGene ) {
effectName = effectFieldTokens[1].trim();
}
else {
effectName = effectFieldTokens[0].trim();
}
return EffectType.valueOf(effectName);
}
private String parseEffectExtraInformation ( String[] effectFieldTokens, boolean isNonCodingGene ) {
// The extra effect-related information, if present, will always be the last subfield:
if ( (effectFieldTokens.length == 2 && ! isNonCodingGene) || effectFieldTokens.length == 3 ) {
return effectFieldTokens[effectFieldTokens.length - 1].trim();
}
return null;
}
public Class<SnpEffFeature> getFeatureType() {
return SnpEffFeature.class;
}
public Object readHeader ( LineReader reader ) {
String headerLine = "";
try {
headerLine = reader.readLine();
}
catch ( IOException e ) {
throw new TribbleException("Unable to read header line from input file.");
}
validateHeaderLine(headerLine);
return headerLine;
}
private void validateHeaderLine ( String headerLine ) {
if ( headerLine == null || ! headerLine.startsWith(HEADER_LINE_START) ) {
throw new TribbleException.InvalidHeader("Header line does not start with " + HEADER_LINE_START);
}
String[] headerTokens = headerLine.substring(HEADER_LINE_START.length()).split(FIELD_DELIMITER_PATTERN);
if ( headerTokens.length != EXPECTED_NUMBER_OF_FIELDS ) {
throw new TribbleException.InvalidHeader("Header line does not contain headings for the expected number (" +
EXPECTED_NUMBER_OF_FIELDS + ") of columns.");
}
for ( int columnIndex = 0; columnIndex < headerTokens.length; columnIndex++ ) {
if ( ! HEADER_FIELD_NAMES[columnIndex].equals(headerTokens[columnIndex]) ) {
throw new TribbleException.InvalidHeader("Header field #" + columnIndex + ": Expected \"" +
HEADER_FIELD_NAMES[columnIndex] + "\" but found \"" +
headerTokens[columnIndex] + "\"");
}
}
}
public boolean canDecode ( final File potentialInput ) {
try {
LineReader reader = new AsciiLineReader(new FileInputStream(potentialInput));
readHeader(reader);
}
catch ( Exception e ) {
return false;
}
return true;
}
}

View File

@ -1,115 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
/**
* A set of constants associated with the SnpEff codec.
*
* @author David Roazen
*/
public class SnpEffConstants {
// Possible SnpEff biological effects and their associated impacts:
public enum EffectType {
START_GAINED (EffectImpact.HIGH),
START_LOST (EffectImpact.HIGH),
EXON_DELETED (EffectImpact.HIGH),
FRAME_SHIFT (EffectImpact.HIGH),
STOP_GAINED (EffectImpact.HIGH),
STOP_LOST (EffectImpact.HIGH),
SPLICE_SITE_ACCEPTOR (EffectImpact.HIGH),
SPLICE_SITE_DONOR (EffectImpact.HIGH),
NON_SYNONYMOUS_CODING (EffectImpact.MODERATE),
UTR_5_DELETED (EffectImpact.MODERATE),
UTR_3_DELETED (EffectImpact.MODERATE),
CODON_INSERTION (EffectImpact.MODERATE),
CODON_CHANGE_PLUS_CODON_INSERTION (EffectImpact.MODERATE),
CODON_DELETION (EffectImpact.MODERATE),
CODON_CHANGE_PLUS_CODON_DELETION (EffectImpact.MODERATE),
NONE (EffectImpact.LOW),
CHROMOSOME (EffectImpact.LOW),
INTERGENIC (EffectImpact.LOW),
UPSTREAM (EffectImpact.LOW),
UTR_5_PRIME (EffectImpact.LOW),
SYNONYMOUS_START (EffectImpact.LOW),
NON_SYNONYMOUS_START (EffectImpact.LOW),
CDS (EffectImpact.LOW),
GENE (EffectImpact.LOW),
TRANSCRIPT (EffectImpact.LOW),
EXON (EffectImpact.LOW),
SYNONYMOUS_CODING (EffectImpact.LOW),
CODON_CHANGE (EffectImpact.LOW),
SYNONYMOUS_STOP (EffectImpact.LOW),
NON_SYNONYMOUS_STOP (EffectImpact.LOW),
INTRON (EffectImpact.LOW),
UTR_3_PRIME (EffectImpact.LOW),
DOWNSTREAM (EffectImpact.LOW),
INTRON_CONSERVED (EffectImpact.LOW),
INTERGENIC_CONSERVED (EffectImpact.LOW),
CUSTOM (EffectImpact.LOW);
private final EffectImpact impact;
EffectType ( EffectImpact impact ) {
this.impact = impact;
}
public EffectImpact getImpact() {
return impact;
}
}
public enum EffectImpact {
LOW (1),
MODERATE (2),
HIGH (3);
private final int severityRating;
EffectImpact ( int severityRating ) {
this.severityRating = severityRating;
}
public boolean isHigherImpactThan ( EffectImpact other ) {
return this.severityRating > other.severityRating;
}
}
// The kinds of variants supported by the SnpEff output format:
public enum ChangeType {
SNP,
MNP,
INS,
DEL
}
// Possible zygosities of SnpEff variants:
public enum Zygosity {
Hom,
Het
}
}

View File

@ -1,423 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.broad.tribble.Feature;
import java.util.NoSuchElementException;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectImpact;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
/**
* Feature returned by the SnpEff codec -- stores the parsed field values from a line of SnpEff output.
*
* Many fields are optional, and missing values are represented by nulls. You should always call the
* hasX() method before calling the corresponding getX() method. Required fields can never be null
* and do not have a hasX() method.
*
* @author David Roazen
*/
public class SnpEffFeature implements Feature {
private String contig; // REQUIRED FIELD
private long position; // REQUIRED FIELD
private String reference;
private String change;
private ChangeType changeType;
private Zygosity zygosity;
private Double quality;
private Long coverage;
private String warnings;
private String geneID;
private String geneName;
private String bioType;
private String transcriptID;
private String exonID;
private Integer exonRank;
private boolean isNonCodingGene; // REQUIRED FIELD
private EffectType effect; // REQUIRED FIELD
private String effectExtraInformation;
private String oldAndNewAA;
private String oldAndNewCodon;
private Integer codonNum;
private Integer cdsSize;
private String codonsAround;
private String aasAround;
private String customIntervalID;
public SnpEffFeature ( String contig,
long position,
String reference,
String change,
ChangeType changeType,
Zygosity zygosity,
Double quality,
Long coverage,
String warnings,
String geneID,
String geneName,
String bioType,
String transcriptID,
String exonID,
Integer exonRank,
boolean isNonCodingGene,
EffectType effect,
String effectExtraInformation,
String oldAndNewAA,
String oldAndNewCodon,
Integer codonNum,
Integer cdsSize,
String codonsAround,
String aasAround,
String customIntervalID ) {
if ( contig == null || effect == null ) {
throw new IllegalArgumentException("contig and effect cannot be null, as they are required fields");
}
this.contig = contig;
this.position = position;
this.reference = reference;
this.change = change;
this.changeType = changeType;
this.zygosity = zygosity;
this.quality = quality;
this.coverage = coverage;
this.warnings = warnings;
this.geneID = geneID;
this.geneName = geneName;
this.bioType = bioType;
this.transcriptID = transcriptID;
this.exonID = exonID;
this.exonRank = exonRank;
this.isNonCodingGene = isNonCodingGene;
this.effect = effect;
this.effectExtraInformation = effectExtraInformation;
this.oldAndNewAA = oldAndNewAA;
this.oldAndNewCodon = oldAndNewCodon;
this.codonNum = codonNum;
this.cdsSize = cdsSize;
this.codonsAround = codonsAround;
this.aasAround = aasAround;
this.customIntervalID = customIntervalID;
}
public boolean isHigherImpactThan ( SnpEffFeature other ) {
// If one effect is in a non-coding gene and the other is not, the effect NOT in the
// non-coding gene has higher impact:
if ( ! isNonCodingGene() && other.isNonCodingGene() ) {
return true;
}
else if ( isNonCodingGene() && ! other.isNonCodingGene() ) {
return false;
}
// Otherwise, both effects are either in or not in a non-coding gene, so we compare the impacts
// of the effects themselves as defined in the SnpEffConstants class:
return getEffectImpact().isHigherImpactThan(other.getEffectImpact());
}
public String getChr() {
return contig;
}
public int getStart() {
return (int)position;
}
public int getEnd() {
return (int)position;
}
public boolean hasReference() {
return reference != null;
}
public String getReference() {
if ( reference == null ) throw new NoSuchElementException("This feature has no reference field");
return reference;
}
public boolean hasChange() {
return change != null;
}
public String getChange() {
if ( change == null ) throw new NoSuchElementException("This feature has no change field");
return change;
}
public boolean hasChangeType() {
return changeType != null;
}
public ChangeType getChangeType() {
if ( changeType == null ) throw new NoSuchElementException("This feature has no changeType field");
return changeType;
}
public boolean hasZygosity() {
return zygosity != null;
}
public Zygosity getZygosity() {
if ( zygosity == null ) throw new NoSuchElementException("This feature has no zygosity field");
return zygosity;
}
public boolean hasQuality() {
return quality != null;
}
public Double getQuality() {
if ( quality == null ) throw new NoSuchElementException("This feature has no quality field");
return quality;
}
public boolean hasCoverage() {
return coverage != null;
}
public Long getCoverage() {
if ( coverage == null ) throw new NoSuchElementException("This feature has no coverage field");
return coverage;
}
public boolean hasWarnings() {
return warnings != null;
}
public String getWarnings() {
if ( warnings == null ) throw new NoSuchElementException("This feature has no warnings field");
return warnings;
}
public boolean hasGeneID() {
return geneID != null;
}
public String getGeneID() {
if ( geneID == null ) throw new NoSuchElementException("This feature has no geneID field");
return geneID;
}
public boolean hasGeneName() {
return geneName != null;
}
public String getGeneName() {
if ( geneName == null ) throw new NoSuchElementException("This feature has no geneName field");
return geneName;
}
public boolean hasBioType() {
return bioType != null;
}
public String getBioType() {
if ( bioType == null ) throw new NoSuchElementException("This feature has no bioType field");
return bioType;
}
public boolean hasTranscriptID() {
return transcriptID != null;
}
public String getTranscriptID() {
if ( transcriptID == null ) throw new NoSuchElementException("This feature has no transcriptID field");
return transcriptID;
}
public boolean hasExonID() {
return exonID != null;
}
public String getExonID() {
if ( exonID == null ) throw new NoSuchElementException("This feature has no exonID field");
return exonID;
}
public boolean hasExonRank() {
return exonRank != null;
}
public Integer getExonRank() {
if ( exonRank == null ) throw new NoSuchElementException("This feature has no exonRank field");
return exonRank;
}
public boolean isNonCodingGene() {
return isNonCodingGene;
}
public EffectType getEffect() {
return effect;
}
public EffectImpact getEffectImpact() {
return effect.getImpact();
}
public boolean hasEffectExtraInformation() {
return effectExtraInformation != null;
}
public String getEffectExtraInformation() {
if ( effectExtraInformation == null ) throw new NoSuchElementException("This feature has no effectExtraInformation field");
return effectExtraInformation;
}
public boolean hasOldAndNewAA() {
return oldAndNewAA != null;
}
public String getOldAndNewAA() {
if ( oldAndNewAA == null ) throw new NoSuchElementException("This feature has no oldAndNewAA field");
return oldAndNewAA;
}
public boolean hasOldAndNewCodon() {
return oldAndNewCodon != null;
}
public String getOldAndNewCodon() {
if ( oldAndNewCodon == null ) throw new NoSuchElementException("This feature has no oldAndNewCodon field");
return oldAndNewCodon;
}
public boolean hasCodonNum() {
return codonNum != null;
}
public Integer getCodonNum() {
if ( codonNum == null ) throw new NoSuchElementException("This feature has no codonNum field");
return codonNum;
}
public boolean hasCdsSize() {
return cdsSize != null;
}
public Integer getCdsSize() {
if ( cdsSize == null ) throw new NoSuchElementException("This feature has no cdsSize field");
return cdsSize;
}
public boolean hasCodonsAround() {
return codonsAround != null;
}
public String getCodonsAround() {
if ( codonsAround == null ) throw new NoSuchElementException("This feature has no codonsAround field");
return codonsAround;
}
public boolean hadAasAround() {
return aasAround != null;
}
public String getAasAround() {
if ( aasAround == null ) throw new NoSuchElementException("This feature has no aasAround field");
return aasAround;
}
public boolean hasCustomIntervalID() {
return customIntervalID != null;
}
public String getCustomIntervalID() {
if ( customIntervalID == null ) throw new NoSuchElementException("This feature has no customIntervalID field");
return customIntervalID;
}
public boolean equals ( Object o ) {
if ( o == null || ! (o instanceof SnpEffFeature) ) {
return false;
}
SnpEffFeature other = (SnpEffFeature)o;
return contig.equals(other.contig) &&
position == other.position &&
(reference == null ? other.reference == null : reference.equals(other.reference)) &&
(change == null ? other.change == null : change.equals(other.change)) &&
changeType == other.changeType &&
zygosity == other.zygosity &&
(quality == null ? other.quality == null : quality.equals(other.quality)) &&
(coverage == null ? other.coverage == null : coverage.equals(other.coverage)) &&
(warnings == null ? other.warnings == null : warnings.equals(other.warnings)) &&
(geneID == null ? other.geneID == null : geneID.equals(other.geneID)) &&
(geneName == null ? other.geneName == null : geneName.equals(other.geneName)) &&
(bioType == null ? other.bioType == null : bioType.equals(other.bioType)) &&
(transcriptID == null ? other.transcriptID == null : transcriptID.equals(other.transcriptID)) &&
(exonID == null ? other.exonID == null : exonID.equals(other.exonID)) &&
(exonRank == null ? other.exonRank == null : exonRank.equals(other.exonRank)) &&
isNonCodingGene == other.isNonCodingGene &&
effect == other.effect &&
(effectExtraInformation == null ? other.effectExtraInformation == null : effectExtraInformation.equals(other.effectExtraInformation)) &&
(oldAndNewAA == null ? other.oldAndNewAA == null : oldAndNewAA.equals(other.oldAndNewAA)) &&
(oldAndNewCodon == null ? other.oldAndNewCodon == null : oldAndNewCodon.equals(other.oldAndNewCodon)) &&
(codonNum == null ? other.codonNum == null : codonNum.equals(other.codonNum)) &&
(cdsSize == null ? other.cdsSize == null : cdsSize.equals(other.cdsSize)) &&
(codonsAround == null ? other.codonsAround == null : codonsAround.equals(other.codonsAround)) &&
(aasAround == null ? other.aasAround == null : aasAround.equals(other.aasAround)) &&
(customIntervalID == null ? other.customIntervalID == null : customIntervalID.equals(other.customIntervalID));
}
public String toString() {
return "[Contig: " + contig +
" Position: " + position +
" Reference: " + reference +
" Change: " + change +
" Change Type: " + changeType +
" Zygosity: " + zygosity +
" Quality: " + quality +
" Coverage: " + coverage +
" Warnings: " + warnings +
" Gene ID: " + geneID +
" Gene Name: " + geneName +
" Bio Type: " + bioType +
" Transcript ID: " + transcriptID +
" Exon ID: " + exonID +
" Exon Rank: " + exonRank +
" Non-Coding Gene: " + isNonCodingGene +
" Effect: " + effect +
" Effect Extra Information: " + effectExtraInformation +
" Old/New AA: " + oldAndNewAA +
" Old/New Codon: " + oldAndNewCodon +
" Codon Num: " + codonNum +
" CDS Size: " + cdsSize +
" Codons Around: " + codonsAround +
" AAs Around: " + aasAround +
" Custom Interval ID: " + customIntervalID +
"]";
}
}

View File

@ -24,6 +24,7 @@ public class VCFHeader {
private final Set<VCFHeaderLine> mMetaData;
private final Map<String, VCFInfoHeaderLine> mInfoMetaData = new HashMap<String, VCFInfoHeaderLine>();
private final Map<String, VCFFormatHeaderLine> mFormatMetaData = new HashMap<String, VCFFormatHeaderLine>();
private final Map<String, VCFHeaderLine> mOtherMetaData = new HashMap<String, VCFHeaderLine>();
// the list of auxillary tags
private final Set<String> mGenotypeSampleNames = new LinkedHashSet<String>();
@ -110,6 +111,9 @@ public class VCFHeader {
VCFFormatHeaderLine formatLine = (VCFFormatHeaderLine)line;
mFormatMetaData.put(formatLine.getName(), formatLine);
}
else {
mOtherMetaData.put(line.getKey(), line);
}
}
}
@ -185,6 +189,14 @@ public class VCFHeader {
public VCFFormatHeaderLine getFormatHeaderLine(String key) {
return mFormatMetaData.get(key);
}
/**
* @param key the header key name
* @return the meta data line, or null if there is none
*/
public VCFHeaderLine getOtherHeaderLine(String key) {
return mOtherMetaData.get(key);
}
}

View File

@ -817,6 +817,28 @@ public class VariantContext implements Feature { // to enable tribble intergrati
throw new IllegalArgumentException("Requested " + i + " alternative allele but there are only " + n + " alternative alleles " + this);
}
/**
* @param other VariantContext whose alternate alleles to compare against
* @return true if this VariantContext has the same alternate alleles as other,
* regardless of ordering. Otherwise returns false.
*/
public boolean hasSameAlternateAllelesAs ( VariantContext other ) {
Set<Allele> thisAlternateAlleles = getAlternateAlleles();
Set<Allele> otherAlternateAlleles = other.getAlternateAlleles();
if ( thisAlternateAlleles.size() != otherAlternateAlleles.size() ) {
return false;
}
for ( Allele allele : thisAlternateAlleles ) {
if ( ! otherAlternateAlleles.contains(allele) ) {
return false;
}
}
return true;
}
// ---------------------------------------------------------------------------------------------------------
//
// Working with genotypes

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.testng.Assert;
import org.testng.annotations.Test;
import org.broadinstitute.sting.gatk.walkers.annotator.SnpEff.SnpEffEffect;
public class SnpEffUnitTest {
@Test
public void testParseWellFormedEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( effect.isWellFormed() && effect.isCoding() );
}
@Test
public void testParseInvalidEffectNameEffect() {
String effectName = "MADE_UP_EFFECT";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseInvalidEffectImpactEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MEDIUM", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseWrongNumberOfMetadataFieldsEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertFalse(effect.isWellFormed());
}
@Test
public void testParseSnpEffWarningEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "SNPEFF_WARNING" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following warning: SNPEFF_WARNING") );
}
@Test
public void testParseSnpEffErrorEffect() {
String effectName = "NON_SYNONYMOUS_CODING";
String[] effectMetadata = { "MODERATE", "Aca/Gca", "T/A", "OR4F5", "protein_coding", "CODING", "ENST00000534990", "exon_1_69037_69829", "", "SNPEFF_ERROR" };
SnpEffEffect effect = new SnpEffEffect(effectName, effectMetadata);
Assert.assertTrue( ! effect.isWellFormed() && effect.getParseError().equals("SnpEff issued the following error: SNPEFF_ERROR") );
}
}

View File

@ -1,6 +1,7 @@
package org.broadinstitute.sting.gatk.walkers.annotator;
import org.broadinstitute.sting.WalkerTest;
import org.broadinstitute.sting.utils.exceptions.UserException;
import org.testng.annotations.Test;
import java.util.Arrays;
@ -129,12 +130,24 @@ public class VariantAnnotatorIntegrationTest extends WalkerTest {
@Test
public void testSnpEffAnnotations() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + b37KGReference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1000G.exomes.vcf --snpEffFile " + validationDataLocation +
"snpEff_1.9.6_1000G.exomes.vcf_hg37.61.out -L 1:26,000,000-26,500,000",
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.vcf -L 1:1-1,500,000",
1,
Arrays.asList("03eae1dab19a9358250890594bf53607")
Arrays.asList("a1c3ba9efc28ee0606339604095076ea")
);
executeTest("Testing SnpEff annotations", spec);
}
@Test
public void testSnpEffAnnotationsUnsupportedVersion() {
WalkerTestSpec spec = new WalkerTestSpec(
"-T VariantAnnotator -R " + hg19Reference + " -NO_HEADER -o %s -A SnpEff --variant " +
validationDataLocation + "1kg_exomes_unfiltered.AFR.unfiltered.vcf --snpEffFile " + validationDataLocation +
"snpEff.AFR.unfiltered.unsupported.version.vcf -L 1:1-1,500,000",
1,
UserException.class
);
executeTest("Testing SnpEff annotations (unsupported version)", spec);
}
}

View File

@ -6,7 +6,7 @@ import org.testng.annotations.Test;
import java.util.Arrays;
public class VariantEvalIntegrationTest extends WalkerTest {
private static String variantEvalTestDataRoot = validationDataLocation + "/VariantEval";
private static String variantEvalTestDataRoot = validationDataLocation + "VariantEval";
private static String fundamentalTestVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.snps_and_indels.vcf";
private static String fundamentalTestSNPsVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.vcf";
private static String fundamentalTestSNPsOneSampleVCF = variantEvalTestDataRoot + "/" + "FundamentalsTest.annotated.db.subset.final.HG00625.vcf";
@ -14,6 +14,27 @@ public class VariantEvalIntegrationTest extends WalkerTest {
private static String cmdRoot = "-T VariantEval" +
" -R " + b36KGReference;
@Test
public void testFunctionClassWithSnpeff() {
WalkerTestSpec spec = new WalkerTestSpec(
buildCommandLine(
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + validationDataLocation + "snpEff.AFR.unfiltered.VariantAnnotator.output.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-noST",
"-ST FunctionalClass",
"-BTI eval",
"-o %s"
),
1,
Arrays.asList("f5f811ceb973d7fd6c1b2b734f1b2b12")
);
executeTest("testStratifySamplesAndExcludeMonomorphicSites", spec);
}
@Test
public void testStratifySamplesAndExcludeMonomorphicSites() {
WalkerTestSpec spec = new WalkerTestSpec(
@ -21,7 +42,7 @@ public class VariantEvalIntegrationTest extends WalkerTest {
"-T VariantEval",
"-R " + b37KGReference,
"--dbsnp " + b37dbSNP132,
"--eval " + variantEvalTestDataRoot + "/CEU.trio.callsForVE.vcf",
"--eval " + variantEvalTestDataRoot + "CEU.trio.callsForVE.vcf",
"-noEV",
"-EV TiTvVariantEvaluator",
"-ST Sample",

View File

@ -1,259 +0,0 @@
/*
* Copyright (c) 2011, The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.utils.codecs.snpEff;
import org.apache.commons.io.input.ReaderInputStream;
import org.broad.tribble.TribbleException;
import org.broad.tribble.readers.AsciiLineReader;
import org.broad.tribble.readers.LineReader;
import org.testng.Assert;
import org.testng.annotations.Test;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.EffectType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.ChangeType;
import static org.broadinstitute.sting.utils.codecs.snpEff.SnpEffConstants.Zygosity;
import java.io.StringReader;
public class SnpEffCodecUnitTest {
@Test
public void testParseWellFormedSnpEffHeaderLine() {
String wellFormedSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around\tCustom_interval_ID";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wellFormedSnpEffHeaderLine)));
String headerReturned = (String)codec.readHeader(reader);
Assert.assertEquals(headerReturned, wellFormedSnpEffHeaderLine);
}
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
public void testParseWrongNumberOfFieldsSnpEffHeaderLine() {
String wrongNumberOfFieldsSnpEffHeaderLine = "# Chromo\tPosition\tReference\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(wrongNumberOfFieldsSnpEffHeaderLine)));
codec.readHeader(reader);
}
@Test(expectedExceptions = TribbleException.InvalidHeader.class)
public void testParseMisnamedColumnSnpEffHeaderLine() {
String misnamedColumnSnpEffHeaderLine = "# Chromo\tPosition\tRef\tChange\tChange type\t" +
"Homozygous\tQuality\tCoverage\tWarnings\tGene_ID\tGene_name\tBio_type\tTrancript_ID\tExon_ID\t" +
"Exon_Rank\tEffect\told_AA/new_AA\tOld_codon/New_codon\tCodon_Num(CDS)\tCDS_size\tCodons around\t" +
"AAs around\tCustom_interval_ID";
SnpEffCodec codec = new SnpEffCodec();
LineReader reader = new AsciiLineReader(new ReaderInputStream(new StringReader(misnamedColumnSnpEffHeaderLine)));
codec.readHeader(reader);
}
@Test
public void testParseSimpleEffectSnpEffLine() {
String simpleEffectSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
69428l,
"T",
"G",
ChangeType.SNP,
Zygosity.Hom,
6049.69,
61573l,
null,
"ENSG00000177693",
"OR4F5",
"mRNA",
"ENST00000326183",
"exon_1_69055_70108",
1,
false,
EffectType.NON_SYNONYMOUS_CODING,
null,
"F/C",
"TTT/TGT",
113,
918,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(simpleEffectSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseNonCodingRegionSnpEffLine() {
String nonCodingRegionSnpEffLine = "1\t1337592\tG\tC\tSNP\tHom\t1935.52\t21885\t\tENSG00000250188\t" +
"RP4-758J18.5\tmRNA\tENST00000514958\texon_1_1337454_1338076\t2\tWITHIN_NON_CODING_GENE, NON_SYNONYMOUS_CODING\t" +
"L/V\tCTA/GTA\t272\t952\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
1337592l,
"G",
"C",
ChangeType.SNP,
Zygosity.Hom,
1935.52,
21885l,
null,
"ENSG00000250188",
"RP4-758J18.5",
"mRNA",
"ENST00000514958",
"exon_1_1337454_1338076",
2,
true,
EffectType.NON_SYNONYMOUS_CODING,
null,
"L/V",
"CTA/GTA",
272,
952,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(nonCodingRegionSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseExtraEffectInformationSnpEffLine() {
String extraEffectInformationSnpEffLine = "1\t879537\tT\tC\tSNP\tHom\t341.58\t13733\t\tENSG00000187634\tSAMD11\t" +
"mRNA\tENST00000341065\t\t\tUTR_3_PRIME: 4 bases from transcript end\t\t\t\t\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
879537l,
"T",
"C",
ChangeType.SNP,
Zygosity.Hom,
341.58,
13733l,
null,
"ENSG00000187634",
"SAMD11",
"mRNA",
"ENST00000341065",
null,
null,
false,
EffectType.UTR_3_PRIME,
"4 bases from transcript end",
null,
null,
null,
null,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(extraEffectInformationSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test
public void testParseMultiEffectSnpEffLine() {
String multiEffectSnpEffLine = "1\t901901\tC\tT\tSNP\tHom\t162.91\t4646\t\tENSG00000187583\tPLEKHN1\tmRNA\t" +
"ENST00000379410\texon_1_901877_901994\t1\tSTART_GAINED: ATG, UTR_5_PRIME: 11 bases from TSS\t\t\t\t\t\t\t";
SnpEffFeature expectedFeature = new SnpEffFeature("1",
901901l,
"C",
"T",
ChangeType.SNP,
Zygosity.Hom,
162.91,
4646l,
null,
"ENSG00000187583",
"PLEKHN1",
"mRNA",
"ENST00000379410",
"exon_1_901877_901994",
1,
false,
EffectType.START_GAINED,
"ATG, UTR_5_PRIME: 11 bases from TSS",
null,
null,
null,
null,
null,
null,
null
);
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(multiEffectSnpEffLine);
Assert.assertEquals(feature, expectedFeature);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseWrongNumberOfFieldsSnpEffLine() {
String wrongNumberOfFieldsSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\t918\t\t";
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(wrongNumberOfFieldsSnpEffLine);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseBlankEffectFieldSnpEffLine() {
String blankEffectFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\t\tF/C\tTTT/TGT\t113\t918\t\t\t";
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(blankEffectFieldSnpEffLine);
}
@Test(expectedExceptions = TribbleException.InvalidDecodeLine.class)
public void testParseInvalidNumericFieldSnpEffLine() {
String invalidNumericFieldSnpEffLine = "1\t69428\tT\tG\tSNP\tHom\t6049.69\t61573\t\tENSG00000177693\t" +
"OR4F5\tmRNA\tENST00000326183\texon_1_69055_70108\t1\tNON_SYNONYMOUS_CODING\tF/C\tTTT/TGT\t113\tfoo\t\t\t";;
SnpEffCodec codec = new SnpEffCodec();
SnpEffFeature feature = (SnpEffFeature)codec.decode(invalidNumericFieldSnpEffLine);
}
}