Inconsequential changes, more 'variant classification' values are recognized
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@5236 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
d3660aa00e
commit
7f7d7eb2d1
|
|
@ -337,7 +337,45 @@ public class VariantContextAdaptors {
|
||||||
addGenotype(genotypes,tumorSample,maf.getObservedTumorAlleleList(),maf.getRefBases());
|
addGenotype(genotypes,tumorSample,maf.getObservedTumorAlleleList(),maf.getRefBases());
|
||||||
|
|
||||||
|
|
||||||
HashMap<String, Object> attrs = new HashMap<String, Object>(1);
|
HashMap<String, Object> attrs = new HashMap<String, Object>(10);
|
||||||
|
// fill attributes:
|
||||||
|
if ( maf.getHugoGeneSymbol() != null && ! maf.getHugoGeneSymbol().equals("Unknown"))
|
||||||
|
attrs.put("Gene",maf.getHugoGeneSymbol());
|
||||||
|
|
||||||
|
if ( maf.isSomatic() ) {
|
||||||
|
attrs.put(VCFConstants.SOMATIC_KEY,true);
|
||||||
|
attrs.put("SS","Somatic");
|
||||||
|
} else {
|
||||||
|
attrs.put("SS","Germline");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Translate the MAF variant classification (when one was parsed) into the "VC" attribute.
// Every case ends with an explicit break: without it the deletion/insertion/stop-codon
// cases fall through and are overwritten by the "Unclassified" value that follows them.
if ( maf.getVariantClassification() != null ) {
    switch(maf.getVariantClassification()) {
        case Intergenic: attrs.put("VC","Genomic"); break;
        case Intron: attrs.put("VC","Intron"); break;
        case Noncoding_transcript: attrs.put("VC","Noncoding_transcript"); break;
        case UTR3: attrs.put("VC","3'UTR"); break;
        case UTR5: attrs.put("VC","5'UTR"); break;
        case Flank5: attrs.put("VC","5'flank"); break;
        // NOTE(review): Promoter is reported with the same "5'flank" value as Flank5 -- confirm this is intended
        case Promoter: attrs.put("VC","5'flank"); break;
        case De_novo_start: attrs.put("VC","De_novo_start"); break;
        case Silent: attrs.put("VC","Silent"); break;
        case Missense: attrs.put("VC","Missense"); break;
        case Nonsense: attrs.put("VC","Nonsense"); break;
        case Splice: attrs.put("VC","Splice_site"); break;
        case miRNA: attrs.put("VC","miRNA"); break;
        case Frameshift: attrs.put("VC","Frameshift"); break;
        case Inframe: attrs.put("VC","Inframe"); break;
        case Stop_deletion: attrs.put("VC","Stop_codon_deletion"); break;
        case Splice_site_deletion: attrs.put("VC","Splice_site_deletion"); break;
        case Splice_site_insertion: attrs.put("VC","Splice_site_insertion"); break;
        case Unclassified: attrs.put("VC","Unclassified"); break;
        default: break; // any other classification: leave "VC" unset
    }
}
|
||||||
|
|
||||||
|
attrs.put("VT",maf.getType());
|
||||||
|
|
||||||
// attrs.put(VariantContext.ID_KEY, hapmap.getName());
|
// attrs.put(VariantContext.ID_KEY, hapmap.getName());
|
||||||
int end = maf.getEnd();
|
int end = maf.getEnd();
|
||||||
VariantContext vc = new VariantContext(name, maf.getChr(), maf.getStart(), end, alleles,
|
VariantContext vc = new VariantContext(name, maf.getChr(), maf.getStart(), end, alleles,
|
||||||
|
|
|
||||||
|
|
@ -27,13 +27,18 @@ package org.broadinstitute.sting.playground.gatk.features.maf;
|
||||||
|
|
||||||
import org.broad.tribble.FeatureCodec;
|
import org.broad.tribble.FeatureCodec;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
import org.apache.log4j.Logger;
|
import org.apache.log4j.Logger;
|
||||||
import org.broadinstitute.sting.utils.exceptions.UserException;
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.lang.reflect.Field;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Created by IntelliJ IDEA.
|
* Created by IntelliJ IDEA.
|
||||||
|
|
@ -47,31 +52,21 @@ public class MafCodec implements FeatureCodec {
|
||||||
|
|
||||||
private int expectedTokenCount = -1;
|
private int expectedTokenCount = -1;
|
||||||
|
|
||||||
private int BUILD_COL;
|
|
||||||
private int CHR_COL;
|
|
||||||
private int START_COL;
|
|
||||||
private int END_COL;
|
|
||||||
private int REF_ALLELE_COL;
|
|
||||||
private int TUMOR_ALLELE1_COL;
|
|
||||||
private int TUMOR_ALLELE2_COL;
|
|
||||||
private int TUMOR_SAMPLE_COL;
|
|
||||||
private int NORMAL_SAMPLE_COL;
|
|
||||||
// optional fields (absent from maf lite):
|
|
||||||
private int VARTYPE_COL = -1;
|
|
||||||
private int STRAND_COL = -1;
|
|
||||||
|
|
||||||
private static String BUILD_COLNAME="NCBI_Build";
|
private Column BUILD_COL = new Column("NCBI_Build",true);
|
||||||
private static String CHR_COLNAME="Chromosome";
|
private Column CHR_COL = new Column("Chromosome",true);
|
||||||
private static String START_COLNAME="Start_position";
|
private Column START_COL = new Column("Start_position",true);
|
||||||
private static String END_COLNAME="End_position";
|
private Column END_COL = new Column("End_position",true);
|
||||||
private static String REF_ALLELE_COLNAME="Reference_Allele";
|
private Column REF_ALLELE_COL = new Column("Reference_Allele",true);
|
||||||
private static String TUMOR_ALLELE1_COLNAME="Tumor_Seq_Allele1";
|
private Column TUMOR_ALLELE1_COL = new Column("Tumor_Seq_Allele1",true);
|
||||||
private static String TUMOR_ALLELE2_COLNAME="Tumor_Seq_Allele2";
|
private Column TUMOR_ALLELE2_COL = new Column("Tumor_Seq_Allele2",true);
|
||||||
private static String TUMOR_SAMPLE_COLNAME="Tumor_Sample_Barcode";
|
private Column TUMOR_SAMPLE_COL = new Column("Tumor_Sample_Barcode",true);
|
||||||
private static String NORMAL_SAMPLE_COLNAME="Matched_Norm_Sample_Barcode";
|
private Column NORMAL_SAMPLE_COL = new Column("Matched_Norm_Sample_Barcode",true);
|
||||||
// optional fields (absent from maf lite):
|
// optional fields (absent from maf lite):
|
||||||
private static String VARTYPE_COLNAME="Variant_Type";
|
private Column VARTYPE_COL = new Column("Variant_Type",false);
|
||||||
private static String STRAND_COLNAME="Strand";
|
private Column STRAND_COL = new Column("Strand",false);
|
||||||
|
private Column HUGO_GENE_COL = new Column("Hugo_Symbol",false);
|
||||||
|
private Column VARCLASS_COL = new Column("Variant_Classification",false);
|
||||||
|
|
||||||
|
|
||||||
public enum MAF_TYPE {
|
public enum MAF_TYPE {
|
||||||
|
|
@ -85,18 +80,55 @@ public class MafCodec implements FeatureCodec {
|
||||||
|
|
||||||
private MAF_TYPE mafType=MAF_TYPE.UNKNOWN;
|
private MAF_TYPE mafType=MAF_TYPE.UNKNOWN;
|
||||||
|
|
||||||
|
private List<Column> allColumns = null; /// filled dynamically by constructor through introspection. Slow but less typing.
|
||||||
|
|
||||||
|
private boolean tooManyColsWarned = false;
|
||||||
|
private boolean tooFewColsWarned = false;
|
||||||
|
|
||||||
|
public MafCodec() {
|
||||||
|
allColumns = new ArrayList<Column>(30);
|
||||||
|
Field[] fields = this.getClass().getDeclaredFields();
|
||||||
|
try {
|
||||||
|
for ( Field f : fields ) {
|
||||||
|
if ( f.get(this) instanceof Column ) {
|
||||||
|
allColumns.add((Column)f.get(this));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IllegalAccessException e) {
|
||||||
|
throw new StingException("Error in MAFCodec when trying to introspect itself, this is probably a BUG",e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decode a line to obtain just its FeatureLoc for indexing -- contig, start, and stop.
|
* Decode a line to obtain just its FeatureLoc for indexing -- contig, start, and stop.
|
||||||
*
|
* This method will NOT fill in the additional information available in the maf file
|
||||||
* @param line the input line to decode
|
* @param line the input line to decode
|
||||||
* @return Return the FeatureLoc encoded by the line, or null if the line does not represent a feature (e.g. is
|
* @return Return the FeatureLoc encoded by the line, or null if the line does not represent a feature (e.g. is
|
||||||
* a comment)
|
* a comment)
|
||||||
*/
|
*/
|
||||||
public Feature decodeLoc(String line) {
|
public Feature decodeLoc(String line) {
|
||||||
return decode(line);
|
return reallyDecode(line,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Fully decode a line, will try extracting as much additional/annotation information from the maf file as it can.
|
||||||
|
* @param line the input line to decode
|
||||||
|
* @return Return the FeatureLoc encoded by the line, or null if the line does not represent a feature (e.g. is
|
||||||
|
* a comment)
|
||||||
|
*/
|
||||||
public Feature decode(String line) {
|
public Feature decode(String line) {
|
||||||
|
return reallyDecode(line,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Decodes a maf line. If <code>extra</code> is false, will decode only location and return;
|
||||||
|
* if <code>extra</code> is true, then extracts everything it can (samples, annotations, etc)
|
||||||
|
* @param line
|
||||||
|
* @param extra
|
||||||
|
* @return
|
||||||
|
*/
|
||||||
|
public Feature reallyDecode(String line, boolean extra) {
|
||||||
|
|
||||||
// ignore commented-out lines
|
// ignore commented-out lines
|
||||||
if (line.startsWith("#")) return null;
|
if (line.startsWith("#")) return null;
|
||||||
|
|
@ -137,24 +169,39 @@ public class MafCodec implements FeatureCodec {
|
||||||
|
|
||||||
|
|
||||||
if (tokens.length < expectedTokenCount) {
|
if (tokens.length < expectedTokenCount) {
|
||||||
log.error("MAF line contains too few columns ("+tokens.length+")");
|
if ( ! tooFewColsWarned ) {
|
||||||
return null;
|
log.error("MAF line contains too few columns ("+tokens.length+"); this error is reported only once.");
|
||||||
|
tooFewColsWarned = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (tokens.length > expectedTokenCount) {
|
if (tokens.length > expectedTokenCount) {
|
||||||
log.warn("MAF line contains more columns than expected ("+tokens.length+"); extra columns discarded");
|
if ( ! tooManyColsWarned ) {
|
||||||
|
log.warn("MAF line contains more columns than expected ("+tokens.length+"); extra columns discarded. This error is shown only once.");
|
||||||
|
tooManyColsWarned = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if ( tokens[CHR_COL].equals("Chromosome") ) return null; // if someone uses this codec manually and feeds it the header line multiple times...
|
if ( tokens[CHR_COL.getIndex()].equals("Chromosome") ) return null; // if someone uses this codec manually and feeds it the header line multiple times...
|
||||||
// create a new feature from the line:
|
// create a new feature from the line:
|
||||||
|
|
||||||
int start = Integer.valueOf(tokens[START_COL]);
|
int start = 0;
|
||||||
int stop = Integer.valueOf(tokens[END_COL]);
|
try {
|
||||||
|
start = Integer.parseInt(START_COL.getValue(tokens));
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
throw new UserException.MalformedFile("Missing or non-numeric start position in line:\n"+line,e);
|
||||||
|
}
|
||||||
|
int stop = 0 ;
|
||||||
|
try {
|
||||||
|
stop = Integer.parseInt(END_COL.getValue(tokens));
|
||||||
|
} catch (NumberFormatException e) {
|
||||||
|
throw new UserException.MalformedFile("Missing or non-numeric stop position in line:\n"+line,e);
|
||||||
|
}
|
||||||
|
|
||||||
String eventType="UNKNOWN";
|
String eventType="UNKNOWN";
|
||||||
|
|
||||||
String ref = tokens[REF_ALLELE_COL];
|
String ref = REF_ALLELE_COL.getValue(tokens);
|
||||||
String alt1 = tokens[TUMOR_ALLELE1_COL];
|
String alt1 = TUMOR_ALLELE1_COL.getValue(tokens);
|
||||||
String alt2 = tokens[TUMOR_ALLELE2_COL];
|
String alt2 = TUMOR_ALLELE2_COL.getValue(tokens);
|
||||||
|
|
||||||
if ( ref.equals("-") ) {
|
if ( ref.equals("-") ) {
|
||||||
// insertion
|
// insertion
|
||||||
|
|
@ -208,16 +255,29 @@ public class MafCodec implements FeatureCodec {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// if we got vartype column, make sure it makes sense:
|
// if we got vartype column, make sure it makes sense:
|
||||||
if ( VARTYPE_COL != -1 && ! tokens[VARTYPE_COL].equals(eventType) )
|
if ( VARTYPE_COL.isSet(tokens) && ! tokens[VARTYPE_COL.getIndex()].equals(eventType) ) {
|
||||||
|
// special case: we annotate everything as MNP while MAF can have DNP/TNP, these are fine:
|
||||||
|
if ( eventType == MNP && (
|
||||||
|
tokens[VARTYPE_COL.getIndex()].equals("DNP") && ref.length() == 2 ||
|
||||||
|
tokens[VARTYPE_COL.getIndex()].equals("TNP") && ref.length() == 3)
|
||||||
|
) {} // these are fine
|
||||||
|
else {
|
||||||
throw new UserException.MalformedFile("Inconsistency in MAF: variant looks like a "+eventType +" but annotated as "+
|
throw new UserException.MalformedFile("Inconsistency in MAF: variant looks like a "+eventType +" but annotated as "+
|
||||||
tokens[VARTYPE_COL]);
|
tokens[VARTYPE_COL.getIndex()]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
MafFeature feature = new MafFeature(CHR_COL.getValue(tokens),start,stop);
|
||||||
|
|
||||||
|
if ( ! extra ) return feature; // ignore additional fields unless we were explicitly asked to read those!
|
||||||
|
|
||||||
MafFeature feature = new MafFeature(tokens[CHR_COL],start,stop);
|
|
||||||
feature.setVariantType(eventType);
|
feature.setVariantType(eventType);
|
||||||
feature.setRefAllele(ref);
|
feature.setRefAllele(ref);
|
||||||
feature.setObservedTumor(alt1,alt2);
|
feature.setObservedTumor(alt1,alt2);
|
||||||
feature.setTumorSample(tokens[TUMOR_SAMPLE_COL]);
|
feature.setTumorSample(TUMOR_SAMPLE_COL.getValue(tokens));
|
||||||
feature.setNormalSample(tokens[NORMAL_SAMPLE_COL]);
|
feature.setNormalSample(NORMAL_SAMPLE_COL.getValue(tokens));
|
||||||
|
|
||||||
|
if ( HUGO_GENE_COL.isSet(tokens) ) feature.setHugoGeneSymbol(tokens[HUGO_GENE_COL.getIndex()]);
|
||||||
|
if ( VARCLASS_COL.isSet(tokens) ) feature.setVariantClassification(tokens[VARCLASS_COL.getIndex()]);
|
||||||
|
|
||||||
return feature;
|
return feature;
|
||||||
}
|
}
|
||||||
|
|
@ -239,67 +299,95 @@ public class MafCodec implements FeatureCodec {
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
private void setMafLiteCols() {
|
private void setMafLiteCols() {
|
||||||
BUILD_COL = 0;
|
BUILD_COL.setIndex(0);
|
||||||
CHR_COL = 1;
|
CHR_COL.setIndex(1);
|
||||||
START_COL = 2;
|
START_COL.setIndex(2);
|
||||||
END_COL = 3;
|
END_COL.setIndex(3);
|
||||||
REF_ALLELE_COL = 4;
|
REF_ALLELE_COL.setIndex(4);
|
||||||
TUMOR_ALLELE1_COL = 5;
|
TUMOR_ALLELE1_COL.setIndex(5);
|
||||||
TUMOR_ALLELE2_COL = 6;
|
TUMOR_ALLELE2_COL.setIndex(6);
|
||||||
TUMOR_SAMPLE_COL = 7;
|
TUMOR_SAMPLE_COL.setIndex(7);
|
||||||
NORMAL_SAMPLE_COL = 8;
|
NORMAL_SAMPLE_COL.setIndex(8);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setMafAnnotatedCols() {
|
private void setMafAnnotatedCols() {
|
||||||
BUILD_COL = 3;
|
BUILD_COL.setIndex(3);
|
||||||
CHR_COL = 4;
|
CHR_COL.setIndex(4);
|
||||||
START_COL = 5;
|
START_COL.setIndex(5);
|
||||||
END_COL = 6;
|
END_COL.setIndex(6);
|
||||||
REF_ALLELE_COL = 10;
|
REF_ALLELE_COL.setIndex(10);
|
||||||
TUMOR_ALLELE1_COL = 11;
|
TUMOR_ALLELE1_COL.setIndex(11);
|
||||||
TUMOR_ALLELE2_COL = 12;
|
TUMOR_ALLELE2_COL.setIndex(12);
|
||||||
TUMOR_SAMPLE_COL = 15;
|
TUMOR_SAMPLE_COL.setIndex(15);
|
||||||
NORMAL_SAMPLE_COL = 16;
|
NORMAL_SAMPLE_COL.setIndex(16);
|
||||||
VARTYPE_COL = 9;
|
VARTYPE_COL.setIndex(9);
|
||||||
STRAND_COL = 7;
|
STRAND_COL.setIndex(7);
|
||||||
|
VARCLASS_COL.setIndex(8);
|
||||||
|
HUGO_GENE_COL.setIndex(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void setColumnsFromHeader(String[] tokens) {
|
private void setColumnsFromHeader(String[] tokens) {
|
||||||
Map<String,Integer> colNames = new HashMap<String,Integer>();
|
Map<String,Integer> colNames = new HashMap<String,Integer>();
|
||||||
for ( int i = 0 ; i < tokens.length ; i++ ) colNames.put(tokens[i],i);
|
for ( int i = 0 ; i < tokens.length ; i++ ) colNames.put(tokens[i],i);
|
||||||
|
|
||||||
if ( colNames.containsKey(BUILD_COLNAME) ) BUILD_COL = colNames.get(BUILD_COLNAME);
|
for ( Column c : allColumns ) c.setFromMap(colNames);
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+BUILD_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(CHR_COLNAME) ) CHR_COL = colNames.get(CHR_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+CHR_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(START_COLNAME) ) START_COL = colNames.get(START_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+START_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(END_COLNAME) ) END_COL = colNames.get(END_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+END_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(REF_ALLELE_COLNAME) ) REF_ALLELE_COL = colNames.get(REF_ALLELE_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+REF_ALLELE_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(TUMOR_ALLELE1_COLNAME) ) TUMOR_ALLELE1_COL = colNames.get(TUMOR_ALLELE1_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+TUMOR_ALLELE1_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(TUMOR_ALLELE2_COLNAME) ) TUMOR_ALLELE2_COL = colNames.get(TUMOR_ALLELE2_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+TUMOR_ALLELE2_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(TUMOR_SAMPLE_COLNAME) ) TUMOR_SAMPLE_COL = colNames.get(TUMOR_SAMPLE_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+TUMOR_SAMPLE_COLNAME+" column");
|
|
||||||
|
|
||||||
if ( colNames.containsKey(NORMAL_SAMPLE_COLNAME) ) NORMAL_SAMPLE_COL = colNames.get(NORMAL_SAMPLE_COLNAME);
|
|
||||||
else throw new UserException.MalformedFile("Maf file does not have "+NORMAL_SAMPLE_COLNAME+" column");
|
|
||||||
|
|
||||||
// we do not require variant type column but we use it if it's present (for validation):
|
|
||||||
if ( colNames.containsKey(VARTYPE_COLNAME) ) VARTYPE_COL = colNames.get(VARTYPE_COLNAME);
|
|
||||||
|
|
||||||
// we do not require strand column but we use it if it's present (for validation):
|
|
||||||
if ( colNames.containsKey(STRAND_COLNAME) ) STRAND_COL = colNames.get(STRAND_COLNAME);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class Column {
|
||||||
|
int index ;
|
||||||
|
String name;
|
||||||
|
boolean required;
|
||||||
|
|
||||||
|
Column(String name, boolean required) {
|
||||||
|
this.name = name;
|
||||||
|
this.required = required;
|
||||||
|
this.index = -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getName() { return name; }
|
||||||
|
public void setName(String name) { this.name = name; }
|
||||||
|
public int getIndex() { return index; }
|
||||||
|
public void setIndex(int index) { this.index = index; }
|
||||||
|
public String getValue(String[] fields) {
|
||||||
|
if ( index < fields.length ) return fields[index];
|
||||||
|
|
||||||
|
if ( required ) throw new UserException.MalformedFile("In MAF file: required column "+name+" has index "+index+
|
||||||
|
", but only "+fields.length+ " fields are present in maf line");
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets this column's index from the provided name->index map (i.e. searches for itself in the map).
|
||||||
|
* If column not found, <code>throw_exception</code> is true <i>AND</i> this column is required, then an exception will
|
||||||
|
* be thrown right away; otherwise returns quietely even if map does not contain this column.
|
||||||
|
* @param m
|
||||||
|
* @param throw_exception
|
||||||
|
*/
|
||||||
|
public void setFromMap(Map<String,Integer> m, boolean throw_exception) {
|
||||||
|
Integer i = m.get(this.name);
|
||||||
|
if ( i == null ) {
|
||||||
|
if ( this.required && throw_exception ) throw new UserException.MalformedFile("Required column "+this.name+" is missing from the maf file");
|
||||||
|
index = -1;
|
||||||
|
return; // not found
|
||||||
|
}
|
||||||
|
this.index = i.intValue(); // found and set.
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Sets this column's index from the provided name->index map (i.e. searches for itself in the map).
|
||||||
|
* If this column is required but not found in the map, then an exception will
|
||||||
|
* be thrown.
|
||||||
|
* @param m
|
||||||
|
*/
|
||||||
|
public void setFromMap(Map<String,Integer> m) {
|
||||||
|
setFromMap(m,true);
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isSet() { return index > -1; }
|
||||||
|
|
||||||
|
public boolean isSet(String[] fields) { return index > -1 && index < fields.length; }
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ package org.broadinstitute.sting.playground.gatk.features.maf;
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broadinstitute.sting.utils.exceptions.StingException;
|
import org.broadinstitute.sting.utils.exceptions.StingException;
|
||||||
|
import org.broadinstitute.sting.utils.exceptions.UserException;
|
||||||
|
|
||||||
import java.util.*;
|
import java.util.*;
|
||||||
|
|
||||||
|
|
@ -47,11 +48,18 @@ public class MafFeature implements Feature {
|
||||||
private String[] observedNormAlleles = null; // The sequences of the observed alleles in normal
|
private String[] observedNormAlleles = null; // The sequences of the observed alleles in normal
|
||||||
private String tumorSampleId = null;
|
private String tumorSampleId = null;
|
||||||
private String normalSampleId = null;
|
private String normalSampleId = null;
|
||||||
|
private String hugoSymbol = null;
|
||||||
|
private Classification classification = null;
|
||||||
|
|
||||||
public enum Type {
|
public enum Type {
|
||||||
UNKNOWN,SNP,MNP,INS,DEL
|
UNKNOWN,SNP,MNP,INS,DEL
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/** Variant classifications this feature can carry; declaration order is kept as in the original. */
public enum Classification {
    Unclassified,
    Intergenic,
    Intron,
    Noncoding_transcript,
    UTR3,
    UTR5,
    Flank5,
    Silent,
    Missense,
    Nonsense,
    Splice,
    miRNA,
    Frameshift,
    Inframe,
    Stop_deletion,
    Promoter,
    De_novo_start,
    Splice_site_deletion,
    Splice_site_insertion
}
|
||||||
|
|
||||||
private Type type = Type.UNKNOWN;
|
private Type type = Type.UNKNOWN;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -99,6 +107,14 @@ public class MafFeature implements Feature {
|
||||||
return refAllele;
|
return refAllele;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getHugoGeneSymbol() {
|
||||||
|
return hugoSymbol;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String setHugoGeneSymbol(String genename) {
|
||||||
|
return hugoSymbol = genename;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns list of alleles (represented as strings) observed in Tumor. Returned alleles
|
* Returns list of alleles (represented as strings) observed in Tumor. Returned alleles
|
||||||
* could be redundant (e.g. if we have homozygous non-ref at ploidy 2+).
|
* could be redundant (e.g. if we have homozygous non-ref at ploidy 2+).
|
||||||
|
|
@ -187,6 +203,43 @@ public class MafFeature implements Feature {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean isSomatic() {
|
||||||
|
if ( observedTumAlleles[0].equals(refAllele) && observedTumAlleles[1].equals(refAllele) ) return false; // tumor is ref
|
||||||
|
// we get here only if tumor is non-ref
|
||||||
|
if ( observedNormAlleles == null ) return true; // norm alleles are omitted from maf only if they are all ref
|
||||||
|
if ( observedNormAlleles[0].equals(refAllele) && observedNormAlleles[1].equals(refAllele) ) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVariantClassification(String s) {
|
||||||
|
if ( s.equals("IGR") ) { classification = Classification.Intergenic ; return; }
|
||||||
|
if ( s.equals("Intron") ) { classification = Classification.Intron ; return; }
|
||||||
|
if ( s.equals("3'UTR") ) { classification = Classification.UTR3 ; return; }
|
||||||
|
if ( s.equals("5'UTR") ) { classification = Classification.UTR5 ; return; }
|
||||||
|
if ( s.equals("5'-Flank") ) { classification = Classification.Flank5 ; return; }
|
||||||
|
if ( s.equals("Silent") ) { classification = Classification.Silent ; return; }
|
||||||
|
if ( s.equals("Non-coding_Transcript")) { classification = Classification.Noncoding_transcript; return; }
|
||||||
|
if ( s.equals("Missense") || s.equals("Missense_Mutation") ) { classification = Classification.Missense ; return; }
|
||||||
|
if ( s.equals("Nonsense_Mutation") ) { classification = Classification.Nonsense ; return; }
|
||||||
|
if ( s.equals("Splice_Site") ) { classification = Classification.Splice ; return; }
|
||||||
|
if ( s.equals("miRNA") ) { classification = Classification.miRNA ; return; }
|
||||||
|
if ( s.equals("Frame_Shift_Ins") ) { classification = Classification.Frameshift ; return; }
|
||||||
|
if ( s.equals("Frame_Shift_Del") ) { classification = Classification.Frameshift ; return; }
|
||||||
|
if ( s.equals("In_Frame_Ins") ) { classification = Classification.Inframe ; return; }
|
||||||
|
if ( s.equals("In_Frame_Del") ) { classification = Classification.Inframe ; return; }
|
||||||
|
if ( s.equals("Stop_Codon_Del") ) { classification = Classification.Stop_deletion ; return; }
|
||||||
|
if ( s.equals("Splice_Site_Del") ) { classification = Classification.Splice_site_deletion ; return; }
|
||||||
|
if ( s.equals("Splice_Site_Ins") ) { classification = Classification.Splice_site_insertion ; return; }
|
||||||
|
if ( s.equals("Promoter") ) { classification = Classification.Promoter ; return; }
|
||||||
|
if ( s.equals("De_novo_Start") ) { classification = Classification.De_novo_start ; return; }
|
||||||
|
if ( s.equals("TX-REF-MISMATCH") ) { classification = Classification.Unclassified ; return; }
|
||||||
|
throw new UserException.MalformedFile("Unknown variant classification: " + s);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Classification getVariantClassification() {
|
||||||
|
return classification;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* the required getting and setter methods
|
* the required getting and setter methods
|
||||||
*/
|
*/
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue