Moved AnnotatorInputTableFeature and Codec to org.broadinstitute.sting.gatk.refdata.features.annotator

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3424 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
weisburd 2010-05-24 14:36:28 +00:00
parent 42fa12d9e4
commit 64ed770250
4 changed files with 464 additions and 1 deletions

View File

@ -0,0 +1,153 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import org.broad.tribble.FeatureCodec;
import org.broad.tribble.util.AsciiLineReader;
import org.broad.tribble.util.LineReader;
import org.broadinstitute.sting.utils.GenomeLoc;
import org.broadinstitute.sting.utils.GenomeLocParser;
import org.broadinstitute.sting.utils.Utils;
/**
 * Codec that turns the lines of a tab-delimited AnnotatorInputTable file into
 * {@link AnnotatorInputTableFeature} records.
 *
 * The first line that is neither empty nor a '#' comment is treated as the header
 * and supplies the column names. For every data line, the first column is parsed
 * as a genomic location.
 */
public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTableFeature> {

    private static final Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);

    /** Separator between columns, used for both the header line and the data lines. */
    public static final String DELIMITER = "\t";

    /** Column names taken from the header line; null until a header has been read. */
    private ArrayList<String> header;

    /**
     * Parses the header and remembers its column names for later {@link #decode(String)} calls.
     *
     * @param reader The reader positioned at the start of the input.
     *
     * @return The # of lines consumed while reading the header: the header line itself
     *         plus any empty or comment lines that preceded it.
     * @throws IllegalArgumentException If reading fails, or if no header line is found.
     */
    public int readHeader(LineReader reader)
    {
        // Single-element array acts as an out-parameter for the line count.
        int[] lineCounter = new int[1];
        try {
            header = readHeader(reader, lineCounter);
        } catch(IOException e) {
            throw new IllegalArgumentException("Unable to read from file.", e);
        }
        return lineCounter[0];
    }

    /**
     * Parses the line into an AnnotatorInputTableFeature object.
     *
     * Each header column is paired with the value at the same position in the line.
     * Values beyond the header's column count, if any, are not stored. The first
     * column is additionally parsed as a genomic location and used to set the
     * feature's contig, start and end.
     *
     * @param line A single data line from the table.
     * @return The feature built from the line.
     * @throws IllegalStateException If no header has been read yet.
     * @throws IllegalArgumentException If the line has fewer columns than the header.
     */
    public AnnotatorInputTableFeature decode(String line) {
        final ArrayList<String> header = this.header; //optimization
        if ( header == null ) {
            // Without this check the failure would surface as an anonymous NullPointerException.
            throw new IllegalStateException("decode(..) was called before a header was read. Call readHeader(..) first.");
        }

        final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());

        // A short row would otherwise fail inside the loop below with an
        // IndexOutOfBoundsException that says nothing about the offending line.
        if ( values.size() < header.size() ) {
            throw new IllegalArgumentException(String.format(
                    "Encountered a line that has %d columns which is < the number of columns in the header which has %d columns.\nHeader: %s\nLine: %s",
                    values.size(), header.size(), header, line));
        }

        final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
        for ( int i = 0; i < header.size(); i++ ) {
            feature.putColumnValue(header.get(i), values.get(i));
        }

        //parse the location
        //TODO switch to GenomeLocParser.parseGenomeInterval(values.get(0))
        final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(values.get(0));
        feature.setChr(loc.getContig());
        feature.setStart((int) loc.getStart());
        feature.setEnd((int) loc.getStop());
        return feature;
    }

    /**
     * Returns the header of the given file.
     *
     * The file is opened, read up to and including its header line, and closed again.
     *
     * @param source The table file.
     * @return The header fields.
     * @throws IOException If the file cannot be opened or read.
     * @throws IllegalArgumentException If no header line is found.
     */
    public static ArrayList<String> readHeader(final File source) throws IOException {
        FileInputStream is = new FileInputStream(source);
        try {
            return readHeader(new AsciiLineReader(is), null);
        } finally {
            is.close();
        }
    }

    /**
     * Returns the header, and also sets the 2nd arg to the number of lines in the header.
     *
     * @param source The reader to pull lines from.
     * @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
     * @return The header fields.
     * @throws IOException If reading from the source fails.
     * @throws IllegalArgumentException If every line is empty or a comment.
     */
    private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
        ArrayList<String> header = null;
        int numLines = 0;

        //find the 1st line that's non-empty and not a comment
        String line = null;
        while( (line = source.readLine()) != null ) {
            numLines++;
            line = line.trim();
            if ( line.isEmpty() || line.startsWith("#") ) {
                continue;
            }

            //parse the header
            header = Utils.split(line, DELIMITER);
            break;
        }

        // check that we found the header
        if ( header == null ) {
            throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
        }

        if(lineCounter != null) {
            lineCounter[0] = numLines;
        }

        logger.info(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
        return header;
    }
}

View File

@ -0,0 +1,260 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.gatk.refdata.features.annotator;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.Map.Entry;
import org.broad.tribble.Feature;
/**
* This class represents a single record in an AnnotatorInputTable.
*/
public class AnnotatorInputTableFeature implements Feature {

    // Column names in the order handed to the constructor.
    private ArrayList<String> columnNames;

    // Column name -> value for this record.
    private HashMap<String, String> columnValues;

    // Position of this record.
    private String chr;
    private int start;
    private int end;

    // Scratch storage for callers, created on first write.
    //
    // Rationale: when a ROD spans an interval, a Walker's map(..) method is called
    // for every position in it, yet many computations (including validation and
    // parsing) only need to happen once per ROD. Caching the first result here and
    // reusing it on later map(..) calls spares Walkers from keeping their own
    // ROD -> result hashmaps.
    private Map<Object, Object> temporaryAttributes;

    /**
     * Creates a record with no column values yet.
     *
     * @param columnNames The column names as parsed out of the file header.
     */
    public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
        this.columnValues = new HashMap<String, String>();
        this.columnNames = columnNames;
    }

    /**
     * @return The list of column names that was passed to the constructor.
     */
    public ArrayList<String> getHeader() {
        return this.columnNames;
    }

    /**
     * Looks up the value stored for a column.
     *
     * @param columnName The column name as it appears in the file header.
     * @return The stored value, or null if nothing is stored under that name.
     */
    public String getColumnValue(final Object columnName) {
        return this.columnValues.get(columnName);
    }

    /**
     * Tells whether a value has been stored for a column.
     *
     * @param columnName The column name as it appears in the file header.
     * @return True if a value is stored under that name.
     */
    public boolean containsColumnName(final Object columnName) {
        return this.columnValues.containsKey(columnName);
    }

    /**
     * Stores a value for a column, replacing any earlier one.
     *
     * @param columnName The column name as it appears in the file header.
     * @param value The value
     * @return The value previously stored under the columnName, if there was one.
     */
    protected String putColumnValue(final String columnName, final String value) {
        return this.columnValues.put(columnName, value);
    }

    /**
     * @return A read-only view of every value in this record, keyed by column name.
     */
    public Map<String,String> getColumnValues() {
        return Collections.unmodifiableMap(this.columnValues);
    }

    /**
     * Returns the column name-value pairs of this record.
     *
     * Unlike {@link #getColumnValues()}, this hands out the backing map's own
     * entry set rather than a read-only wrapper.
     *
     * @return The entry set of the column values.
     */
    public Set<Entry<String, String>> getEntrySet() {
        return this.columnValues.entrySet();
    }

    /** @return The chr value set via {@link #setChr(String)}. */
    public String getChr() {
        return this.chr;
    }

    /** @return The start value set via {@link #setStart(int)}. */
    public int getStart() {
        return this.start;
    }

    /** @return The end value set via {@link #setEnd(int)}. */
    public int getEnd() {
        return this.end;
    }

    protected void setChr(String chr) {
        this.chr = chr;
    }

    protected void setStart(int start) {
        this.start = start;
    }

    protected void setEnd(int end) {
        this.end = end;
    }

    /**
     * Checks whether a temporary attribute has been set for the given key.
     *
     * Temporary attributes let a Walker cache a per-ROD result on the record
     * itself instead of recomputing it on every map(..) call.
     *
     * @param key key
     * @return True if an attribute has been set for this key.
     */
    public boolean containsTemporaryAttribute(Object key) {
        return temporaryAttributes != null && temporaryAttributes.containsKey(key);
    }

    /**
     * Sets the temporary attribute for the key, replacing any previous value.
     * The attribute storage is created on the first call.
     *
     * Temporary attributes let a Walker cache a per-ROD result on the record
     * itself instead of recomputing it on every map(..) call.
     *
     * @param key key
     * @param value value
     * @return The value previously associated with the key, or null.
     */
    public Object setTemporaryAttribute(Object key, Object value) {
        Map<Object, Object> attributes = temporaryAttributes;
        if(attributes == null) {
            attributes = new HashMap<Object, Object>();
            temporaryAttributes = attributes;
        }
        return attributes.put(key, value);
    }

    /**
     * Looks up the temporary attribute associated with the given key.
     *
     * Temporary attributes let a Walker cache a per-ROD result on the record
     * itself instead of recomputing it on every map(..) call.
     *
     * @param key key
     * @return The value, or null.
     */
    public Object getTemporaryAttribute(Object key) {
        return temporaryAttributes == null ? null : temporaryAttributes.get(key);
    }

    /**
     * Removes the temporary attribute that has the given key.
     *
     * Temporary attributes let a Walker cache a per-ROD result on the record
     * itself instead of recomputing it on every map(..) call.
     *
     * @param key key
     * @return The value that was associated with this key, or null.
     */
    public Object removeTemporaryAttribute(Object key) {
        return temporaryAttributes == null ? null : temporaryAttributes.remove(key);
    }
}

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.annotator;
/**

View File

@ -1,3 +1,28 @@
/*
* Copyright (c) 2010 The Broad Institute
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
* THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package org.broadinstitute.sting.playground.gatk.walkers.annotator;
import java.util.HashMap;
@ -14,7 +39,7 @@ import org.broadinstitute.sting.gatk.contexts.StratifiedAlignmentContext;
import org.broadinstitute.sting.gatk.contexts.variantcontext.Allele;
import org.broadinstitute.sting.gatk.contexts.variantcontext.VariantContext;
import org.broadinstitute.sting.gatk.refdata.RefMetaDataTracker;
import org.broadinstitute.sting.gatk.refdata.features.sampileup.AnnotatorInputTableFeature;
import org.broadinstitute.sting.gatk.refdata.features.annotator.AnnotatorInputTableFeature;
import org.broadinstitute.sting.gatk.refdata.utils.GATKFeature;
import org.broadinstitute.sting.gatk.walkers.annotator.VariantAnnotatorEngine;
import org.broadinstitute.sting.gatk.walkers.annotator.interfaces.InfoFieldAnnotation;