Fixed the build problem: there were two copies of the AnnotatorInputTableCodec and AnnotatorInputTableFeature classes in two different locations.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3439 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
5928047d8b
commit
a2fab07258
|
|
@ -1,128 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.sampileup;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
import org.apache.log4j.Logger;
|
|
||||||
import org.broad.tribble.FeatureCodec;
|
|
||||||
import org.broad.tribble.util.AsciiLineReader;
|
|
||||||
import org.broad.tribble.util.LineReader;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLoc;
|
|
||||||
import org.broadinstitute.sting.utils.GenomeLocParser;
|
|
||||||
import org.broadinstitute.sting.utils.Utils;
|
|
||||||
|
|
||||||
public class AnnotatorInputTableCodec implements FeatureCodec<AnnotatorInputTableFeature> {
|
|
||||||
|
|
||||||
private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class);
|
|
||||||
|
|
||||||
public static final String DELIMITER = "\t";
|
|
||||||
|
|
||||||
private ArrayList<String> header;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the header.
|
|
||||||
*
|
|
||||||
* @param reader
|
|
||||||
*
|
|
||||||
* @return The # of header lines for this file.
|
|
||||||
*/
|
|
||||||
public int readHeader(LineReader reader)
|
|
||||||
{
|
|
||||||
int[] lineCounter = new int[1];
|
|
||||||
try {
|
|
||||||
header = readHeader(reader, lineCounter);
|
|
||||||
} catch(IOException e) {
|
|
||||||
throw new IllegalArgumentException("Unable to read from file.", e);
|
|
||||||
}
|
|
||||||
return lineCounter[0];
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Parses the line into an AnnotatorInputTableFeature object.
|
|
||||||
*
|
|
||||||
* @param line
|
|
||||||
*/
|
|
||||||
public AnnotatorInputTableFeature decode(String line) {
|
|
||||||
final ArrayList<String> header = this.header; //optimization
|
|
||||||
final ArrayList<String> values = Utils.split(line, DELIMITER, header.size());
|
|
||||||
|
|
||||||
//if ( values.size() > header.size()) {
|
|
||||||
// throw new CodecLineParsingException(String.format("Encountered a line within " + file + " that has %d columns which is > the number of columns in the header which has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size()));
|
|
||||||
//}
|
|
||||||
|
|
||||||
final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header);
|
|
||||||
for ( int i = 0; i < header.size(); i++ ) {
|
|
||||||
feature.putColumnValue(header.get(i), values.get(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(values.get(0)); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this
|
|
||||||
|
|
||||||
//parse the location
|
|
||||||
feature.setChr(loc.getContig());
|
|
||||||
feature.setStart((int) loc.getStart());
|
|
||||||
feature.setEnd((int) loc.getStop());
|
|
||||||
|
|
||||||
return feature;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the header.
|
|
||||||
* @param source
|
|
||||||
* @return
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
public static ArrayList<String> readHeader(final File source) throws IOException {
|
|
||||||
FileInputStream is = new FileInputStream(source);
|
|
||||||
try {
|
|
||||||
return readHeader(new AsciiLineReader(is), null);
|
|
||||||
} finally {
|
|
||||||
is.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the header, and also sets the 2nd arg to the number of lines in the header.
|
|
||||||
* @param source
|
|
||||||
* @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header.
|
|
||||||
* @return The header fields.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private static ArrayList<String> readHeader(final LineReader source, int[] lineCounter) throws IOException {
|
|
||||||
|
|
||||||
ArrayList<String> header = null;
|
|
||||||
int numLines = 0;
|
|
||||||
|
|
||||||
//find the 1st line that's non-empty and not a comment
|
|
||||||
String line = null;
|
|
||||||
while( (line = source.readLine()) != null ) {
|
|
||||||
numLines++;
|
|
||||||
line = line.trim();
|
|
||||||
if ( line.isEmpty() || line.startsWith("#") ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
//parse the header
|
|
||||||
header = Utils.split(line, DELIMITER);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// check that we found the header
|
|
||||||
if ( header == null ) {
|
|
||||||
throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty.");
|
|
||||||
}
|
|
||||||
|
|
||||||
if(lineCounter != null) {
|
|
||||||
lineCounter[0] = numLines;
|
|
||||||
}
|
|
||||||
logger.info(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
|
|
||||||
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -1,282 +0,0 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.sampileup;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
|
|
||||||
import org.broad.tribble.Feature;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* This class represents a single record in an AnnotatorInputTable.
|
|
||||||
*/
|
|
||||||
public class AnnotatorInputTableFeature implements Feature {
|
|
||||||
|
|
||||||
private ArrayList<String> columnNames;
|
|
||||||
private HashMap<String, String> columnValues;
|
|
||||||
|
|
||||||
private String chr;
|
|
||||||
private int start;
|
|
||||||
private int end;
|
|
||||||
|
|
||||||
|
|
||||||
// Temporary attributes were added to make it easier to implement certain
|
|
||||||
// optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
// needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
// have to repeat this computation every time its map(..) method is called.
|
|
||||||
// If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
// However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
// per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
// of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
// Temporary attributes provide a convenient place to store these results,
|
|
||||||
// freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
private Map<Object, Object> temporaryAttributes;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Constructor.
|
|
||||||
* @param columnNames The column names as parsed out of the file header.
|
|
||||||
*/
|
|
||||||
public AnnotatorInputTableFeature(ArrayList<String> columnNames) {
|
|
||||||
this.columnNames = columnNames;
|
|
||||||
this.columnValues = new HashMap<String, String>();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the list of column names from the file header.
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public ArrayList<String> getHeader() {
|
|
||||||
return columnNames;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the value of the given column.
|
|
||||||
*
|
|
||||||
* @param columnName The column name as it appears in the file header.
|
|
||||||
* @return The value
|
|
||||||
*/
|
|
||||||
public String getColumnValue(final Object columnName) {
|
|
||||||
return columnValues.get(columnName);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public boolean containsColumnName(final Object columnName) {
|
|
||||||
return columnValues.containsKey(columnName);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the value for the given column.
|
|
||||||
*
|
|
||||||
* @param columnName The column name as it appears in the file header.
|
|
||||||
* @param value The value
|
|
||||||
* @return The existing value associated with the columnName, if there is one.
|
|
||||||
*/
|
|
||||||
protected String putColumnValue(final String columnName, final String value) {
|
|
||||||
return columnValues.put(columnName, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns all values in this line, hashed by their column names.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Map<String,String> getColumnValues() {
|
|
||||||
return Collections.unmodifiableMap(columnValues);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Returns the entry set of all column name-value pairs.
|
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
|
||||||
public Set<Entry<String, String>> getEntrySet() {
|
|
||||||
|
|
||||||
return columnValues.entrySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
public String getChr() {
|
|
||||||
return chr;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getStart() {
|
|
||||||
return start;
|
|
||||||
}
|
|
||||||
|
|
||||||
public int getEnd() {
|
|
||||||
return end;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setChr(String chr) {
|
|
||||||
this.chr = chr;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setStart(int start) {
|
|
||||||
this.start = start;
|
|
||||||
}
|
|
||||||
|
|
||||||
protected void setEnd(int end) {
|
|
||||||
this.end = end;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// ROD accessors
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public GenomeLoc getLocation() {
|
|
||||||
if ( loc != null )
|
|
||||||
return loc;
|
|
||||||
String s = get(header.get(0));
|
|
||||||
if ( s == null )
|
|
||||||
return null;
|
|
||||||
return GenomeLocParser.parseGenomeLoc(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
public String getAttributeString() {
|
|
||||||
List<String> strings = new ArrayList<String>(columnValues.size());
|
|
||||||
for ( String key : header ) {
|
|
||||||
if ( containsKey(key) ) { // avoid the header
|
|
||||||
strings.add(this.get(key));
|
|
||||||
//System.out.printf("Adding %s%n", this.get(key));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return Utils.join("\t", strings);
|
|
||||||
}
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
//
|
|
||||||
// map functions
|
|
||||||
//
|
|
||||||
// ----------------------------------------------------------------------
|
|
||||||
public int size() { return columnValues.size(); }
|
|
||||||
public boolean isEmpty() { return columnValues.isEmpty(); }
|
|
||||||
public boolean containsValue(Object o) { return columnValues.containsValue(o); }
|
|
||||||
public String remove(Object o) { return columnValues.remove(o); }
|
|
||||||
public void clear() { columnValues.clear(); }
|
|
||||||
public java.util.Set<String> keySet() { return columnValues.keySet(); }
|
|
||||||
public java.util.Collection<String> values() { return columnValues.values(); }
|
|
||||||
|
|
||||||
public void putAll(java.util.Map<? extends String, ? extends String> map) {
|
|
||||||
columnValues.putAll(map);
|
|
||||||
}
|
|
||||||
|
|
||||||
public java.util.Set<java.util.Map.Entry<String,String>> entrySet() {
|
|
||||||
return columnValues.entrySet();
|
|
||||||
}
|
|
||||||
|
|
||||||
*/
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks whether an attribute has been set for the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return True if an attribute has been set for this key.
|
|
||||||
*/
|
|
||||||
public boolean containsTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.containsKey(key);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the key to the given value, replacing any previous value. The previous
|
|
||||||
* value is returned.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @param value value
|
|
||||||
* @return attribute
|
|
||||||
*/
|
|
||||||
public Object setTemporaryAttribute(Object key, Object value) {
|
|
||||||
if(temporaryAttributes == null) {
|
|
||||||
temporaryAttributes = new HashMap<Object, Object>();
|
|
||||||
}
|
|
||||||
return temporaryAttributes.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks up the value associated with the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return The value, or null.
|
|
||||||
*/
|
|
||||||
public Object getTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.get(key);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the attribute that has the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return The value that was associated with this key, or null.
|
|
||||||
*/
|
|
||||||
public Object removeTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.remove(key);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
@ -37,6 +37,6 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest {
|
||||||
"-s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet",
|
"-s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet",
|
||||||
1,
|
1,
|
||||||
Arrays.asList(md5WithDashSArg));
|
Arrays.asList(md5WithDashSArg));
|
||||||
//executeTest("test with dbSNP and -s arg", specWithSArg);
|
executeTest("test with dbSNP and -s arg", specWithSArg);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue