diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableCodec.java deleted file mode 100755 index f5e869522..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableCodec.java +++ /dev/null @@ -1,128 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata.features.sampileup; - -import java.io.File; -import java.io.FileInputStream; -import java.io.IOException; -import java.util.ArrayList; - -import org.apache.log4j.Logger; -import org.broad.tribble.FeatureCodec; -import org.broad.tribble.util.AsciiLineReader; -import org.broad.tribble.util.LineReader; -import org.broadinstitute.sting.utils.GenomeLoc; -import org.broadinstitute.sting.utils.GenomeLocParser; -import org.broadinstitute.sting.utils.Utils; - -public class AnnotatorInputTableCodec implements FeatureCodec { - - private static Logger logger = Logger.getLogger(AnnotatorInputTableCodec.class); - - public static final String DELIMITER = "\t"; - - private ArrayList header; - - /** - * Parses the header. - * - * @param reader - * - * @return The # of header lines for this file. - */ - public int readHeader(LineReader reader) - { - int[] lineCounter = new int[1]; - try { - header = readHeader(reader, lineCounter); - } catch(IOException e) { - throw new IllegalArgumentException("Unable to read from file.", e); - } - return lineCounter[0]; - } - - - /** - * Parses the line into an AnnotatorInputTableFeature object. - * - * @param line - */ - public AnnotatorInputTableFeature decode(String line) { - final ArrayList header = this.header; //optimization - final ArrayList values = Utils.split(line, DELIMITER, header.size()); - - //if ( values.size() > header.size()) { - // throw new CodecLineParsingException(String.format("Encountered a line within " + file + " that has %d columns which is > the number of columns in the header which has %d columns.\nHeader: " + header + "\nLine: " + values, values.size(), header.size())); - //} - - final AnnotatorInputTableFeature feature = new AnnotatorInputTableFeature(header); - for ( int i = 0; i < header.size(); i++ ) { - feature.putColumnValue(header.get(i), values.get(i)); - } - - final GenomeLoc loc = GenomeLocParser.parseGenomeLoc(values.get(0)); //GenomeLocParser.parseGenomeInterval(values.get(0)); - TODO switch to this - - //parse the location - feature.setChr(loc.getContig()); - feature.setStart((int) loc.getStart()); - feature.setEnd((int) loc.getStop()); - - return feature; - } - - - - /** - * Returns the header. - * @param source - * @return - * @throws IOException - */ - public static ArrayList readHeader(final File source) throws IOException { - FileInputStream is = new FileInputStream(source); - try { - return readHeader(new AsciiLineReader(is), null); - } finally { - is.close(); - } - } - - - /** - * Returns the header, and also sets the 2nd arg to the number of lines in the header. - * @param source - * @param lineCounter An array of length 1 or null. If not null, array[0] will be set to the number of lines in the header. - * @return The header fields. - * @throws IOException - */ - private static ArrayList readHeader(final LineReader source, int[] lineCounter) throws IOException { - - ArrayList header = null; - int numLines = 0; - - //find the 1st line that's non-empty and not a comment - String line = null; - while( (line = source.readLine()) != null ) { - numLines++; - line = line.trim(); - if ( line.isEmpty() || line.startsWith("#") ) { - continue; - } - - //parse the header - header = Utils.split(line, DELIMITER); - break; - } - - // check that we found the header - if ( header == null ) { - throw new IllegalArgumentException("No header in " + source + ". All lines are either comments or empty."); - } - - if(lineCounter != null) { - lineCounter[0] = numLines; - } - logger.info(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header))); - - return header; - } - -} diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableFeature.java deleted file mode 100755 index b7f52848d..000000000 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/sampileup/AnnotatorInputTableFeature.java +++ /dev/null @@ -1,282 +0,0 @@ -package org.broadinstitute.sting.gatk.refdata.features.sampileup; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import java.util.Set; -import java.util.Map.Entry; - -import org.broad.tribble.Feature; - -/** - * This class represents a single record in an AnnotatorInputTable. - */ -public class AnnotatorInputTableFeature implements Feature { - - private ArrayList columnNames; - private HashMap columnValues; - - private String chr; - private int start; - private int end; - - - // Temporary attributes were added to make it easier to implement certain - // optimizations for RODs that span an interval. For example, if a Walker - // needs to do a time-consuming computation on data from a ROD, it would normally - // have to repeat this computation every time its map(..) method is called. - // If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - // However, many computations (including validation and parsing) are done per ROD rather than - // per position. Therefore, substantial optimizations are possible if the result - // of the first computation is cached and reused on subsequent map(..) calls. - // Temporary attributes provide a convenient place to store these results, - // freeing the Walkers from having to maintain their own ROD -> result hashmaps. - private Map temporaryAttributes; - - - - - /** - * Constructor. - * @param columnNames The column names as parsed out of the file header. - */ - public AnnotatorInputTableFeature(ArrayList columnNames) { - this.columnNames = columnNames; - this.columnValues = new HashMap(); - } - - - - /** - * Returns the list of column names from the file header. - * @return - */ - public ArrayList getHeader() { - return columnNames; - } - - - /** - * Returns the value of the given column. - * - * @param columnName The column name as it appears in the file header. - * @return The value - */ - public String getColumnValue(final Object columnName) { - return columnValues.get(columnName); - } - - - public boolean containsColumnName(final Object columnName) { - return columnValues.containsKey(columnName); - } - - - /** - * Sets the value for the given column. - * - * @param columnName The column name as it appears in the file header. - * @param value The value - * @return The existing value associated with the columnName, if there is one. - */ - protected String putColumnValue(final String columnName, final String value) { - return columnValues.put(columnName, value); - } - - /** - * Returns all values in this line, hashed by their column names. - * - * @return - */ - public Map getColumnValues() { - return Collections.unmodifiableMap(columnValues); - } - - - /** - * Returns the entry set of all column name-value pairs. - * - * @return - */ - public Set> getEntrySet() { - - return columnValues.entrySet(); - } - - - public String getChr() { - return chr; - } - - public int getStart() { - return start; - } - - public int getEnd() { - return end; - } - - protected void setChr(String chr) { - this.chr = chr; - } - - protected void setStart(int start) { - this.start = start; - } - - protected void setEnd(int end) { - this.end = end; - } - - /* - // ---------------------------------------------------------------------- - // - // ROD accessors - // - // ---------------------------------------------------------------------- - public GenomeLoc getLocation() { - if ( loc != null ) - return loc; - String s = get(header.get(0)); - if ( s == null ) - return null; - return GenomeLocParser.parseGenomeLoc(s); - } - - public String getAttributeString() { - List strings = new ArrayList(columnValues.size()); - for ( String key : header ) { - if ( containsKey(key) ) { // avoid the header - strings.add(this.get(key)); - //System.out.printf("Adding %s%n", this.get(key)); - } - } - return Utils.join("\t", strings); - } - // ---------------------------------------------------------------------- - // - // map functions - // - // ---------------------------------------------------------------------- - public int size() { return columnValues.size(); } - public boolean isEmpty() { return columnValues.isEmpty(); } - public boolean containsValue(Object o) { return columnValues.containsValue(o); } - public String remove(Object o) { return columnValues.remove(o); } - public void clear() { columnValues.clear(); } - public java.util.Set keySet() { return columnValues.keySet(); } - public java.util.Collection values() { return columnValues.values(); } - - public void putAll(java.util.Map map) { - columnValues.putAll(map); - } - - public java.util.Set> entrySet() { - return columnValues.entrySet(); - } - - */ - - /** - * Checks whether an attribute has been set for the given key. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return True if an attribute has been set for this key. - */ - public boolean containsTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.containsKey(key); - } - return false; - } - - /** - * Sets the key to the given value, replacing any previous value. The previous - * value is returned. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @param value value - * @return attribute - */ - public Object setTemporaryAttribute(Object key, Object value) { - if(temporaryAttributes == null) { - temporaryAttributes = new HashMap(); - } - return temporaryAttributes.put(key, value); - } - - /** - * Looks up the value associated with the given key. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return The value, or null. - */ - public Object getTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.get(key); - } - return null; - } - - /** - * Removes the attribute that has the given key. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return The value that was associated with this key, or null. - */ - public Object removeTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.remove(key); - } - return null; - } - - - - - -} diff --git a/java/test/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotatorIntegrationTest.java b/java/test/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotatorIntegrationTest.java index 0fd957b59..bff12b8ee 100755 --- a/java/test/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotatorIntegrationTest.java +++ b/java/test/org/broadinstitute/sting/playground/gatk/walkers/annotator/GenomicAnnotatorIntegrationTest.java @@ -37,6 +37,6 @@ public class GenomicAnnotatorIntegrationTest extends WalkerTest { "-s dbsnp.name,dbsnp.refUCSC,dbsnp.strand,dbsnp.observed,dbsnp.avHet", 1, Arrays.asList(md5WithDashSArg)); - //executeTest("test with dbSNP and -s arg", specWithSArg); + executeTest("test with dbSNP and -s arg", specWithSArg); } }