Removed AnnotatorROD which has been ported to Tribble

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@3387 348d0f76-0448-11de-a6fe-93d51630548a
2010-05-19 03:39:34 +00:00 · 2010-05-19 03:39:34 +00:00 · d65b2d32d1
parent b82116f488
commit d65b2d32d1
1 changed files with 0 additions and 292 deletions
--- a/java/src/org/broadinstitute/sting/gatk/refdata/AnnotatorROD.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/AnnotatorROD.java
@ -1,292 +0,0 @@
 /*
 * Copyright (c) 2010 The Broad Institute
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use,
 * copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following
 * conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
 * THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
 package org.broadinstitute.sting.gatk.refdata;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.HashMap;
 import java.util.Map;
 import org.apache.log4j.Logger;
 import org.broadinstitute.sting.utils.GenomeLoc;
 import org.broadinstitute.sting.utils.GenomeLocParser;
 import org.broadinstitute.sting.utils.Utils;
 import org.broadinstitute.sting.utils.text.XReadLines;
 /**
  * A slightly modified TabularROD format that is used as input to the
  * GenomicAnnotator.
  *
  * <pre>
  * The main differences from TabularROD are:
  * - the delimiter is \t instead of \s+
  * - incomplete records are allowed (eg. where some values are left blank)
  * - the header row is just the first non-comment, non-empty row in the file.
  *    It no longer has to start with the "HEADER" keyword.
  * </pre>
  *
  * <b>Temporary attributes.</b> Each record also carries an optional key/value
  * store of "temporary attributes". They make it easier to implement certain
  * optimizations for RODs that span an interval. For example, if a Walker
  * needs to do a time-consuming computation on data from a ROD, it would normally
  * have to repeat this computation every time its map(..) method is called.
  * If a ROD spans an interval, the Walker's map(..) method will be called for every
  * position in the ROD. However, many computations (including validation and parsing)
  * are done per ROD rather than per position. Therefore, substantial optimizations are
  * possible if the result of the first computation is cached and reused on subsequent
  * map(..) calls. Temporary attributes provide a convenient place to store these results,
  * freeing the Walkers from having to maintain their own ROD -> result hashmaps.
  *
  * More details can be found here:  http://www.broadinstitute.org/gsa/wiki/index.php/GenomicAnnotator
  */
 public class AnnotatorROD extends TabularROD {
     private static final Logger logger = Logger.getLogger(AnnotatorROD.class);
     /** Special column names */
     public static final String CHRPOS_COLUMN = "chrpos";
     public static final String HAPLOTYPE_REFERENCE_COLUMN = "haplotypeReference";
     public static final String HAPLOTYPE_ALTERNATE_COLUMN = "haplotypeAlternate";
     public static final String HAPLOTYPE_STRAND_COLUMN = "haplotypeStrand";
     // Running count used only for the optional "Parsed %d records" progress output
     // in parseLine(..). It is static, so it is shared by every instance of this class.
     private static int parsedRecords = 0;
     // Backing store for the temporary attributes described in the class documentation.
     // Created lazily on the first setTemporaryAttribute(..) call, so records that never
     // use the feature do not allocate a map.
     private Map<Object, Object> temporaryAttributes;
     /**
      * Constructor. Starts with an empty header and switches the delimiter to a tab.
      *
      * @param name The binding name provided as the first in the list of -B args.
      */
     public AnnotatorROD(String name) {
         super(name, new ArrayList<String>());
         setDelimiter("\t", "\\t");
     }
     /**
      * Walks through the source file looking for the header line, which it
      * returns as a list of strings.
      *
      * @param source The source file.
      * @return The column names of the header, as produced by {@link #readHeader(File)}.
      * @throws FileNotFoundException if the source file cannot be opened.
      */
     public Object initialize(final File source) throws FileNotFoundException {
         return AnnotatorROD.readHeader(source);
     }
     /**
      * Finds and parses the header string in the file. The header is the first
      * line that, after trimming, is neither empty nor starts with "#".
      *
      * The reader is always closed before this method returns or throws.
      *
      * NOTE(review): the line is split with the inherited DELIMITER_REGEX; presumably
      * that is the tab regex installed by the constructor's setDelimiter(..) call --
      * confirm what it holds when this static method is used before any instance exists.
      *
      * @param source The source file.
      * @return A List of the column names parsed out of the header.
      * @throws FileNotFoundException if the source file cannot be opened.
      * @throws RuntimeException if every line is a comment or empty, or if closing
      *         the reader fails after the header was read successfully.
      */
     public static ArrayList<String> readHeader(final File source) throws FileNotFoundException {
         final XReadLines reader = new XReadLines(source);
         // Tracks whether the header was read without error, so that a failure to close
         // the reader never hides the more informative primary exception.
         boolean completed = false;
         try {
             ArrayList<String> header = null;
             //find the 1st line that's non-empty and not a comment
             for ( String line : reader ) {
                 line = line.trim();
                 if ( line.isEmpty() || line.startsWith("#") ) {
                     continue;
                 }
                 header = new ArrayList<String>(Arrays.asList(line.split(DELIMITER_REGEX)));
                 break;
             }
             // check that we found the header
             if ( header == null ) {
                 throw new RuntimeException("No header in " + source + ". All lines are either comments or empty.");
             }
             logger.info(String.format("Found header line containing %d columns:\n[%s]", header.size(), Utils.join("\t", header)));
             completed = true;
             return header;
         } finally {
             // Close on every exit path; previously the reader leaked when no header was found.
             try {
                 reader.close();
             } catch ( IOException e ) {
                 if ( completed ) {
                     throw new RuntimeException(e);
                 }
                 logger.warn("Could not close " + source + " after failing to read its header", e);
             }
         }
     }
     /**
      * Used by ROD management system to set the data in this ROD associated with a line in a rod.
      *
      * Each value in parts is stored under the header name at the same index. A line may
      * have fewer values than the header has columns (incomplete records are allowed),
      * but not more.
      *
      * @param headerObj The header column names as an ArrayList of Strings
      *                  (presumably the object returned by {@link #initialize(File)}).
      * @param parts The delimiter-separated values of one line of the file.
      * @return false if the line has no parts, is a comment, or is the header line itself;
      *         true if the line's values were stored in this ROD.
      * @throws IOException if the line has more columns than the header.
      */
     public boolean parseLine(final Object headerObj, final String[] parts) throws IOException {
         // The element type cannot be verified at runtime; callers are expected to pass the header list.
         @SuppressWarnings("unchecked")
         final ArrayList<String> header = (ArrayList<String>) headerObj;
         //save the header in the super-class, but only once: appending it again on a
         //repeated call would leave duplicate column names in getHeader()
         if ( getHeader().isEmpty() ) {
             getHeader().addAll(header);
         }
         if ( parts.length == 0 || parts[0].startsWith("#") || header.get(0).equals(parts[0]) /* Skip the header line */ )
             return false;
         if ( header.size() < parts.length) {
             throw new IOException(String.format("Encountered line with more columns than have names in the header. Header has %d columns, this line has %d columns.", header.size(), parts.length));
         }
         for ( int i = 0; i < parts.length; i++ ) {
             put(header.get(i), parts[i]);
         }
         if ( printRecordsParsed ) System.out.printf("Parsed %d records %s%n", ++parsedRecords, this);
         return true;
     }
     // ----------------------------------------------------------------------
     //
     // ROD accessors
     //
     // ----------------------------------------------------------------------
     /**
      * Returns the location of this record. On first use it is parsed from the value
      * stored under the first header column and cached in loc; later calls return the
      * cached value.
      *
      * @return The parsed location.
      * @throws RuntimeException if no value is stored under the first header column.
      */
     public GenomeLoc getLocation() {
         if ( loc == null ) {
             String s = get(getHeader().get(0));
             if(s == null) {
                 throw new RuntimeException("Location not set.."); //this should never happen unless the line in the file is empty
             }
             loc = GenomeLocParser.parseGenomeLoc(s);
         }
         return loc;
     }
     /**
      * Checks whether a temporary attribute has been set for the given key.
      * See the class documentation for the rationale behind temporary attributes.
      *
      * @param key key
      * @return True if an attribute has been set for this key.
      */
     public boolean containsTemporaryAttribute(Object key) {
         // A null map means no attribute was ever set.
         return temporaryAttributes != null && temporaryAttributes.containsKey(key);
     }
     /**
      * Sets the key to the given value, replacing any previous value. The previous
      * value is returned.
      * See the class documentation for the rationale behind temporary attributes.
      *
      * @param key    key
      * @param value  value
      * @return The value previously associated with this key, or null if there was none.
      */
     public Object setTemporaryAttribute(Object key, Object value) {
         if(temporaryAttributes == null) {
             temporaryAttributes = new HashMap<Object, Object>();
         }
         return temporaryAttributes.put(key, value);
     }
     /**
      * Looks up the value associated with the given key.
      * See the class documentation for the rationale behind temporary attributes.
      *
      * @param key key
      * @return The value, or null.
      */
     public Object getTemporaryAttribute(Object key) {
         if(temporaryAttributes == null) {
             return null;
         }
         return temporaryAttributes.get(key);
     }
     /**
      * Removes the attribute that has the given key.
      * See the class documentation for the rationale behind temporary attributes.
      *
      * @param key key
      * @return The value that was associated with this key, or null.
      */
     public Object removeTemporaryAttribute(Object key) {
         if(temporaryAttributes == null) {
             return null;
         }
         return temporaryAttributes.remove(key);
     }
 }