Success! TranscriptToGenomicInfo now works without the delicate hacks that Ben had put in.

git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4232 348d0f76-0448-11de-a6fe-93d51630548a
2010-09-08 18:06:00 +00:00 · 2010-09-08 18:06:00 +00:00 · 40283f6456
parent cd091d7309
commit 40283f6456
2 changed files with 35 additions and 144 deletions
--- a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java
+++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java
@ -43,28 +43,13 @@ public class AnnotatorInputTableFeature implements Feature {
    private String chr;
    private int start;
    private int end;
-
-
-    // Temporary attributes were added to make it easier to implement certain
-    // optimizations for RODs that span an interval. For example, if a Walker
-    // needs to do a time-consuming computation on data from a ROD, it would normally
-    // have to repeat this computation every time its map(..) method is called.
-    // If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
-    // However, many computations (including validation and parsing) are done per ROD rather than
-    // per position. Therefore, substantial optimizations are possible if the result
-    // of the first computation is cached and reused on subsequent map(..) calls.
-    // Temporary attributes provide a convenient place to store these results,
-    // freeing the Walkers from having to maintain their own ROD -> result hashmaps.
-    private Map<Object, Object> temporaryAttributes;
-
-
-
+    private String strRep = null;

    /**
     * Constructor.
-     * @param chr The chromosome name.
-     * @param start
-     * @param end
+     * @param chr    The chromosome name.
+     * @param start  The start position
+     * @param end    The end position
     */
    public AnnotatorInputTableFeature(String chr, int start, int end) {
        this.chr = chr;
@ -85,8 +70,7 @@ public class AnnotatorInputTableFeature implements Feature {


    /**
-     * Returns the list of column names from the file header.
-     * @return
+     * @return the list of column names from the file header.
     */
    public ArrayList<String> getHeader() {
        return columnNames;
@ -99,12 +83,12 @@ public class AnnotatorInputTableFeature implements Feature {
     * @param columnName The column name as it appears in the file header.
     * @return The value
     */
-    public String getColumnValue(final Object columnName) {
+    public String getColumnValue(final String columnName) {
        return columnValues.get(columnName);
    }


-    public boolean containsColumnName(final Object columnName) {
+    public boolean containsColumnName(final String columnName) {
        return columnValues.containsKey(columnName);
    }

@ -121,9 +105,7 @@ public class AnnotatorInputTableFeature implements Feature {
    }

    /**
-     * Returns all values in this line, hashed by their column names.
-     *
-     * @return
+     * @return an unmodifiable view of all values in this line, keyed by their column names.
     */
    public Map<String,String> getColumnValues() {
        return Collections.unmodifiableMap(columnValues);
@ -154,124 +136,23 @@ public class AnnotatorInputTableFeature implements Feature {
        this.end = end;
    }

-
-    /**
-     * Checks whether an attribute has been set for the given key.
-     *
-     * Temporary attributes make it easier to implement certain
-     * optimizations for RODs that span an interval. For example, if a Walker
-     * needs to do a time-consuming computation on data from a ROD, it would normally
-     * have to repeat this computation every time its map(..) method is called.
-     * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
-     * However, many computations (including validation and parsing) are done per ROD rather than
-     * per position. Therefore, substantial optimizations are possible if the result
-     * of the first computation is cached and reused on subsequent map(..) calls.
-     * Temporary attributes provide a convenient place to store these results,
-     * freeing the Walkers from having to maintain their own ROD -> result hashmaps.
-     *
-     * @param key key
-     * @return True if an attribute has been set for this key.
-     */
-    public boolean containsTemporaryAttribute(Object key) {
-        if(temporaryAttributes != null) {
-            return temporaryAttributes.containsKey(key);
-        }
-        return false;
-    }
-
-    /**
-     * Sets the key to the given value, replacing any previous value. The previous
-     * value is returned.
-     *
-     * Temporary attributes make it easier to implement certain
-     * optimizations for RODs that span an interval. For example, if a Walker
-     * needs to do a time-consuming computation on data from a ROD, it would normally
-     * have to repeat this computation every time its map(..) method is called.
-     * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
-     * However, many computations (including validation and parsing) are done per ROD rather than
-     * per position. Therefore, substantial optimizations are possible if the result
-     * of the first computation is cached and reused on subsequent map(..) calls.
-     * Temporary attributes provide a convenient place to store these results,
-     * freeing the Walkers from having to maintain their own ROD -> result hashmaps.
-     *
-     * @param key    key
-     * @param value  value
-     * @return attribute
-     */
-    public Object setTemporaryAttribute(Object key, Object value) {
-        if(temporaryAttributes == null) {
-            temporaryAttributes = new HashMap<Object, Object>();
-        }
-        return temporaryAttributes.put(key, value);
-    }
-
-    /**
-     * Looks up the value associated with the given key.
-     *
-     * Temporary attributes make it easier to implement certain
-     * optimizations for RODs that span an interval. For example, if a Walker
-     * needs to do a time-consuming computation on data from a ROD, it would normally
-     * have to repeat this computation every time its map(..) method is called.
-     * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
-     * However, many computations (including validation and parsing) are done per ROD rather than
-     * per position. Therefore, substantial optimizations are possible if the result
-     * of the first computation is cached and reused on subsequent map(..) calls.
-     * Temporary attributes provide a convenient place to store these results,
-     * freeing the Walkers from having to maintain their own ROD -> result hashmaps.
-     *
-     * @param key key
-     * @return The value, or null.
-     */
-    public Object getTemporaryAttribute(Object key) {
-        if(temporaryAttributes != null) {
-            return temporaryAttributes.get(key);
-        }
-        return null;
-    }
-
-    /**
-     * Removes the attribute that has the given key.
-     *
-     * Temporary attributes make it easier to implement certain
-     * optimizations for RODs that span an interval. For example, if a Walker
-     * needs to do a time-consuming computation on data from a ROD, it would normally
-     * have to repeat this computation every time its map(..) method is called.
-     * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
-     * However, many computations (including validation and parsing) are done per ROD rather than
-     * per position. Therefore, substantial optimizations are possible if the result
-     * of the first computation is cached and reused on subsequent map(..) calls.
-     * Temporary attributes provide a convenient place to store these results,
-     * freeing the Walkers from having to maintain their own ROD -> result hashmaps.
-     *
-     * @param key key
-     * @return The value that was associated with this key, or null.
-     */
-    public Object removeTemporaryAttribute(Object key) {
-         if(temporaryAttributes != null) {
-             return temporaryAttributes.remove(key);
-         }
-         return null;
-    }
-
-
-
-
    @Override
    public String toString() {
-        StringBuilder sb = new StringBuilder();
+        if ( strRep == null ) {
+            StringBuilder sb = new StringBuilder();

-        for(String columnName : columnNames ) {
-            if(sb.length() == 0) {
-                sb.append("[");
-            } else {
-                sb.append(", ");
+            for(String columnName : columnNames ) {
+                if ( sb.length() == 0 )
+                    sb.append("[");
+                else
+                    sb.append(", ");
+                sb.append(columnName + "=" + columnValues.get(columnName));
            }
-            sb.append(columnName + "=" + columnValues.get(columnName));
+            sb.append("]");
+
+            strRep = sb.toString();
        }
-        sb.append("]");
-        return sb.toString();
+
+        return strRep;
    }
-
-
-
 }
--- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java
+++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java
@ -66,7 +66,7 @@ import org.broadinstitute.sting.utils.StingException;
@By(DataSource.REFERENCE)
@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } )
 public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
-    private static final String ROD_NAME = "transcripts";
+    public static final String ROD_NAME = "transcripts";

    //@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false)
    //protected String[] PASS_THROUGH_COLUMNS = {};
@ -140,6 +140,11 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
        intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron
    }

+    /**
+     * Cache of parsed transcript records, keyed by each rod's toString() value, kept until we hit
+     * the rod's end so that we don't have to re-parse it every time we see it in map().
+     */
+    private Map<String, TranscriptTableRecord> storedTranscriptInfo = new HashMap<String, TranscriptTableRecord>();

    /**
     * Prepare the output file and the list of available features.
@ -219,10 +224,12 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
        for ( Object transcriptRodObject : transcriptRODs ) {
            //parse this ROD if it hasn't been already.
            final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject;
-            TranscriptTableRecord parsedTranscriptRod = (TranscriptTableRecord) transcriptRod.getTemporaryAttribute("parsedTranscriptRod");
-            if( parsedTranscriptRod == null ) {
+            String featureKey = transcriptRod.toString();
+
+            TranscriptTableRecord parsedTranscriptRod = storedTranscriptInfo.get(featureKey);
+            if ( parsedTranscriptRod == null ) {
                parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS);
-                transcriptRod.setTemporaryAttribute("parsedTranscriptRod", parsedTranscriptRod);
+                storedTranscriptInfo.put(featureKey, parsedTranscriptRod);
            }

            //populate parsedTranscriptRod.txSequence
@ -274,6 +281,9 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
                    throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e);
                }

+                // evict it from the cache; remove() drops the key too, whereas put(key, null) would leak it
+                storedTranscriptInfo.remove(featureKey);
+
                transcriptsProcessedCounter++;
                if ( transcriptsProcessedCounter % 100 == 0 )
                    logger.info(new Date() + ": " +  transcriptsProcessedCounter + " transcripts processed");