From 40283f645643e80627d042b0b1fc1679ee3ec87e Mon Sep 17 00:00:00 2001 From: ebanks Date: Wed, 8 Sep 2010 18:06:00 +0000 Subject: [PATCH] Success! TranscriptToGenomicInfo now works without the delicate hacks that Ben had put in. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4232 348d0f76-0448-11de-a6fe-93d51630548a --- .../annotator/AnnotatorInputTableFeature.java | 161 +++--------------- .../TranscriptToGenomicInfo.java | 18 +- 2 files changed, 35 insertions(+), 144 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java index cb3f91ed9..d97e378fb 100755 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/annotator/AnnotatorInputTableFeature.java @@ -43,28 +43,13 @@ public class AnnotatorInputTableFeature implements Feature { private String chr; private int start; private int end; - - - // Temporary attributes were added to make it easier to implement certain - // optimizations for RODs that span an interval. For example, if a Walker - // needs to do a time-consuming computation on data from a ROD, it would normally - // have to repeat this computation every time its map(..) method is called. - // If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - // However, many computations (including validation and parsing) are done per ROD rather than - // per position. Therefore, substantial optimizations are possible if the result - // of the first computation is cached and reused on subsequent map(..) calls. - // Temporary attributes provide a convenient place to store these results, - // freeing the Walkers from having to maintain their own ROD -> result hashmaps. 
- private Map temporaryAttributes; - - - + private String strRep = null; /** * Constructor. - * @param chr The chromosome name. - * @param start - * @param end + * @param chr The chromosome name. + * @param start The start position + * @param end The end position */ public AnnotatorInputTableFeature(String chr, int start, int end) { this.chr = chr; @@ -85,8 +70,7 @@ public class AnnotatorInputTableFeature implements Feature { /** - * Returns the list of column names from the file header. - * @return + * @return the list of column names from the file header. */ public ArrayList getHeader() { return columnNames; @@ -99,12 +83,12 @@ public class AnnotatorInputTableFeature implements Feature { * @param columnName The column name as it appears in the file header. * @return The value */ - public String getColumnValue(final Object columnName) { + public String getColumnValue(final String columnName) { return columnValues.get(columnName); } - public boolean containsColumnName(final Object columnName) { + public boolean containsColumnName(final String columnName) { return columnValues.containsKey(columnName); } @@ -121,9 +105,7 @@ public class AnnotatorInputTableFeature implements Feature { } /** - * Returns all values in this line, hashed by their column names. - * - * @return + * @return all values in this line, hashed by their column names. */ public Map getColumnValues() { return Collections.unmodifiableMap(columnValues); @@ -154,124 +136,23 @@ public class AnnotatorInputTableFeature implements Feature { this.end = end; } - - /** - * Checks whether an attribute has been set for the given key. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) 
method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return True if an attribute has been set for this key. - */ - public boolean containsTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.containsKey(key); - } - return false; - } - - /** - * Sets the key to the given value, replacing any previous value. The previous - * value is returned. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @param value value - * @return attribute - */ - public Object setTemporaryAttribute(Object key, Object value) { - if(temporaryAttributes == null) { - temporaryAttributes = new HashMap(); - } - return temporaryAttributes.put(key, value); - } - - /** - * Looks up the value associated with the given key. 
- * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return The value, or null. - */ - public Object getTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.get(key); - } - return null; - } - - /** - * Removes the attribute that has the given key. - * - * Temporary attributes make it easier to implement certain - * optimizations for RODs that span an interval. For example, if a Walker - * needs to do a time-consuming computation on data from a ROD, it would normally - * have to repeat this computation every time its map(..) method is called. - * If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD. - * However, many computations (including validation and parsing) are done per ROD rather than - * per position. Therefore, substantial optimizations are possible if the result - * of the first computation is cached and reused on subsequent map(..) calls. - * Temporary attributes provide a convenient place to store these results, - * freeing the Walkers from having to maintain their own ROD -> result hashmaps. - * - * @param key key - * @return The value that was associated with this key, or null. 
- */ - public Object removeTemporaryAttribute(Object key) { - if(temporaryAttributes != null) { - return temporaryAttributes.remove(key); - } - return null; - } - - - - @Override public String toString() { - StringBuilder sb = new StringBuilder(); + if ( strRep == null ) { + StringBuilder sb = new StringBuilder(); - for(String columnName : columnNames ) { - if(sb.length() == 0) { - sb.append("["); - } else { - sb.append(", "); + for(String columnName : columnNames ) { + if ( sb.length() == 0 ) + sb.append("["); + else + sb.append(", "); + sb.append(columnName + "=" + columnValues.get(columnName)); } - sb.append(columnName + "=" + columnValues.get(columnName)); + sb.append("]"); + + strRep = sb.toString(); } - sb.append("]"); - return sb.toString(); + + return strRep; } - - - } diff --git a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java index 2da1b53d9..53c63a467 100755 --- a/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java +++ b/java/src/org/broadinstitute/sting/gatk/walkers/annotator/genomicannotator/TranscriptToGenomicInfo.java @@ -66,7 +66,7 @@ import org.broadinstitute.sting.utils.StingException; @By(DataSource.REFERENCE) @Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } ) public class TranscriptToGenomicInfo extends RodWalker { - private static final String ROD_NAME = "transcripts"; + public static final String ROD_NAME = "transcripts"; //@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. 
For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false) //protected String[] PASS_THROUGH_COLUMNS = {}; @@ -140,6 +140,11 @@ public class TranscriptToGenomicInfo extends RodWalker { intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron } + /** + * Store rods until we hit their ends so that we don't have to recompute + * basic information every time we see them in map(). + */ + private Map storedTranscriptInfo = new HashMap(); /** * Prepare the output file and the list of available features. @@ -219,10 +224,12 @@ public class TranscriptToGenomicInfo extends RodWalker { for ( Object transcriptRodObject : transcriptRODs ) { //parse this ROD if it hasn't been already. final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject; - TranscriptTableRecord parsedTranscriptRod = (TranscriptTableRecord) transcriptRod.getTemporaryAttribute("parsedTranscriptRod"); - if( parsedTranscriptRod == null ) { + String featureKey = transcriptRod.toString(); + + TranscriptTableRecord parsedTranscriptRod = storedTranscriptInfo.get(featureKey); + if ( parsedTranscriptRod == null ) { parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS); - transcriptRod.setTemporaryAttribute("parsedTranscriptRod", parsedTranscriptRod); + storedTranscriptInfo.put(featureKey, parsedTranscriptRod); } //populate parsedTranscriptRod.txSequence @@ -274,6 +281,9 @@ public class TranscriptToGenomicInfo extends RodWalker { throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e); } + // remove it from the cache so the entry (key included) is actually dropped + storedTranscriptInfo.remove(featureKey); + transcriptsProcessedCounter++; if ( transcriptsProcessedCounter % 100 == 0 ) logger.info(new Date() + ": " + 
transcriptsProcessedCounter + " transcripts processed");