Success! TranscriptToGenomicInfo now works without the delicate hacks that Ben had put in.
git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@4232 348d0f76-0448-11de-a6fe-93d51630548a
This commit is contained in:
parent
cd091d7309
commit
40283f6456
|
|
@ -43,28 +43,13 @@ public class AnnotatorInputTableFeature implements Feature {
|
||||||
private String chr;
|
private String chr;
|
||||||
private int start;
|
private int start;
|
||||||
private int end;
|
private int end;
|
||||||
|
private String strRep = null;
|
||||||
|
|
||||||
// Temporary attributes were added to make it easier to implement certain
|
|
||||||
// optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
// needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
// have to repeat this computation every time its map(..) method is called.
|
|
||||||
// If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
// However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
// per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
// of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
// Temporary attributes provide a convenient place to store these results,
|
|
||||||
// freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
private Map<Object, Object> temporaryAttributes;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Constructor.
|
* Constructor.
|
||||||
* @param chr The chromosome name.
|
* @param chr The chromosome name.
|
||||||
* @param start
|
* @param start The start position
|
||||||
* @param end
|
* @param end The end position
|
||||||
*/
|
*/
|
||||||
public AnnotatorInputTableFeature(String chr, int start, int end) {
|
public AnnotatorInputTableFeature(String chr, int start, int end) {
|
||||||
this.chr = chr;
|
this.chr = chr;
|
||||||
|
|
@ -85,8 +70,7 @@ public class AnnotatorInputTableFeature implements Feature {
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns the list of column names from the file header.
|
* @return the list of column names from the file header.
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
public ArrayList<String> getHeader() {
|
public ArrayList<String> getHeader() {
|
||||||
return columnNames;
|
return columnNames;
|
||||||
|
|
@ -99,12 +83,12 @@ public class AnnotatorInputTableFeature implements Feature {
|
||||||
* @param columnName The column name as it appears in the file header.
|
* @param columnName The column name as it appears in the file header.
|
||||||
* @return The value
|
* @return The value
|
||||||
*/
|
*/
|
||||||
public String getColumnValue(final Object columnName) {
|
public String getColumnValue(final String columnName) {
|
||||||
return columnValues.get(columnName);
|
return columnValues.get(columnName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public boolean containsColumnName(final Object columnName) {
|
public boolean containsColumnName(final String columnName) {
|
||||||
return columnValues.containsKey(columnName);
|
return columnValues.containsKey(columnName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -121,9 +105,7 @@ public class AnnotatorInputTableFeature implements Feature {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns all values in this line, hashed by their column names.
|
* @return all values in this line, hashed by their column names.
|
||||||
*
|
|
||||||
* @return
|
|
||||||
*/
|
*/
|
||||||
public Map<String,String> getColumnValues() {
|
public Map<String,String> getColumnValues() {
|
||||||
return Collections.unmodifiableMap(columnValues);
|
return Collections.unmodifiableMap(columnValues);
|
||||||
|
|
@ -154,124 +136,23 @@ public class AnnotatorInputTableFeature implements Feature {
|
||||||
this.end = end;
|
this.end = end;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Checks whether an attribute has been set for the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return True if an attribute has been set for this key.
|
|
||||||
*/
|
|
||||||
public boolean containsTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.containsKey(key);
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Sets the key to the given value, replacing any previous value. The previous
|
|
||||||
* value is returned.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @param value value
|
|
||||||
* @return attribute
|
|
||||||
*/
|
|
||||||
public Object setTemporaryAttribute(Object key, Object value) {
|
|
||||||
if(temporaryAttributes == null) {
|
|
||||||
temporaryAttributes = new HashMap<Object, Object>();
|
|
||||||
}
|
|
||||||
return temporaryAttributes.put(key, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Looks up the value associated with the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return The value, or null.
|
|
||||||
*/
|
|
||||||
public Object getTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.get(key);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Removes the attribute that has the given key.
|
|
||||||
*
|
|
||||||
* Temporary attributes make it easier to implement certain
|
|
||||||
* optimizations for RODs that span an interval. For example, if a Walker
|
|
||||||
* needs to do a time-consuming computation on data from a ROD, it would normally
|
|
||||||
* have to repeat this computation every time its map(..) method is called.
|
|
||||||
* If a ROD spans an interval, the Walker's map(..) method will be called for every position in ROD.
|
|
||||||
* However, many computations (including validation and parsing) are done per ROD rather than
|
|
||||||
* per position. Therefore, substantial optimizations are possible if the result
|
|
||||||
* of the first computation is cached and reused on subsequent map(..) calls.
|
|
||||||
* Temporary attributes provide a convenient place to store these results,
|
|
||||||
* freeing the Walkers from having to maintain their own ROD -> result hashmaps.
|
|
||||||
*
|
|
||||||
* @param key key
|
|
||||||
* @return The value that was associated with this key, or null.
|
|
||||||
*/
|
|
||||||
public Object removeTemporaryAttribute(Object key) {
|
|
||||||
if(temporaryAttributes != null) {
|
|
||||||
return temporaryAttributes.remove(key);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
StringBuilder sb = new StringBuilder();
|
if ( strRep == null ) {
|
||||||
|
StringBuilder sb = new StringBuilder();
|
||||||
|
|
||||||
for(String columnName : columnNames ) {
|
for(String columnName : columnNames ) {
|
||||||
if(sb.length() == 0) {
|
if ( sb.length() == 0 )
|
||||||
sb.append("[");
|
sb.append("[");
|
||||||
} else {
|
else
|
||||||
sb.append(", ");
|
sb.append(", ");
|
||||||
|
sb.append(columnName + "=" + columnValues.get(columnName));
|
||||||
}
|
}
|
||||||
sb.append(columnName + "=" + columnValues.get(columnName));
|
sb.append("]");
|
||||||
|
|
||||||
|
strRep = sb.toString();
|
||||||
}
|
}
|
||||||
sb.append("]");
|
|
||||||
return sb.toString();
|
return strRep;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -66,7 +66,7 @@ import org.broadinstitute.sting.utils.StingException;
|
||||||
@By(DataSource.REFERENCE)
|
@By(DataSource.REFERENCE)
|
||||||
@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } )
|
@Requires(value={DataSource.REFERENCE}, referenceMetaData={ @RMD(name=TranscriptToGenomicInfo.ROD_NAME,type=AnnotatorInputTableFeature.class) } )
|
||||||
public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
||||||
private static final String ROD_NAME = "transcripts";
|
public static final String ROD_NAME = "transcripts";
|
||||||
|
|
||||||
//@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false)
|
//@Argument(fullName="pass-through", shortName="t", doc="Optionally specifies which columns from the transcript table should be copied verbatim (aka. passed-through) to the records in the output table. For example, -B transcripts,AnnotatorInputTable,/data/refGene.txt -t id will cause the refGene id column to be copied to the output table.", required=false)
|
||||||
//protected String[] PASS_THROUGH_COLUMNS = {};
|
//protected String[] PASS_THROUGH_COLUMNS = {};
|
||||||
|
|
@ -140,6 +140,11 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
||||||
intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron
|
intergenic, intron, utr5, CDS, utr3, non_coding_exon, non_coding_intron
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Store rods until we hit their ends so that we don't have to recompute
|
||||||
|
* basic information every time we see them in map().
|
||||||
|
*/
|
||||||
|
private Map<String, TranscriptTableRecord> storedTranscriptInfo = new HashMap<String, TranscriptTableRecord>();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Prepare the output file and the list of available features.
|
* Prepare the output file and the list of available features.
|
||||||
|
|
@ -219,10 +224,12 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
||||||
for ( Object transcriptRodObject : transcriptRODs ) {
|
for ( Object transcriptRodObject : transcriptRODs ) {
|
||||||
//parse this ROD if it hasn't been already.
|
//parse this ROD if it hasn't been already.
|
||||||
final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject;
|
final AnnotatorInputTableFeature transcriptRod = (AnnotatorInputTableFeature) transcriptRodObject;
|
||||||
TranscriptTableRecord parsedTranscriptRod = (TranscriptTableRecord) transcriptRod.getTemporaryAttribute("parsedTranscriptRod");
|
String featureKey = transcriptRod.toString();
|
||||||
if( parsedTranscriptRod == null ) {
|
|
||||||
|
TranscriptTableRecord parsedTranscriptRod = storedTranscriptInfo.get(featureKey);
|
||||||
|
if ( parsedTranscriptRod == null ) {
|
||||||
parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS);
|
parsedTranscriptRod = new TranscriptTableRecord(transcriptRod, GENE_NAME_COLUMNS);
|
||||||
transcriptRod.setTemporaryAttribute("parsedTranscriptRod", parsedTranscriptRod);
|
storedTranscriptInfo.put(featureKey, parsedTranscriptRod);
|
||||||
}
|
}
|
||||||
|
|
||||||
//populate parsedTranscriptRod.txSequence
|
//populate parsedTranscriptRod.txSequence
|
||||||
|
|
@ -274,6 +281,9 @@ public class TranscriptToGenomicInfo extends RodWalker<Integer, Integer> {
|
||||||
throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e);
|
throw new RuntimeException(Thread.currentThread().getName() + " - Unexpected error occurred at position: [" + parsedTranscriptRod.txChrom + ":" + position + "] in transcript: " + parsedTranscriptRod, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
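// clear the cached record for this transcript (note: put(key, null) leaves the key in the map; remove(key) would actually evict it)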
|
||||||
|
storedTranscriptInfo.put(featureKey, null);
|
||||||
|
|
||||||
transcriptsProcessedCounter++;
|
transcriptsProcessedCounter++;
|
||||||
if ( transcriptsProcessedCounter % 100 == 0 )
|
if ( transcriptsProcessedCounter % 100 == 0 )
|
||||||
logger.info(new Date() + ": " + transcriptsProcessedCounter + " transcripts processed");
|
logger.info(new Date() + ": " + transcriptsProcessedCounter + " transcripts processed");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue