From 2cab4c68d43b9e0f228a2eb1e061012c14ad8c67 Mon Sep 17 00:00:00 2001 From: asivache Date: Thu, 22 Oct 2009 14:55:07 +0000 Subject: [PATCH] Added method: isCodingExon(). Returns true if position is simultaneously within an exon AND within coding interval of any single transcript from the list. The old method of detecting coding positions as isExon() && isCoding() is buggy, as the position could be in the UTR part of one transcript (isExon() is true), and within coding region bounds (but not in the exon) of another transcript (isCoding() is true). As a result UTR positions would be erroneously annotated as coding. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@1898 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/rodRefSeq.java | 32 ++++++++++++++++--- 1 file changed, 27 insertions(+), 5 deletions(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java index e546d9968..1d0944350 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/rodRefSeq.java @@ -131,8 +131,8 @@ public class rodRefSeq extends BasicReferenceOrderedDatum implements Transcript } /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to - * rodRefSeqSimple though: takes list of rods (transcripts) at determines whether the current position of the - * whole list is fully whithin an exon of any of those transcripts. Passing null is safe (will return false). + * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether + * this position is fully within an exon of any of those transcripts. Passing null is safe (will return false). 
 * NOTE: position can be still within a UTR, see #isCoding * @return */ @@ -150,12 +150,13 @@ public class rodRefSeq extends BasicReferenceOrderedDatum implements Transcript } /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to - * rodRefSeqSimple though: takes list of rods (transcripts) at determines whether the current position of the - * whole list is fully whithin a coding region of any of those transcripts. Passing null is safe (will return false). + * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether + * this position is fully within a coding region of any of those transcripts. + * Passing null is safe (will return false). * NOTE: "coding" interval is defined as a single genomic interval, so it * does not include the UTRs of the outermost exons, but it includes introns between exons spliced into a * transcript, or internal exons that are not spliced into a given transcript. To check that a position is - * indeed within an exon but not in UTR, use isExon() && isCoding(). #see isExon . + * indeed within an exon but not in UTR, use #isCodingExon(). * @return */ public static boolean isCoding(RODRecordList l) { @@ -170,4 +171,25 @@ public class rodRefSeq extends BasicReferenceOrderedDatum implements Transcript return false; } + + /** Convenience method, which is packaged here for a lack of better place; it is indeed closely related to + * rodRefSeq though: takes list of rods (transcripts) overlapping with a given position and determines whether + * this position is fully within a coding exon portion (i.e. true coding sequence) of any of those transcripts. + * Passing null is safe (will return false). In other words, this method returns true if the list contains a transcript, + * for which the current position is within an exon and within a coding interval simultaneously. 
+ * @return true if at least one transcript in the list satisfies both overlapsCodingP() and overlapsExonP() for the location reported by the list; false otherwise (including a null or empty list) + */ + public static boolean isCodingExon(RODRecordList l) { + + if ( l == null ) return false; + + GenomeLoc loc = l.getLocation(); + + for ( rodRefSeq t : l ) { + if ( t.overlapsCodingP(loc) && t.overlapsExonP(loc) ) return true; + } + return false; + + } + }