From c5de06a6417c58ac917bfa499d2a905317fc88c3 Mon Sep 17 00:00:00 2001 From: chartl Date: Tue, 21 Jun 2011 14:07:58 +0000 Subject: [PATCH] Fixing up the RefSeqCodec so a bad entry in RefSeq (some transcripts are odd and have a negative length which may signify something special (?) ) doesn't cause failure, but issues a warning instead. Integration tests pass. git-svn-id: file:///humgen/gsa-scr1/gsa-engineering/svn_contents/trunk@6021 348d0f76-0448-11de-a6fe-93d51630548a --- .../sting/gatk/refdata/features/refseq/RefSeqCodec.java | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java index 044258c6c..461aab9a5 100644 --- a/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java +++ b/java/src/org/broadinstitute/sting/gatk/refdata/features/refseq/RefSeqCodec.java @@ -6,6 +6,7 @@ import org.broad.tribble.readers.LineReader; import org.broadinstitute.sting.gatk.refdata.ReferenceDependentFeatureCodec; import org.broadinstitute.sting.utils.GenomeLoc; import org.broadinstitute.sting.utils.GenomeLocParser; +import org.broadinstitute.sting.utils.Utils; import org.broadinstitute.sting.utils.exceptions.UserException; import java.util.ArrayList; @@ -35,7 +36,12 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec " + line + ", we expected at least 3 columns, we saw " + fields.length); String contig_name = fields[2]; - return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + try { + return new RefSeqFeature(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5]))); + } catch ( UserException.MalformedGenomeLoc e ) { + Utils.warnUser("RefSeq file is potentially incorrect, as some transcripts or exons have a negative length ("+fields[2]+")"); + return null; + } } /** Fills this object from a text line in RefSeq (UCSC) text dump file */