fix: RefSeq contains records with zero coding length, and the RefSeq codec/feature used to crash on those; such records are now ignored, with a warning printed (once)
This commit is contained in:
parent
710d34633e
commit
a423546cdd
|
|
@ -1,5 +1,6 @@
|
||||||
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
package org.broadinstitute.sting.gatk.refdata.features.refseq;
|
||||||
|
|
||||||
|
import org.apache.commons.io.filefilter.FalseFileFilter;
|
||||||
import org.broad.tribble.Feature;
|
import org.broad.tribble.Feature;
|
||||||
import org.broad.tribble.TribbleException;
|
import org.broad.tribble.TribbleException;
|
||||||
import org.broad.tribble.readers.LineReader;
|
import org.broad.tribble.readers.LineReader;
|
||||||
|
|
@ -20,7 +21,7 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
||||||
* The parser to use when resolving genome-wide locations.
|
* The parser to use when resolving genome-wide locations.
|
||||||
*/
|
*/
|
||||||
private GenomeLocParser genomeLocParser;
|
private GenomeLocParser genomeLocParser;
|
||||||
|
private boolean zero_coding_length_user_warned = false;
|
||||||
/**
|
/**
|
||||||
* Set the parser to use when resolving genetic data.
|
* Set the parser to use when resolving genetic data.
|
||||||
* @param genomeLocParser The supplied parser.
|
* @param genomeLocParser The supplied parser.
|
||||||
|
|
@ -60,9 +61,20 @@ public class RefSeqCodec implements ReferenceDependentFeatureCodec<RefSeqFeature
|
||||||
else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1);
|
else if ( fields[3].length()==1 && fields[3].charAt(0)=='-') feature.setStrand(-1);
|
||||||
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
else throw new UserException.MalformedFile("Expected strand symbol (+/-), found: "+fields[3] + " for line=" + line);
|
||||||
|
|
||||||
|
int coding_start = Integer.parseInt(fields[6])+1;
|
||||||
|
int coding_stop = Integer.parseInt(fields[7]);
|
||||||
|
|
||||||
|
if ( coding_start > coding_stop ) {
|
||||||
|
if ( ! zero_coding_length_user_warned ) {
|
||||||
|
Utils.warnUser("RefSeq file contains transcripts with zero coding length. "+
|
||||||
|
"Such transcripts will be ignored (this warning is printed only once)");
|
||||||
|
zero_coding_length_user_warned = true;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
feature.setTranscript_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[4])+1, Integer.parseInt(fields[5])));
|
||||||
feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, Integer.parseInt(fields[6])+1, Integer.parseInt(fields[7])));
|
feature.setTranscript_coding_interval(genomeLocParser.createGenomeLoc(contig_name, coding_start, coding_stop));
|
||||||
feature.setGene_name(fields[12]);
|
feature.setGene_name(fields[12]);
|
||||||
String[] exon_starts = fields[9].split(",");
|
String[] exon_starts = fields[9].split(",");
|
||||||
String[] exon_stops = fields[10].split(",");
|
String[] exon_stops = fields[10].split(",");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue