Optionally convert EnsEMBL contig names to UCSC names (new -u option)

This commit is contained in:
Heng Li 2018-01-05 17:41:18 -05:00
parent cfe87f50c1
commit 209beb9955
1 changed file with 26 additions and 3 deletions

View File

@ -36,15 +36,36 @@ var getopt = function(args, ostr) {
return optopt;
}
var c;
while ((c = getopt(arguments, "")) != null) {
var c, fn_ucsc_fai = null;
while ((c = getopt(arguments, "u:")) != null) {
if (c == 'u') fn_ucsc_fai = getopt.arg;
}
if (getopt.ind == arguments.length) {
print("Usage: k8 gff2bed.js <in.gff>");
print("Usage: k8 gff2bed.js [-u ucsc-genome.fa.fai] <in.gff>");
exit(1);
}
// Lookup table keyed by the EnsEMBL-style contig name derived below; the value
// is the sequence name exactly as it appears in the first column of the file
// given with -u. The table stays empty when -u was not supplied.
var ens2ucsc = {};

// Derive the EnsEMBL-style contig name from a UCSC-style sequence name:
//   - names ending in "_random" or "_alt" lose that suffix and the leading
//     "chr..._" prefix, e.g. "chr1_KI270706v1_random" -> "KI270706v1";
//   - any other name only loses a leading "chrUn_";
//   - finally the first "v<digits>" becomes ".<digits>", e.g. "KI270706.1".
// A name that matches none of these patterns is returned unchanged.
function ucsc2ens_name(ucsc_name)
{
	var no_suffix = ucsc_name.replace(/_(random|alt)$/, '');
	var core;
	if (no_suffix != ucsc_name) {
		// the suffix was present: drop everything up to the last "chr..._" prefix
		core = no_suffix.replace(/^chr\S+_/, '');
	} else {
		core = ucsc_name.replace(/^chrUn_/, '');
	}
	return core.replace(/v(\d+)/, ".$1");
}

if (fn_ucsc_fai != null) {
	// NOTE(review): the file is presumably a faidx-style index; only its first
	// tab-separated column is read here.
	var buf = new Bytes();
	var file = new File(fn_ucsc_fai);
	while (file.readline(buf) >= 0) {
		var t = buf.toString().split("\t");
		var s = ucsc2ens_name(t[0]);
		// record only names that actually changed; identical names need no mapping
		if (s != t[0]) ens2ucsc[s] = t[0];
	}
	file.close();
	buf.destroy();
}
function print_bed12(exons, cds_st, cds_en)
{
if (exons.length == 0) return;
@ -100,6 +121,8 @@ while (file.readline(buf) >= 0) {
cds_st = cds_st < t[3]? cds_st : t[3];
cds_en = cds_en > t[4]? cds_en : t[4];
} else if (t[2] == "exon") {
if (fn_ucsc_fai != null && ens2ucsc[t[0]] != null)
t[0] = ens2ucsc[t[0]];
exons.push([t[0], t[3], t[4], t[6], id, type, name]);
}
}