r934: --junc-bed to read BED12
This commit is contained in:
parent
f64e426a5a
commit
49c6d83a8e
48
index.c
48
index.c
|
|
@ -610,7 +610,7 @@ KSTREAM_DECLARE(gzFile, gzread)
|
||||||
#define sort_key_bed(a) ((a).st)
|
#define sort_key_bed(a) ((a).st)
|
||||||
KRADIX_SORT_INIT(bed, mm_idx_intv1_t, sort_key_bed, 4)
|
KRADIX_SORT_INIT(bed, mm_idx_intv1_t, sort_key_bed, 4)
|
||||||
|
|
||||||
mm_idx_intv_t *mm_idx_read_bed(const mm_idx_t *mi, const char *fn)
|
mm_idx_intv_t *mm_idx_read_bed(const mm_idx_t *mi, const char *fn, int read_junc)
|
||||||
{
|
{
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
kstream_t *ks;
|
kstream_t *ks;
|
||||||
|
|
@ -624,8 +624,8 @@ mm_idx_intv_t *mm_idx_read_bed(const mm_idx_t *mi, const char *fn)
|
||||||
while (ks_getuntil(ks, KS_SEP_LINE, &str, 0) >= 0) {
|
while (ks_getuntil(ks, KS_SEP_LINE, &str, 0) >= 0) {
|
||||||
mm_idx_intv_t *r;
|
mm_idx_intv_t *r;
|
||||||
mm_idx_intv1_t t = {-1,-1,-1,-1,0};
|
mm_idx_intv1_t t = {-1,-1,-1,-1,0};
|
||||||
char *p, *q;
|
char *p, *q, *bl, *bs;
|
||||||
int32_t i, id = -1;
|
int32_t i, id = -1, n_blk = 0;
|
||||||
for (p = q = str.s, i = 0;; ++p) {
|
for (p = q = str.s, i = 0;; ++p) {
|
||||||
if (*p == 0 || isspace(*p)) {
|
if (*p == 0 || isspace(*p)) {
|
||||||
int32_t c = *p;
|
int32_t c = *p;
|
||||||
|
|
@ -639,23 +639,49 @@ mm_idx_intv_t *mm_idx_read_bed(const mm_idx_t *mi, const char *fn)
|
||||||
} else if (i == 2) { // end
|
} else if (i == 2) { // end
|
||||||
t.en = atol(q);
|
t.en = atol(q);
|
||||||
if (t.en < 0) break;
|
if (t.en < 0) break;
|
||||||
} else if (i == 3) { // name; do nothing
|
|
||||||
} else if (i == 4) { // BED score
|
} else if (i == 4) { // BED score
|
||||||
t.score = atol(q);
|
t.score = atol(q);
|
||||||
} else if (i == 5) { // strand
|
} else if (i == 5) { // strand
|
||||||
t.strand = *q == '+'? 1 : *q == '-'? -1 : 0;
|
t.strand = *q == '+'? 1 : *q == '-'? -1 : 0;
|
||||||
} else break;
|
} else if (i == 9) {
|
||||||
|
if (!isdigit(*q)) break;
|
||||||
|
n_blk = atol(q);
|
||||||
|
} else if (i == 10) {
|
||||||
|
bl = q;
|
||||||
|
} else if (i == 11) {
|
||||||
|
bs = q;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (c == 0) break;
|
if (c == 0) break;
|
||||||
++i, q = p + 1;
|
++i, q = p + 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (id < 0 || t.st < 0 || t.st >= t.en) continue;
|
if (id < 0 || t.st < 0 || t.st >= t.en) continue;
|
||||||
r = &I[id];
|
r = &I[id];
|
||||||
if (r->n == r->m) {
|
if (i >= 11 && read_junc) { // BED12
|
||||||
r->m = r->m? r->m + (r->m>>1) : 16;
|
int32_t st, sz, en;
|
||||||
r->a = (mm_idx_intv1_t*)realloc(r->a, sizeof(*r->a) * r->m);
|
st = strtol(bs, &bs, 10); ++bs;
|
||||||
|
sz = strtol(bl, &bl, 10); ++bl;
|
||||||
|
en = t.st + st + sz;
|
||||||
|
for (i = 1; i < n_blk; ++i) {
|
||||||
|
mm_idx_intv1_t s = t;
|
||||||
|
if (r->n == r->m) {
|
||||||
|
r->m = r->m? r->m + (r->m>>1) : 16;
|
||||||
|
r->a = (mm_idx_intv1_t*)realloc(r->a, sizeof(*r->a) * r->m);
|
||||||
|
}
|
||||||
|
st = strtol(bs, &bs, 10); ++bs;
|
||||||
|
sz = strtol(bl, &bl, 10); ++bl;
|
||||||
|
s.st = en, s.en = t.st + st;
|
||||||
|
en = t.st + st + sz;
|
||||||
|
if (s.en > s.st) r->a[r->n++] = s;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (r->n == r->m) {
|
||||||
|
r->m = r->m? r->m + (r->m>>1) : 16;
|
||||||
|
r->a = (mm_idx_intv1_t*)realloc(r->a, sizeof(*r->a) * r->m);
|
||||||
|
}
|
||||||
|
r->a[r->n++] = t;
|
||||||
}
|
}
|
||||||
r->a[r->n++] = t;
|
|
||||||
}
|
}
|
||||||
free(str.s);
|
free(str.s);
|
||||||
ks_destroy(ks);
|
ks_destroy(ks);
|
||||||
|
|
@ -663,11 +689,11 @@ mm_idx_intv_t *mm_idx_read_bed(const mm_idx_t *mi, const char *fn)
|
||||||
return I;
|
return I;
|
||||||
}
|
}
|
||||||
|
|
||||||
int mm_idx_bed_read(mm_idx_t *mi, const char *fn)
|
int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc)
|
||||||
{
|
{
|
||||||
int32_t i;
|
int32_t i;
|
||||||
if (mi->h == 0) mm_idx_index_name(mi);
|
if (mi->h == 0) mm_idx_index_name(mi);
|
||||||
mi->I = mm_idx_read_bed(mi, fn);
|
mi->I = mm_idx_read_bed(mi, fn, read_junc);
|
||||||
if (mi->I == 0) return -1;
|
if (mi->I == 0) return -1;
|
||||||
for (i = 0; i < mi->n_seq; ++i) // TODO: eliminate redundant intervals
|
for (i = 0; i < mi->n_seq; ++i) // TODO: eliminate redundant intervals
|
||||||
radix_sort_bed(mi->I[i].a, mi->I[i].a + mi->I[i].n);
|
radix_sort_bed(mi->I[i].a, mi->I[i].a + mi->I[i].n);
|
||||||
|
|
|
||||||
4
main.c
4
main.c
|
|
@ -6,7 +6,7 @@
|
||||||
#include "mmpriv.h"
|
#include "mmpriv.h"
|
||||||
#include "ketopt.h"
|
#include "ketopt.h"
|
||||||
|
|
||||||
#define MM_VERSION "2.16-r933-dirty"
|
#define MM_VERSION "2.16-r934-dirty"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
@ -366,7 +366,7 @@ int main(int argc, char *argv[])
|
||||||
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
|
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), mi->n_seq);
|
||||||
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
|
if (argc != o.ind + 1) mm_mapopt_update(&opt, mi);
|
||||||
if (mm_verbose >= 3) mm_idx_stat(mi);
|
if (mm_verbose >= 3) mm_idx_stat(mi);
|
||||||
if (junc_bed) mm_idx_bed_read(mi, junc_bed);
|
if (junc_bed) mm_idx_bed_read(mi, junc_bed, 1);
|
||||||
if (!(opt.flag & MM_F_FRAG_MODE)) {
|
if (!(opt.flag & MM_F_FRAG_MODE)) {
|
||||||
for (i = o.ind + 1; i < argc; ++i)
|
for (i = o.ind + 1; i < argc; ++i)
|
||||||
mm_map_file(mi, argv[i], &opt, n_threads);
|
mm_map_file(mi, argv[i], &opt, n_threads);
|
||||||
|
|
|
||||||
|
|
@ -367,7 +367,7 @@ int mm_idx_index_name(mm_idx_t *mi);
|
||||||
int mm_idx_name2id(const mm_idx_t *mi, const char *name);
|
int mm_idx_name2id(const mm_idx_t *mi, const char *name);
|
||||||
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
||||||
|
|
||||||
int mm_idx_bed_read(mm_idx_t *mi, const char *fn);
|
int mm_idx_bed_read(mm_idx_t *mi, const char *fn, int read_junc);
|
||||||
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
|
int mm_idx_bed_junc(const mm_idx_t *mi, int32_t ctg, int32_t st, int32_t en, uint8_t *s);
|
||||||
|
|
||||||
// deprecated APIs for backward compatibility
|
// deprecated APIs for backward compatibility
|
||||||
|
|
|
||||||
10
minimap2.1
10
minimap2.1
|
|
@ -1,4 +1,4 @@
|
||||||
.TH minimap2 1 "28 February 2019" "minimap2-2.16-dirty (r933)" "Bioinformatics tools"
|
.TH minimap2 1 "28 February 2019" "minimap2-2.16-dirty (r934)" "Bioinformatics tools"
|
||||||
.SH NAME
|
.SH NAME
|
||||||
.PP
|
.PP
|
||||||
minimap2 - mapping and alignment between collections of DNA sequences
|
minimap2 - mapping and alignment between collections of DNA sequences
|
||||||
|
|
@ -365,10 +365,10 @@ on SIRV data, please add
|
||||||
to the command line.
|
to the command line.
|
||||||
.TP
|
.TP
|
||||||
.BR --junc-bed \ FILE
|
.BR --junc-bed \ FILE
|
||||||
BED file consisting of annotated introns and their strands. With this option,
|
Gene annotations in the BED12 format (aka 12-column BED), or intron positions
|
||||||
minimap2 prefers splicing in annotations.
|
in 5-column BED. With this option, minimap2 prefers splicing in annotations.
|
||||||
.I FILE
|
A BED12 file can be converted from GTF/GFF3 with `paftools.js gff2bed anno.gtf'
|
||||||
can be generated with `paftools.js gff2bed -j ann.gtf' [].
|
[].
|
||||||
.TP
|
.TP
|
||||||
.BR --junc-bonus \ INT
|
.BR --junc-bonus \ INT
|
||||||
Score bonus for a splice donor or acceptor found in annotation (effective with
|
Score bonus for a splice donor or acceptor found in annotation (effective with
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue