r148: revamped regs handling after cigar
This commit is contained in:
parent
696ebce66e
commit
2e4fd9f1d0
6
Makefile
6
Makefile
|
|
@ -1,9 +1,9 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
|
||||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES= -I.
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o hit.o sdust.o \
|
||||
index.o format.o map.o ksw2_extz2_sse.o
|
||||
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o misc.o bseq.o sketch.o sdust.o \
|
||||
index.o chain.o align.o hit.o map.o format.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
|
|||
7
align.c
7
align.c
|
|
@ -239,11 +239,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
r->p->dp_score += ez->max;
|
||||
re1 = rs + (ez->max_t + 1);
|
||||
qe1 = qs + (ez->max_q + 1);
|
||||
if (r->cnt - (j + 1) >= opt->min_cnt) {
|
||||
if (r->cnt - (j + 1) >= opt->min_cnt)
|
||||
mm_split_reg(r, r2, j + 1, qlen, a);
|
||||
if (j + 1 < opt->min_cnt)
|
||||
r2->id = r->id, r2->parent = r2->id, r->id = -1, r->parent = MM_PARENT_TMP_PRI;
|
||||
}
|
||||
break;
|
||||
} else r->p->dp_score += ez->score;
|
||||
rs = re, qs = qe;
|
||||
|
|
@ -305,9 +302,9 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
}
|
||||
}
|
||||
*n_regs_ = n_regs;
|
||||
mm_sync_regs(km, n_regs, regs);
|
||||
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
||||
kfree(km, ez.cigar);
|
||||
mm_filter_regs(km, opt, n_regs_, regs);
|
||||
mm_hit_sort_by_dp(km, n_regs_, regs);
|
||||
return regs;
|
||||
}
|
||||
|
|
|
|||
115
hit.c
115
hit.c
|
|
@ -3,6 +3,22 @@
|
|||
#include "mmpriv.h"
|
||||
#include "kalloc.h"
|
||||
|
||||
/*
 * Fill in the strand, reference id and the reference/query interval of a
 * region from its anchors.
 *
 * r     region to update; r->as (index of its first anchor in a[]) and
 *       r->cnt (number of anchors) are read, while rev, rid, rs, re, qs and
 *       qe are written.
 * qlen  query length, used to flip query coordinates on the reverse strand.
 * a     anchor array; only a[r->as] and a[r->as + r->cnt - 1] are read.
 *
 * NOTE(review): the bit layout below assumes mm128_t::x and ::y are 64-bit
 * unsigned integers -- confirm against the mm128_t declaration.
 */
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
{
	int32_t first_idx = r->as;
	const mm128_t *first = &a[first_idx];
	const mm128_t *last = &a[first_idx + r->cnt - 1];
	int32_t q_span = (int32_t)(first->y>>32&0xff); /* 8-bit span field stored above the low 32 bits of y */
	int32_t first_ref_end = (int32_t)first->x + 1;
	int32_t q_lo = (int32_t)first->y + 1 - q_span;
	int32_t q_hi = (int32_t)last->y + 1;

	r->rev = first->x>>63;    /* top bit of x */
	r->rid = first->x<<1>>33; /* the bits between the top bit and the low 32 bits of x */

	/* NB: target span may be shorter, so clamp the start at 0 instead of going negative */
	if (first_ref_end > q_span)
		r->rs = first_ref_end - q_span;
	else
		r->rs = 0;
	r->re = (int32_t)last->x + 1;

	if (r->rev) {
		/* reverse strand: mirror the interval against the query length */
		r->qs = qlen - q_hi;
		r->qe = qlen - q_lo;
	} else {
		r->qs = q_lo;
		r->qe = q_hi;
	}
}
|
||||
|
||||
mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits
|
||||
{
|
||||
mm128_t *z, tmp;
|
||||
|
|
@ -59,78 +75,55 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compu
|
|||
{
|
||||
int i, j, k, *w;
|
||||
if (n <= 0) return;
|
||||
for (i = 0; i < n; ++i) r[i].id = i;
|
||||
w = (int*)kmalloc(km, n * sizeof(int));
|
||||
w[0] = 0, r[0].parent = 0;
|
||||
for (i = 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
mm_reg1_t *ri = &r[i];
|
||||
int si = ri->qs, ei = ri->qe;
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
mm_reg1_t *rp = &r[w[j]];
|
||||
int sj = rp->qs, ej = rp->qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
ri->parent = rp->parent;
|
||||
rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score;
|
||||
if (rp->p && ri->p)
|
||||
rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i, r[i].parent = i;
|
||||
if (j == k) w[k++] = i, ri->parent = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
}
|
||||
|
||||
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r) // due to changes to r.{qs,qe} after DP extension
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
|
||||
{
|
||||
int i, j, k, *w, n_pri = 0;
|
||||
if (n <= 0) return;
|
||||
for (i = 0; i < n; ++i)
|
||||
if (r[i].id == r[i].parent || r[i].parent < 0) ++n_pri;
|
||||
if (n_pri <= 1) return; // FIXME: this is not right
|
||||
w = (int*)kmalloc(km, n_pri * sizeof(int));
|
||||
for (i = j = 0; i < n; ++i) // find the first primary
|
||||
if (r[i].id == r[i].parent) break;
|
||||
for (w[0] = i, i = i + 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
if (r[i].id != r[i].parent && r[i].parent >= 0) {
|
||||
r[i].parent = r[r[i].parent].parent;
|
||||
continue;
|
||||
}
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
int32_t i, n_aux, n = *n_regs;
|
||||
uint64_t *aux;
|
||||
mm_reg1_t *t;
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
mm_reg1_t *ri = &r[i], *rp, tmp;
|
||||
int t;
|
||||
if (ri->p == 0 || ri->id == ri->parent || ri->parent < 0) continue;
|
||||
rp = &r[ri->parent];
|
||||
if (rp->p && ri->p->dp_max > rp->p->dp_max) {
|
||||
ri->subsc = rp->score;
|
||||
tmp = *ri, *ri = *rp, *rp = tmp;
|
||||
t = ri->id, ri->id = rp->id, rp->id = t;
|
||||
t = ri->parent, ri->parent = rp->parent, rp->parent = t;
|
||||
if (n <= 1) return;
|
||||
aux = (uint64_t*)kmalloc(km, n * 8);
|
||||
t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t));
|
||||
for (i = n_aux = 0; i < n; ++i) {
|
||||
if (r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
|
||||
assert(r[i].p);
|
||||
aux[n_aux++] = (uint64_t)r[i].p->dp_max << 32 | i;
|
||||
} else if (r[i].p) {
|
||||
kfree(km, r[i].p);
|
||||
r[i].p = 0;
|
||||
}
|
||||
}
|
||||
|
||||
for (i = 0; i < n; ++i) {
|
||||
mm_reg1_t *ri = &r[i], *rp;
|
||||
if (ri->id == ri->parent || ri->parent < 0) continue;
|
||||
rp = &r[ri->parent];
|
||||
rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score;
|
||||
if (rp->p && ri->p)
|
||||
rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max;
|
||||
}
|
||||
radix_sort_64(aux, aux + n_aux);
|
||||
for (i = n_aux - 1; i >= 0; --i)
|
||||
t[n_aux - 1 - i] = r[(int32_t)aux[i]];
|
||||
memcpy(r, t, sizeof(mm_reg1_t) * n_aux);
|
||||
*n_regs = n_aux;
|
||||
kfree(km, aux);
|
||||
kfree(km, t);
|
||||
}
|
||||
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id
|
||||
|
|
@ -164,7 +157,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int
|
|||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, n = *n_, n_2nd = 0;
|
||||
for (i = k = 0; i < n; ++i)
|
||||
if (r[i].parent == i || r[i].parent < 0) r[k++] = r[i]; // NB: r[i].parent may be -1 if its parent has been filtered
|
||||
if (r[i].parent == i) r[k++] = r[i];
|
||||
else if (r[i].score >= r[r[i].parent].score * pri_ratio && n_2nd++ < best_n) r[k++] = r[i];
|
||||
else if (r[i].p) free(r[i].p);
|
||||
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
|
||||
|
|
@ -175,7 +168,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int
|
|||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs)
|
||||
{ // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out
|
||||
int i, k;
|
||||
for (i = 0; i < *n_regs; ++i) {
|
||||
for (i = k = 0; i < *n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
int flt = 0;
|
||||
if (r->cnt < opt->min_cnt) flt = 1;
|
||||
|
|
@ -188,15 +181,7 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
|
|||
else if (r->p->dp_max < opt->min_dp_max) flt = 1;
|
||||
if (flt) free(r->p);
|
||||
}
|
||||
if (flt) r->cnt = 0;
|
||||
}
|
||||
for (i = 0; i < *n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent >= 0 && r->id != r->parent && regs[r->parent].cnt == 0)
|
||||
r->cnt = 0;
|
||||
}
|
||||
for (i = k = 0; i < *n_regs; ++i) {
|
||||
if (regs[i].cnt) {
|
||||
if (!flt) {
|
||||
if (k < i) regs[k++] = regs[i];
|
||||
else ++k;
|
||||
}
|
||||
|
|
|
|||
2
main.c
2
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r146-pre"
|
||||
#define MM_VERSION "2.0-r148-pre"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
|
|||
2
map.c
2
map.c
|
|
@ -247,7 +247,7 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
if (opt->flag & MM_F_CIGAR) {
|
||||
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
|
||||
if (!(opt->flag & MM_F_AVA)) {
|
||||
mm_update_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs);
|
||||
}
|
||||
} else mm_filter_regs(b->km, opt, n_regs, regs);
|
||||
|
|
|
|||
18
mmpriv.h
18
mmpriv.h
|
|
@ -40,30 +40,14 @@ mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a);
|
|||
void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
|
||||
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
 * Fill in the strand, reference id and the reference/query interval of a
 * region from its anchors.
 *
 * r     region to update; r->as (index of its first anchor in a[]) and
 *       r->cnt (number of anchors) are read, while rev, rid, rs, re, qs and
 *       qe are written.
 * qlen  query length, used to flip query coordinates on the reverse strand.
 * a     anchor array; only a[r->as] and a[r->as + r->cnt - 1] are read.
 *
 * NOTE(review): the bit layout below assumes mm128_t::x and ::y are 64-bit
 * unsigned integers -- confirm against the mm128_t declaration.
 */
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
{
	int32_t first_idx = r->as;
	const mm128_t *first = &a[first_idx];
	const mm128_t *last = &a[first_idx + r->cnt - 1];
	int32_t q_span = (int32_t)(first->y>>32&0xff); /* 8-bit span field stored above the low 32 bits of y */
	int32_t first_ref_end = (int32_t)first->x + 1;
	int32_t q_lo = (int32_t)first->y + 1 - q_span;
	int32_t q_hi = (int32_t)last->y + 1;

	r->rev = first->x>>63;    /* top bit of x */
	r->rid = first->x<<1>>33; /* the bits between the top bit and the low 32 bits of x */

	/* NB: target span may be shorter, so clamp the start at 0 instead of going negative */
	if (first_ref_end > q_span)
		r->rs = first_ref_end - q_span;
	else
		r->rs = 0;
	r->re = (int32_t)last->x + 1;

	if (r->rev) {
		/* reverse strand: mirror the interval against the query length */
		r->qs = qlen - q_hi;
		r->qe = qlen - q_lo;
	} else {
		r->qs = q_lo;
		r->qe = q_hi;
	}
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue