code refactoring for mm_reg1_t
This commit is contained in:
parent
cc67f1b781
commit
ecedfe5788
3
Makefile
3
Makefile
|
|
@ -2,7 +2,7 @@ CC= gcc
|
|||
CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES= -I.
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o hit.o sdust.o \
|
||||
index.o format.o map.o ksw2_extz2_sse.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust
|
||||
|
|
@ -42,6 +42,7 @@ align.o: minimap.h mmpriv.h bseq.h ksw2.h
|
|||
bseq.o: bseq.h kseq.h
|
||||
chain.o: minimap.h mmpriv.h bseq.h kalloc.h
|
||||
format.o: mmpriv.h minimap.h bseq.h
|
||||
hit.o: mmpriv.h minimap.h bseq.h kalloc.h
|
||||
index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h
|
||||
kalloc.o: kalloc.h
|
||||
ksw2_extz2_sse.o: ksw2.h
|
||||
|
|
|
|||
|
|
@ -0,0 +1,156 @@
|
|||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "mmpriv.h"
|
||||
#include "kalloc.h"
|
||||
|
||||
mm_reg1_t *mm_gen_regs(int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits
|
||||
{
|
||||
mm_reg1_t *r;
|
||||
int i, k;
|
||||
r = (mm_reg1_t*)calloc(n_u, sizeof(mm_reg1_t));
|
||||
for (i = k = 0; i < n_u; ++i) {
|
||||
mm_reg1_t *ri = &r[i];
|
||||
ri->id = i;
|
||||
ri->parent = MM_PARENT_UNSET;
|
||||
ri->subsc = 0;
|
||||
ri->score = u[i]>>32;
|
||||
ri->cnt = (int32_t)u[i];
|
||||
ri->as = k;
|
||||
mm_reg_set_coor(ri, qlen, a);
|
||||
k += ri->cnt;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compute mm_reg1_t::subsc
|
||||
{
|
||||
int i, j, k, *w;
|
||||
if (n <= 0) return;
|
||||
w = (int*)kmalloc(km, n * sizeof(int));
|
||||
w[0] = 0, r[0].parent = 0;
|
||||
for (i = 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i, r[i].parent = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
}
|
||||
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id
|
||||
{
|
||||
int *tmp, i, max_id = -1, n_tmp;
|
||||
if (n_regs <= 0) return;
|
||||
for (i = 0; i < n_regs; ++i)
|
||||
max_id = max_id > regs[i].id? max_id : regs[i].id;
|
||||
n_tmp = max_id + 1;
|
||||
tmp = (int*)kmalloc(km, n_tmp * sizeof(int));
|
||||
for (i = 0; i < n_tmp; ++i) tmp[i] = -1;
|
||||
for (i = 0; i < n_regs; ++i)
|
||||
if (regs[i].id >= 0) tmp[regs[i].id] = i;
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
r->id = i;
|
||||
if (r->parent == MM_PARENT_TMP_PRI)
|
||||
r->parent = i;
|
||||
else if (r->parent >= 0 && tmp[r->parent] >= 0)
|
||||
r->parent = tmp[r->parent];
|
||||
else r->parent = MM_PARENT_UNSET;
|
||||
}
|
||||
kfree(km, tmp);
|
||||
}
|
||||
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r)
|
||||
{
|
||||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, n = *n_;
|
||||
mm_set_parent(km, mask_level, n, r);
|
||||
for (i = k = 0; i < n; ++i)
|
||||
if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio)
|
||||
r[k++] = r[i];
|
||||
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
|
||||
*n_ = k;
|
||||
}
|
||||
}
|
||||
|
||||
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{ // squeeze out regions in a[] that are not referenced by regs[]
|
||||
int i, n_aux, as = 0;
|
||||
uint64_t *aux;
|
||||
aux = (uint64_t*)kmalloc(km, n_regs * 8);
|
||||
for (i = n_aux = 0; i < n_regs; ++i)
|
||||
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->as != as) {
|
||||
memmove(&a[as], &a[r->as], r->cnt * 16);
|
||||
r->as = as;
|
||||
}
|
||||
as += r->cnt;
|
||||
}
|
||||
kfree(km, aux);
|
||||
return as;
|
||||
}
|
||||
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
int i, n_aux;
|
||||
uint64_t *aux;
|
||||
|
||||
if (n_regs < 2) return; // nothing to join
|
||||
mm_squeeze_a(km, n_regs, regs, a);
|
||||
|
||||
aux = (uint64_t*)kmalloc(km, n_regs * 8);
|
||||
for (i = n_aux = 0; i < n_regs; ++i)
|
||||
if (regs[i].parent == i || regs[i].parent < 0)
|
||||
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
|
||||
radix_sort_64(aux, aux + n_aux);
|
||||
|
||||
for (i = n_aux - 1; i >= 1; --i) {
|
||||
mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]];
|
||||
mm128_t *a0e, *a1s;
|
||||
int max_gap, min_gap;
|
||||
|
||||
// test
|
||||
if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[]
|
||||
if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand
|
||||
if (r0->score < opt->min_join_flank_sc || r1->score < opt->min_join_flank_sc) continue; // require good flanking chains
|
||||
a0e = &a[r0->as + r0->cnt - 1];
|
||||
a1s = &a[r1->as];
|
||||
if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity
|
||||
max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y;
|
||||
max_gap = max_gap > a1s->x - a0e->x? max_gap : a1s->x - a0e->x;
|
||||
min_gap = min_gap < a1s->x - a0e->x? min_gap : a1s->x - a0e->x;
|
||||
if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue;
|
||||
|
||||
// all conditions satisfied; join
|
||||
a[r1->as].y |= 1ULL<<40;
|
||||
r0->cnt += r1->cnt, r0->score += r1->score;
|
||||
mm_reg_set_coor(r0, qlen, a);
|
||||
r1->cnt = 0;
|
||||
}
|
||||
kfree(km, aux);
|
||||
}
|
||||
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs)
|
||||
{
|
||||
int i;
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent == r->id) {
|
||||
int mapq;
|
||||
mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score));
|
||||
mapq = mapq > 0? mapq : 0;
|
||||
r->mapq = mapq < 60? mapq : 60;
|
||||
} else r->mapq = 0;
|
||||
}
|
||||
}
|
||||
139
map.c
139
map.c
|
|
@ -1,6 +1,6 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include <assert.h>
|
||||
#include "kthread.h"
|
||||
#include "kvec.h"
|
||||
#include "kalloc.h"
|
||||
|
|
@ -164,137 +164,6 @@ void mm_pair_thin(mm_tbuf_t *b, int radius, mm_match_t *m1, mm_match_t *m2)
|
|||
// printf("%d,%d; %d,%d\n", m[0]->qpos>>1, m[1]->qpos>>1, m[0]->n, m[1]->n);
|
||||
}
|
||||
#endif
|
||||
mm_reg1_t *mm_gen_reg(int qlen, int n_u, uint64_t *u, mm128_t *a)
|
||||
{
|
||||
mm_reg1_t *r;
|
||||
int i, k;
|
||||
r = (mm_reg1_t*)calloc(n_u, sizeof(mm_reg1_t));
|
||||
for (i = k = 0; i < n_u; ++i) {
|
||||
mm_reg1_t *ri = &r[i];
|
||||
ri->id = i;
|
||||
ri->parent = -1, ri->subsc = 0;
|
||||
ri->score = u[i]>>32;
|
||||
ri->cnt = (int32_t)u[i];
|
||||
ri->as = k;
|
||||
mm_reg_set_coor(ri, qlen, a);
|
||||
k += ri->cnt;
|
||||
}
|
||||
return r;
|
||||
}
|
||||
|
||||
static void mm_set_subsc(float mask_level, int n, mm_reg1_t *r, void *km)
|
||||
{
|
||||
int i, j, k, *w;
|
||||
if (n <= 0) return;
|
||||
w = (int*)kmalloc(km, n * sizeof(int));
|
||||
w[0] = 0, r[0].parent = 0;
|
||||
for (i = 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i, r[i].parent = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
}
|
||||
|
||||
void mm_select_sub(float mask_level, float pri_ratio, int *n_, mm_reg1_t *r, void *km)
|
||||
{
|
||||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, *tmp, n = *n_;
|
||||
mm_set_subsc(mask_level, n, r, km);
|
||||
tmp = (int*)kmalloc(km, n * sizeof(int));
|
||||
for (i = 0; i < n; ++i) tmp[i] = -1;
|
||||
for (i = k = 0; i < n; ++i)
|
||||
if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio)
|
||||
tmp[i] = k, r[k++] = r[i];
|
||||
n = k;
|
||||
for (i = 0; i < n; ++i) // remap mm_reg1_t::parent, as some mappings may have been dropped
|
||||
if (tmp[r[i].parent] >= 0)
|
||||
r[i].parent = tmp[r[i].parent];
|
||||
kfree(km, tmp);
|
||||
*n_ = n;
|
||||
for (i = 0; i < n; ++i) r[i].id = i; // reset mm_reg1_t::id
|
||||
}
|
||||
}
|
||||
|
||||
static int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{ // squeeze out regions in a[] that are not referenced by regs[]
|
||||
int i, n_aux, as = 0;
|
||||
uint64_t *aux;
|
||||
aux = (uint64_t*)kmalloc(km, n_regs * 8);
|
||||
for (i = n_aux = 0; i < n_regs; ++i)
|
||||
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->as != as) {
|
||||
memmove(&a[as], &a[r->as], r->cnt * 16);
|
||||
r->as = as;
|
||||
}
|
||||
as += r->cnt;
|
||||
}
|
||||
kfree(km, aux);
|
||||
return as;
|
||||
}
|
||||
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
int i, n_aux;
|
||||
uint64_t *aux;
|
||||
|
||||
if (n_regs < 2) return; // nothing to join
|
||||
mm_squeeze_a(km, n_regs, regs, a);
|
||||
|
||||
aux = (uint64_t*)kmalloc(km, n_regs * 8);
|
||||
for (i = n_aux = 0; i < n_regs; ++i)
|
||||
if (regs[i].parent == i || regs[i].parent < 0)
|
||||
aux[n_aux++] = (uint64_t)regs[i].as << 32 | i;
|
||||
radix_sort_64(aux, aux + n_aux);
|
||||
|
||||
for (i = n_aux - 1; i >= 1; --i) {
|
||||
mm_reg1_t *r0 = ®s[(int32_t)aux[i-1]], *r1 = ®s[(int32_t)aux[i]];
|
||||
mm128_t *a0e, *a1s;
|
||||
int max_gap, min_gap;
|
||||
|
||||
// test
|
||||
if (r0->as + r0->cnt != r1->as) continue; // not adjacent in a[]
|
||||
if (r0->rid != r1->rid || r0->rev != r1->rev) continue; // make sure on the same target and strand
|
||||
if (r0->score < opt->min_join_flank_sc || r1->score < opt->min_join_flank_sc) continue; // require good flanking chains
|
||||
a0e = &a[r0->as + r0->cnt - 1];
|
||||
a1s = &a[r1->as];
|
||||
if (a1s->x <= a0e->x || (int32_t)a1s->y <= (int32_t)a0e->y) continue; // keep colinearity
|
||||
max_gap = min_gap = (int32_t)a1s->y - (int32_t)a0e->y;
|
||||
max_gap = max_gap > a1s->x - a0e->x? max_gap : a1s->x - a0e->x;
|
||||
min_gap = min_gap < a1s->x - a0e->x? min_gap : a1s->x - a0e->x;
|
||||
if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue;
|
||||
|
||||
// all conditions satisfied; join
|
||||
a[r1->as].y |= 1ULL<<40;
|
||||
r0->cnt += r1->cnt, r0->score += r1->score;
|
||||
mm_reg_set_coor(r0, qlen, a);
|
||||
r1->cnt = 0;
|
||||
}
|
||||
kfree(km, aux);
|
||||
}
|
||||
|
||||
void mm_set_mapq(mm_reg1_t *r, int which)
|
||||
{
|
||||
if (r->parent == which) {
|
||||
int mapq;
|
||||
mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score));
|
||||
mapq = mapq > 0? mapq : 0;
|
||||
r->mapq = mapq < 60? mapq : 60;
|
||||
} else r->mapq = 0;
|
||||
}
|
||||
|
||||
mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en, const char *qname, int qlen, const char *seq, int *n_regs)
|
||||
{
|
||||
int i, n = m_en - m_st, j, n_u;
|
||||
|
|
@ -366,15 +235,15 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
kfree(b->km, m);
|
||||
|
||||
n_u = mm_chain_dp(opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, n_a, a, &u, b->km);
|
||||
regs = mm_gen_reg(qlen, n_u, u, a);
|
||||
regs = mm_gen_regs(qlen, n_u, u, a);
|
||||
*n_regs = n_u;
|
||||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||
mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs);
|
||||
mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
if (opt->flag & MM_F_CIGAR)
|
||||
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a);
|
||||
for (i = 0; i < *n_regs; ++i) mm_set_mapq(®s[i], i);
|
||||
mm_set_mapq(*n_regs, regs);
|
||||
|
||||
// free
|
||||
kfree(b->km, a);
|
||||
|
|
|
|||
9
mmpriv.h
9
mmpriv.h
|
|
@ -5,6 +5,9 @@
|
|||
#include "minimap.h"
|
||||
#include "bseq.h"
|
||||
|
||||
#define MM_PARENT_UNSET (-1)
|
||||
#define MM_PARENT_TMP_PRI (-2)
|
||||
|
||||
#ifndef kroundup32
|
||||
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||
#endif
|
||||
|
|
@ -33,6 +36,12 @@ void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_re
|
|||
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km);
|
||||
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
||||
|
||||
mm_reg1_t *mm_gen_regs(int qlen, int n_u, uint64_t *u, mm128_t *a);
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue