start to work on alignment
This commit is contained in:
parent
c6d5dea314
commit
44cdd18de0
2
Makefile
2
Makefile
|
|
@ -2,7 +2,7 @@ CC= gcc
|
|||
CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
|
||||
CPPFLAGS=
|
||||
INCLUDES= -I.
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o sdust.o index.o map.o
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o index.o map.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
|
|||
|
|
@ -0,0 +1,38 @@
|
|||
#include <assert.h>
|
||||
#include "minimap.h"
|
||||
#include "ksw2.h"
|
||||
|
||||
void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
extern unsigned char seq_nt4_table[256];
|
||||
int i, reg;
|
||||
uint8_t *qseq0[2];
|
||||
|
||||
qseq0[0] = (uint8_t*)kmalloc(km, qlen);
|
||||
qseq0[1] = (uint8_t*)kmalloc(km, qlen);
|
||||
for (i = 0; i < qlen; ++i) {
|
||||
qseq0[0][i] = seq_nt4_table[(uint8_t)qstr[i]];
|
||||
qseq0[1][qlen - 1 - i] = qseq0[0][i] < 4? 3 - qseq0[0][i] : 4;
|
||||
}
|
||||
|
||||
for (reg = 0; reg < n_regs; ++reg) {
|
||||
mm_reg1_t *r = ®s[reg];
|
||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63;
|
||||
uint8_t *tseq;
|
||||
tseq = (uint8_t*)kmalloc(km, r->re - r->rs);
|
||||
for (i = 0; i < r->cnt - 1; ++i) {
|
||||
uint8_t *qseq;
|
||||
int32_t rs, re, qs, qe, ret;
|
||||
rs = (int32_t)a[r->as + i].x + 1;
|
||||
re = (int32_t)a[r->as + i + 1].x + 1;
|
||||
qs = (int32_t)a[r->as + i].y + 1;
|
||||
qe = (int32_t)a[r->as + i + 1].y + 1;
|
||||
qseq = &qseq0[rev][qs];
|
||||
ret = mm_idx_getseq(mi, rid, rs, re, tseq);
|
||||
assert(ret > 0);
|
||||
}
|
||||
kfree(km, tseq);
|
||||
}
|
||||
|
||||
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
||||
}
|
||||
12
index.c
12
index.c
|
|
@ -82,6 +82,18 @@ void mm_idx_stat(const mm_idx_t *mi)
|
|||
__func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), n, 100.0*n1/n, (double)sum / n, (double)len / sum);
|
||||
}
|
||||
|
||||
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq)
|
||||
{
|
||||
uint64_t i, st1, en1;
|
||||
if (rid >= mi->n_seq || st >= mi->seq[rid].len) return -1;
|
||||
if (en > mi->seq[rid].len) en = mi->seq[rid].len;
|
||||
st1 = mi->seq[rid].offset + st;
|
||||
en1 = mi->seq[rid].offset + en;
|
||||
for (i = st1; i < en1; ++i)
|
||||
seq[i - st1] = mm_seq4_get(mi->S, i);
|
||||
return en - st;
|
||||
}
|
||||
|
||||
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f)
|
||||
{
|
||||
int i;
|
||||
|
|
|
|||
16
map.c
16
map.c
|
|
@ -11,7 +11,6 @@
|
|||
void mm_mapopt_init(mm_mapopt_t *opt)
|
||||
{
|
||||
memset(opt, 0, sizeof(mm_mapopt_t));
|
||||
opt->n_frag_mini = 100000;
|
||||
opt->max_occ_frac = 1e-5f;
|
||||
opt->mid_occ_frac = 2e-4f;
|
||||
opt->sdust_thres = 0;
|
||||
|
|
@ -176,6 +175,7 @@ mm_reg1_t *mm_gen_reg(int qlen, int n_u, uint64_t *u, mm128_t *a)
|
|||
ri->qs = qlen - ((int32_t)a[k + ri->cnt - 1].y + 1);
|
||||
ri->qe = qlen - ((int32_t)a[k].y + 1 - (int32_t)(a[k].y>>32));
|
||||
}
|
||||
ri->as = k;
|
||||
k += ri->cnt;
|
||||
}
|
||||
return r;
|
||||
|
|
@ -224,13 +224,13 @@ void mm_select_sub(float mask_level, float pri_ratio, int *n_, mm_reg1_t *r, voi
|
|||
}
|
||||
}
|
||||
|
||||
mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en, const char *qname, int qlen, int *n_regs)
|
||||
mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b, uint32_t m_st, uint32_t m_en, const char *qname, int qlen, const char *seq, int *n_regs)
|
||||
{
|
||||
int i, n = m_en - m_st, j, n_a, n_u;
|
||||
uint64_t *u;
|
||||
mm_match_t *m;
|
||||
mm128_t *a;
|
||||
mm_reg1_t *reg;
|
||||
mm_reg1_t *regs;
|
||||
|
||||
// convert to local representation
|
||||
m = (mm_match_t*)kmalloc(b->km, n * sizeof(mm_match_t));
|
||||
|
|
@ -294,13 +294,15 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
//for (i = 0; i < n_a; ++i) printf("%c\t%s\t%d\t%d\n", "+-"[a[i].x>>63], mi->seq[a[i].x<<1>>33].name, (uint32_t)a[i].x, (uint32_t)a[i].y);
|
||||
|
||||
n_u = mm_chain_dp(opt->max_gap, opt->bw, opt->max_skip, opt->min_score, n_a, a, &u, b->km);
|
||||
reg = mm_gen_reg(qlen, n_u, u, a);
|
||||
regs = mm_gen_reg(qlen, n_u, u, a);
|
||||
*n_regs = n_u;
|
||||
mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
|
||||
mm_align_skeleton(b->km, opt, mi, qlen, seq, *n_regs, regs, a);
|
||||
|
||||
// free
|
||||
kfree(b->km, a);
|
||||
kfree(b->km, u);
|
||||
return reg;
|
||||
return regs;
|
||||
}
|
||||
|
||||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
|
|
@ -310,8 +312,8 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, m
|
|||
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
|
||||
if (opt->sdust_thres > 0)
|
||||
mm_dust_minier(&b->mini, l_seq, seq, opt->sdust_thres, b->sdb);
|
||||
regs = mm_map_frag(opt, mi, b, 0, b->mini.n, qname, l_seq, n_regs);
|
||||
mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
|
||||
regs = mm_map_frag(opt, mi, b, 0, b->mini.n, qname, l_seq, seq, n_regs);
|
||||
// mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
|
||||
return regs;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
char *name; // name of the db sequence
|
||||
uint64_t offset; // offset in mm_idx_t::seq16
|
||||
uint64_t offset; // offset in mm_idx_t::S
|
||||
uint32_t len; // length
|
||||
} mm_idx_seq_t;
|
||||
|
||||
|
|
@ -47,10 +47,10 @@ typedef struct {
|
|||
int32_t score;
|
||||
int32_t qs, qe, rs, re;
|
||||
int32_t parent, subsc;
|
||||
int32_t as;
|
||||
} mm_reg1_t;
|
||||
|
||||
typedef struct {
|
||||
int n_frag_mini;
|
||||
float max_occ_frac;
|
||||
float mid_occ_frac;
|
||||
int sdust_thres; // score threshold for SDUST; 0 to disable
|
||||
|
|
@ -91,6 +91,7 @@ mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, in
|
|||
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
||||
void mm_idx_stat(const mm_idx_t *idx);
|
||||
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
||||
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
||||
|
||||
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);
|
||||
|
||||
|
|
|
|||
2
mmpriv.h
2
mmpriv.h
|
|
@ -16,6 +16,8 @@ uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
|
|||
|
||||
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_sc, int n, mm128_t *a, uint64_t **_u, void *km);
|
||||
|
||||
void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue