From 4cd456b9ba31de902061d8d909579593b29341e8 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 29 Jun 2017 19:44:11 -0400 Subject: [PATCH] r108: refactoring, move reg1 routines to hit.c --- align.c | 36 ++---------------------------------- format.c | 10 +++++----- hit.c | 17 +++++++++++++++++ main.c | 2 +- map.c | 4 ++-- mmpriv.h | 5 +++-- 6 files changed, 30 insertions(+), 44 deletions(-) diff --git a/align.c b/align.c index 7c31e5a..b84e361 100644 --- a/align.c +++ b/align.c @@ -26,23 +26,6 @@ static inline void mm_seq_rev(uint32_t len, uint8_t *seq) t = seq[i], seq[i] = seq[len - 1 - i], seq[len - 1 - i] = t; } -static void mm_reg_split(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a) -{ - if (n <= 0 || n >= r->cnt) return; - *r2 = *r; - r2->id = -1; - r2->p = 0; - r2->cnt = r->cnt - n; - r2->score = (int32_t)(r->score * ((float)r2->cnt / r->cnt) + .499); - r2->as = r->as + n; - r2->parent = -2; - mm_reg_set_coor(r2, qlen, a); - r->cnt -= r2->cnt; - r->score -= r2->score; - mm_reg_set_coor(r, qlen, a); - r->split = r2->split = 1; -} - static inline int test_zdrop_aux(int32_t score, int i, int j, int32_t *max, int *max_i, int *max_j, int e, int zdrop) { if (score < *max) { @@ -250,7 +233,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int re1 = rs + (ez->max_t + 1); qe1 = qs + (ez->max_q + 1); if (r->cnt - (j + 1) >= opt->min_cnt) { - mm_reg_split(r, r2, j + 1, qlen, a); + mm_split_reg(r, r2, j + 1, qlen, a); if (j + 1 < opt->min_cnt) r2->id = r->id, r->id = -1; } @@ -325,22 +308,7 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m else if (i < r) regs[i++] = regs[r]; // NB: this also move the regs[r].p pointer else ++i; } - - // remap mm_reg1_t::{id,parent} - if (i < n_regs || n_regs != *n_regs_) { // there are region splits or drops - int32_t *id; - n_regs = i; - id = (int32_t*)kmalloc(km, *n_regs_ * 4); - for (r = 0; r < *n_regs_; ++r) id[r] = -1; - for (r = 0; r < n_regs; ++r) { - if (regs[r].id >= 0) id[regs[r].id] = r; - regs[r].id = r; - } - for (r = 0; r < n_regs; ++r) - if (regs[r].parent >= 0) - regs[r].parent = id[regs[r].parent]; - kfree(km, id); - } *n_regs_ = n_regs; + mm_sync_regs(km, n_regs, regs); return regs; } diff --git a/format.c b/format.c index 33a681e..0b861b0 100644 --- a/format.c +++ b/format.c @@ -54,7 +54,7 @@ static void mm_sprintf_lite(kstring_t *s, const char *fmt, ...) s->s[s->l] = 0; } -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r) +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, mm_reg1_t *r) { s->l = 0; mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]); @@ -65,7 +65,7 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_re else mm_sprintf_lite(s, "\t%d\t%d", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs); mm_sprintf_lite(s, "\t%d\tcm:i:%d", r->mapq, r->cnt); if (r->p) mm_sprintf_lite(s, "\ts1:i:%d", r->score); - if (r->parent == which) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); + if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); if (r->p) { uint32_t k; mm_sprintf_lite(s, "\tNM:i:%d\tAS:i:%d\tnn:i:%d\tcg:Z:", r->p->n_diff, r->p->score, r->p->n_ambi); @@ -98,12 +98,12 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp) } else str_copy(s, seq, seq + l); } -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r) +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, mm_reg1_t *r) { int flag = 0; s->l = 0; if (r->rev) flag |= 0x10; - if (r->parent != which) flag |= 0x80; + if (r->parent != r->id) flag |= 0x80; mm_sprintf_lite(s, "%s\t%d\t%s\t%d\t%d\t", t->name, flag, mi->seq[r->rid].name, r->rs+1, r->mapq); if (r->p) { // TODO: using hard clippings uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs; @@ -117,6 +117,6 @@ void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_re sam_write_sq(s, t->seq, t->l_seq, r->rev, r->rev); mm_sprintf_lite(s, "\t*\tcm:i:%d", r->cnt); if (r->p) mm_sprintf_lite(s, "\ts1:i:%d", r->score); - if (r->parent == which) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); + if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->score, r->p->n_ambi); } diff --git a/hit.c b/hit.c index 2df04a4..d3408b8 100644 --- a/hit.c +++ b/hit.c @@ -22,6 +22,23 @@ mm_reg1_t *mm_gen_regs(int qlen, int n_u, uint64_t *u, mm128_t *a) // convert ch return r; } +void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a) +{ + if (n <= 0 || n >= r->cnt) return; + *r2 = *r; + r2->id = -1; + r2->p = 0; + r2->cnt = r->cnt - n; + r2->score = (int32_t)(r->score * ((float)r2->cnt / r->cnt) + .499); + r2->as = r->as + n; + if (r->parent == r->id) r2->parent = MM_PARENT_TMP_PRI; + mm_reg_set_coor(r2, qlen, a); + r->cnt -= r2->cnt; + r->score -= r2->score; + mm_reg_set_coor(r, qlen, a); + r->split = r2->split = 1; +} + void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compute mm_reg1_t::subsc { int i, j, k, *w; diff --git a/main.c b/main.c index 725f39c..c52dd06 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r105-pre" +#define MM_VERSION "2.0-r108-pre" void liftrlimit() { diff --git a/map.c b/map.c index 713ddff..a19d53e 100644 --- a/map.c +++ b/map.c @@ -322,8 +322,8 @@ static void *worker_pipeline(void *shared, int step, void *in) for (j = 0; j < s->n_reg[i]; ++j) { mm_reg1_t *r = &s->reg[i][j]; if (r->cnt == 0) continue; - if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, j, r); - else mm_write_paf(&p->str, mi, t, j, r); + if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r); + else mm_write_paf(&p->str, mi, t, r); puts(p->str.s); free(r->p); } diff --git a/mmpriv.h b/mmpriv.h index 5b6ec57..40397be 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -31,12 +31,13 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end); void radix_sort_64(uint64_t *beg, uint64_t *end); uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r); -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r); +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, mm_reg1_t *r); +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, mm_reg1_t *r); int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km); mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a); mm_reg1_t *mm_gen_regs(int qlen, int n_u, uint64_t *u, mm128_t *a); +void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a); void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r); void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a);