From 98a999fe44b9e65a14f194762d9d3c784ca1ced9 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Fri, 8 Dec 2017 12:57:57 -0500 Subject: [PATCH] r611: added pseudocount when est divergence --- esterr.c | 15 +++++++++------ hit.c | 1 + main.c | 2 +- map.c | 2 +- mmpriv.h | 2 +- 5 files changed, 13 insertions(+), 9 deletions(-) diff --git a/esterr.c b/esterr.c index c4fefde..e3b6e85 100644 --- a/esterr.c +++ b/esterr.c @@ -27,7 +27,7 @@ static int get_mini_idx(int qlen, const mm128_t *a, int32_t n, const uint64_t *m return -1; } -void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos) +void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos) { int i; uint64_t sum_k = 0; @@ -40,18 +40,21 @@ void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t for (i = 0; i < n_regs; ++i) { mm_reg1_t *r = ®s[i]; - int32_t st, en, j, k, n_mis = 0; + int32_t st, en, j, k, n_match, n_tot, l_ref; r->div = -1.0f; if (r->cnt == 0) continue; st = en = get_mini_idx(qlen, r->rev? &a[r->as + r->cnt - 1] : &a[r->as], n, mini_pos); assert(st >= 0); - for (k = 1, j = st + 1; j < n && k < r->cnt; ++j) { + l_ref = mi->seq[r->rid].len; + for (k = 1, j = st + 1, n_match = 1; j < n && k < r->cnt; ++j) { int32_t x; x = get_for_qpos(qlen, r->rev? &a[r->as + r->cnt - 1 - k] : &a[r->as + k]); if (x == (int32_t)mini_pos[j]) - mini_pos[j] |= 1ULL<<40, ++k, en = j; - else mini_pos[j] &= ~(1ULL<<40), ++n_mis; + ++k, en = j, ++n_match; } - r->div = -logf((float)(en - st + 1 - n_mis) / (en - st + 1)) / avg_k; + n_tot = en - st + 1; + if (r->qs > avg_k && r->rs > avg_k) ++n_tot; + if (qlen - r->qs > avg_k && l_ref - r->re > avg_k) ++n_tot; + r->div = logf((float)n_tot / n_match) / avg_k; } } diff --git a/hit.c b/hit.c index 2312eaa..b30337d 100644 --- a/hit.c +++ b/hit.c @@ -80,6 +80,7 @@ mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u, ri->hash = (uint32_t)z[i].x; ri->cnt = (int32_t)z[i].y; ri->as = z[i].y >> 32; + ri->div = -1.0f; mm_reg_set_coor(ri, qlen, a); } kfree(km, z); diff --git a/main.c b/main.c index cbd9f73..926a68c 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.5-r610-dirty" +#define MM_VERSION "2.5-r611-dirty" #ifdef __linux__ #include diff --git a/map.c b/map.c index 0875ddb..ac21564 100644 --- a/map.c +++ b/map.c @@ -348,7 +348,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x)); chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a); - if (!is_sr) mm_est_err(qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos); + if (!is_sr) mm_est_err(mi, qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos); if (n_segs == 1) { // uni-segment regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a); diff --git a/mmpriv.h b/mmpriv.h index 9ec080e..cf474a3 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -79,7 +79,7 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_re void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r); void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr); -void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos); +void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos); mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a); void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);