r611: added pseudocount when est divergence

This commit is contained in:
Heng Li 2017-12-08 12:57:57 -05:00
parent fec7bd713f
commit 98a999fe44
5 changed files with 13 additions and 9 deletions

View File

@ -27,7 +27,7 @@ static int get_mini_idx(int qlen, const mm128_t *a, int32_t n, const uint64_t *m
return -1;
}
void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos)
void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos)
{
int i;
uint64_t sum_k = 0;
@ -40,18 +40,21 @@ void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t
for (i = 0; i < n_regs; ++i) {
mm_reg1_t *r = &regs[i];
int32_t st, en, j, k, n_mis = 0;
int32_t st, en, j, k, n_match, n_tot, l_ref;
r->div = -1.0f;
if (r->cnt == 0) continue;
st = en = get_mini_idx(qlen, r->rev? &a[r->as + r->cnt - 1] : &a[r->as], n, mini_pos);
assert(st >= 0);
for (k = 1, j = st + 1; j < n && k < r->cnt; ++j) {
l_ref = mi->seq[r->rid].len;
for (k = 1, j = st + 1, n_match = 1; j < n && k < r->cnt; ++j) {
int32_t x;
x = get_for_qpos(qlen, r->rev? &a[r->as + r->cnt - 1 - k] : &a[r->as + k]);
if (x == (int32_t)mini_pos[j])
mini_pos[j] |= 1ULL<<40, ++k, en = j;
else mini_pos[j] &= ~(1ULL<<40), ++n_mis;
++k, en = j, ++n_match;
}
r->div = -logf((float)(en - st + 1 - n_mis) / (en - st + 1)) / avg_k;
n_tot = en - st + 1;
if (r->qs > avg_k && r->rs > avg_k) ++n_tot;
if (qlen - r->qs > avg_k && l_ref - r->re > avg_k) ++n_tot;
r->div = logf((float)n_tot / n_match) / avg_k;
}
}

1
hit.c
View File

@ -80,6 +80,7 @@ mm_reg1_t *mm_gen_regs(void *km, uint32_t hash, int qlen, int n_u, uint64_t *u,
ri->hash = (uint32_t)z[i].x;
ri->cnt = (int32_t)z[i].y;
ri->as = z[i].y >> 32;
ri->div = -1.0f;
mm_reg_set_coor(ri, qlen, a);
}
kfree(km, z);

2
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.5-r610-dirty"
#define MM_VERSION "2.5-r611-dirty"
#ifdef __linux__
#include <sys/resource.h>

2
map.c
View File

@ -348,7 +348,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
if (!is_sr) mm_est_err(qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos);
if (!is_sr) mm_est_err(mi, qlen_sum, n_regs0, regs0, a, b->n_mini_pos, b->mini_pos);
if (n_segs == 1) { // uni-segment
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], quals? quals[0] : 0, &n_regs0, regs0, a);

View File

@ -79,7 +79,7 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_re
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc, int match_sc, int rep_len, int is_sr);
void mm_est_err(int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, uint64_t *mini_pos);
void mm_est_err(const mm_idx_t *mi, int qlen, int n_regs, mm_reg1_t *regs, const mm128_t *a, int32_t n, const uint64_t *mini_pos);
mm_seg_t *mm_seg_gen(void *km, uint32_t hash, int n_segs, const int *qlens, int n_regs0, const mm_reg1_t *regs0, int *n_regs, mm_reg1_t **regs, const mm128_t *a);
void mm_seg_free(void *km, int n_segs, mm_seg_t *segs);