r1116: cut long chains at weak points

This commit is contained in:
Heng Li 2021-11-20 19:07:44 -05:00
parent db37fc43a7
commit fcaadc22b7
2 changed files with 19 additions and 16 deletions

View File

@ -6,23 +6,25 @@
#include "kalloc.h" #include "kalloc.h"
#include "krmq.h" #include "krmq.h"
static int64_t mg_chain_bk_end(const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k) static int64_t mg_chain_bk_end(int32_t max_drop, const mm128_t *z, const int32_t *f, const int64_t *p, int32_t *t, int64_t k)
{ {
int64_t i = z[k].y, end_i = -1; int64_t i = z[k].y, end_i = -1, max_i = i;
int32_t max_s = 0;
if (i < 0 || t[i] != 0) return i; if (i < 0 || t[i] != 0) return i;
while (1) { do {
int32_t s; int32_t s;
t[i] = 2; t[i] = 2;
end_i = i = p[i]; end_i = i = p[i];
s = i < 0? z[k].y : (int32_t)z[k].x - f[i]; s = i < 0? z[k].y : (int32_t)z[k].x - f[i];
if (i < 0 || t[i] != 0) break; if (s > max_s) max_s = s, max_i = i;
} else if (max_s - s > max_drop) break;
} while (i >= 0 && t[i] == 0);
for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[] for (i = z[k].y; i >= 0 && i != end_i; i = p[i]) // reset modified t[]
t[i] = 0; t[i] = 0;
return end_i; return max_i;
} }
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_) uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t max_drop, int32_t *n_u_, int32_t *n_v_)
{ {
mm128_t *z; mm128_t *z;
uint64_t *u; uint64_t *u;
@ -40,10 +42,10 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
memset(t, 0, n * 4); memset(t, 0, n * 4);
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // precompute n_u
if (t[z[k].y] != 0) { if (t[z[k].y] == 0) {
int64_t n_v0 = n_v, end_i; int64_t n_v0 = n_v, end_i;
int32_t sc; int32_t sc;
end_i = mg_chain_bk_end(z, f, p, t, k); end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
for (i = z[k].y; i != end_i; i = p[i]) for (i = z[k].y; i != end_i; i = p[i])
++n_v, t[i] = 1; ++n_v, t[i] = 1;
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i]; sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
@ -55,10 +57,10 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
KMALLOC(km, u, n_u); KMALLOC(km, u, n_u);
memset(t, 0, n * 4); memset(t, 0, n * 4);
for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[] for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
if (t[z[k].y] != 0) { if (t[z[k].y] == 0) {
int64_t n_v0 = n_v, end_i; int64_t n_v0 = n_v, end_i;
int32_t sc; int32_t sc;
end_i = mg_chain_bk_end(z, f, p, t, k); end_i = mg_chain_bk_end(max_drop, z, f, p, t, k);
for (i = z[k].y; i != end_i; i = p[i]) for (i = z[k].y; i != end_i; i = p[i])
v[n_v++] = i, t[i] = 1; v[n_v++] = i, t[i] = 1;
sc = i < 0? z[k].x : (int32_t)z[k].x - f[i]; sc = i < 0? z[k].x : (int32_t)z[k].x - f[i];
@ -146,7 +148,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int max_iter, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km) int is_cdna, int n_seg, int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
{ // TODO: make sure this works when n has more than 32 bits { // TODO: make sure this works when n has more than 32 bits
int32_t *f, *t, *v, n_u, n_v, mmax_f = 0; int32_t *f, *t, *v, n_u, n_v, mmax_f = 0, max_drop = bw;
int64_t *p, i, j, max_ii, st = 0, n_iter = 0; int64_t *p, i, j, max_ii, st = 0, n_iter = 0;
uint64_t *u; uint64_t *u;
@ -157,6 +159,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
} }
if (max_dist_x < bw) max_dist_x = bw; if (max_dist_x < bw) max_dist_x = bw;
if (max_dist_y < bw && !is_cdna) max_dist_y = bw; if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
if (is_cdna) max_drop = INT32_MAX;
KMALLOC(km, p, n); KMALLOC(km, p, n);
KMALLOC(km, f, n); KMALLOC(km, f, n);
KMALLOC(km, v, n); KMALLOC(km, v, n);
@ -203,7 +206,7 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
if (mmax_f < max_f) mmax_f = max_f; if (mmax_f < max_f) mmax_f = max_f;
} }
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v); u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
kfree(km, p); kfree(km, f); kfree(km, t); kfree(km, p); kfree(km, f); kfree(km, t);
if (n_u == 0) { if (n_u == 0) {
@ -247,7 +250,7 @@ static inline int32_t comput_sc_simple(const mm128_t *ai, const mm128_t *aj, flo
mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip, mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_skip, int cap_rmq_size, int min_cnt, int min_sc, float chn_pen_gap, float chn_pen_skip,
int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km) int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
{ {
int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0; int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0; int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
uint64_t *u; uint64_t *u;
lc_elem_t *root = 0, *root_inner = 0; lc_elem_t *root = 0, *root_inner = 0;
@ -355,7 +358,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
} }
km_destroy(mem_mp); km_destroy(mem_mp);
u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, &n_u, &n_v); u = mg_chain_backtrack(km, n, f, p, v, t, min_cnt, min_sc, max_drop, &n_u, &n_v);
*n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here *n_u_ = n_u, *_u = u; // NB: note that u[] may not be sorted by score here
kfree(km, p); kfree(km, f); kfree(km, t); kfree(km, p); kfree(km, f); kfree(km, t);
if (n_u == 0) { if (n_u == 0) {

2
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "ketopt.h" #include "ketopt.h"
#define MM_VERSION "2.23-r1115-dirty" #define MM_VERSION "2.23-r1116-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>