r168: fixed a bug in long join: a[] not sorted

Also added length requirement for long join and changed -g in the ava mode
This commit is contained in:
Heng Li 2017-07-09 12:14:20 -04:00
parent 1ac48556ae
commit 782449975d
3 changed files with 28 additions and 10 deletions

28
chain.c
View File

@ -23,8 +23,8 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
{ // TODO: make sure this works when n has more than 32 bits
int32_t st = 0, j, k, *f, *p, *t, *v, n_u, n_v;
int64_t i;
uint64_t *u;
mm128_t *b;
uint64_t *u, *u2;
mm128_t *b, *w;
if (_u) *_u = 0;
f = (int32_t*)kmalloc(km, n * 4);
@ -99,19 +99,35 @@ int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int
}
if (k0 == k) n_v = n_v0; // no new chain added, reset
}
n_u = k, *_u = u;
n_u = k, *_u = u; // NB: note that u[] may not be sorted by score here
// free
kfree(km, f); kfree(km, p); kfree(km, t);
// write the result to _a_
// write the result to b[]
b = (mm128_t*)kmalloc(km, n_v * sizeof(mm128_t));
for (i = 0, k = 0; i < n_u; ++i) {
int32_t k0 = k, ni = (int32_t)u[i];
for (j = 0; j < ni; ++j)
b[k] = a[v[k0 + (ni - j - 1)]], ++k;
}
memcpy(a, b, n_v * sizeof(mm128_t));
kfree(km, v); kfree(km, b);
kfree(km, v);
// sort u[] and a[] by a[].x, such that adjacent chains may be joined (required by mm_join_long)
w = (mm128_t*)kmalloc(km, n_u * sizeof(mm128_t));
for (i = k = 0; i < n_u; ++i) {
w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
k += (int32_t)u[i];
}
radix_sort_128x(w, w + n_u);
u2 = (uint64_t*)kmalloc(km, n_u * 8);
for (i = k = 0; i < n_u; ++i) {
int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
u2[i] = u[j];
memcpy(&a[k], &b[w[i].y>>32], n * sizeof(mm128_t));
k += n;
}
memcpy(u, u2, n_u * 8);
kfree(km, b); kfree(km, w); kfree(km, u2);
return n_u;
}

4
hit.c
View File

@ -247,8 +247,10 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_r
max_gap = max_gap > a1s->x - a0e->x? max_gap : a1s->x - a0e->x;
min_gap = min_gap < a1s->x - a0e->x? min_gap : a1s->x - a0e->x;
if (max_gap > opt->max_join_long || min_gap > opt->max_join_short) continue;
sc_thres = (int)((float)opt->min_join_flank_sc / opt->max_join_long * max_gap + .499); // TODO: also require length
sc_thres = (int)((float)opt->min_join_flank_sc / opt->max_join_long * max_gap + .499);
if (r0->score < sc_thres || r1->score < sc_thres) continue; // require good flanking chains
if (r0->re - r0->rs < max_gap>>1 || r0->qe - r0->qs < max_gap>>1) continue; // require enough flanking length
if (r1->re - r1->rs < max_gap>>1 || r1->qe - r1->qs < max_gap>>1) continue;
// all conditions satisfied; join
a[r1->as].y |= 1ULL<<40;

6
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r167-pre"
#define MM_VERSION "2.0-r168-pre"
void liftrlimit()
{
@ -120,7 +120,7 @@ int main(int argc, char *argv[])
} else if (c == 'x') {
if (strcmp(optarg, "ava10k") == 0) {
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f;
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f, opt.max_gap = 10000;
is_hpc = 1, k = 19, w = 5;
} else if (strcmp(optarg, "map10k") == 0) {
is_hpc = 1, k = 19;
@ -155,7 +155,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 -g10000 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
fprintf(stderr, " Alignment:\n");