r126: filter by fraction of seed coverage
otherwise we may get too many poor overlap mappings.
This commit is contained in:
parent
d73bb28097
commit
426c2975f6
16
align.c
16
align.c
|
|
@ -304,21 +304,9 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
++n_regs;
|
||||
}
|
||||
}
|
||||
*n_regs_ = n_regs;
|
||||
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
||||
kfree(km, ez.cigar);
|
||||
|
||||
// filter
|
||||
for (r = i = 0; r < n_regs; ++r) {
|
||||
mm_reg1_t *reg = ®s[r];
|
||||
int flt = 0;
|
||||
if (reg->cnt < opt->min_cnt) flt = 1;
|
||||
else if (reg->p->blen - reg->p->n_ambi - reg->p->n_diff < opt->min_chain_score) flt = 1;
|
||||
else if (reg->p->dp_max < opt->min_dp_max) flt = 1;
|
||||
if (flt) free(reg->p);
|
||||
else if (i < r) regs[i++] = regs[r]; // NB: this also move the regs[r].p pointer
|
||||
else ++i;
|
||||
}
|
||||
*n_regs_ = i;
|
||||
mm_sync_regs(km, i, regs);
|
||||
mm_filter_regs(km, opt, n_regs_, regs);
|
||||
return regs;
|
||||
}
|
||||
|
|
|
|||
25
hit.c
25
hit.c
|
|
@ -147,6 +147,31 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1
|
|||
}
|
||||
}
|
||||
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs)
|
||||
{
|
||||
int i, k;
|
||||
for (i = k = 0; i < *n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
int flt = 0;
|
||||
if (r->cnt < opt->min_cnt) flt = 1;
|
||||
else {
|
||||
int blen = r->qe - r->qs < r->re - r->rs? r->qe - r->qs : r->re - r->rs;
|
||||
if (r->score < blen * opt->min_seedcov_ratio) flt = 1;
|
||||
}
|
||||
if (r->p) {
|
||||
if (r->p->blen - r->p->n_ambi - r->p->n_diff < opt->min_chain_score) flt = 1;
|
||||
else if (r->p->dp_max < opt->min_dp_max) flt = 1;
|
||||
if (flt) free(r->p);
|
||||
}
|
||||
if (!flt) {
|
||||
if (k < i) regs[k++] = regs[i];
|
||||
else ++k;
|
||||
}
|
||||
}
|
||||
*n_regs = k;
|
||||
mm_sync_regs(km, k, regs);
|
||||
}
|
||||
|
||||
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
{ // squeeze out regions in a[] that are not referenced by regs[]
|
||||
int i, as = 0;
|
||||
|
|
|
|||
16
main.c
16
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r125-pre"
|
||||
#define MM_VERSION "2.0-r126-pre"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -68,7 +68,7 @@ int main(int argc, char *argv[])
|
|||
mm_realtime0 = realtime();
|
||||
mm_mapopt_init(&opt);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "bw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:", long_options, &long_idx)) >= 0) {
|
||||
while ((c = getopt_long(argc, argv, "bw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:", long_options, &long_idx)) >= 0) {
|
||||
if (c == 'w') w = atoi(optarg);
|
||||
else if (c == 'k') k = atoi(optarg);
|
||||
else if (c == 'H') is_hpc = 1;
|
||||
|
|
@ -79,6 +79,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'v') mm_verbose = atoi(optarg);
|
||||
else if (c == 'g') opt.max_gap = atoi(optarg);
|
||||
else if (c == 'p') opt.pri_ratio = atof(optarg);
|
||||
else if (c == 'D') opt.min_seedcov_ratio = atof(optarg);
|
||||
else if (c == 'M') opt.mask_level = atof(optarg);
|
||||
else if (c == 'c') opt.flag |= MM_F_CIGAR;
|
||||
else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
|
|
@ -109,7 +110,7 @@ int main(int argc, char *argv[])
|
|||
} else if (c == 'x') {
|
||||
if (strcmp(optarg, "ava10k") == 0) {
|
||||
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
opt.min_chain_score = 100, opt.pri_ratio = 0.0f;
|
||||
opt.min_chain_score = 100, opt.pri_ratio = 0.0f, opt.min_seedcov_ratio = 0.05f;
|
||||
is_hpc = 1, k = 19, w = 5;
|
||||
} else if (strcmp(optarg, "map10k") == 0) {
|
||||
is_hpc = 1, k = 19;
|
||||
|
|
@ -140,11 +141,12 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
||||
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
||||
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n");
|
||||
fprintf(stderr, " -p FLOAT threshold to output a mapping [%g]\n", opt.pri_ratio);
|
||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||
fprintf(stderr, " -D FLOAT min fraction of seed matches [%g]\n", opt.min_seedcov_ratio);
|
||||
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
||||
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 (PacBio/ONT all-vs-all read mapping)\n");
|
||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
|
||||
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
|
||||
fprintf(stderr, " Alignment:\n");
|
||||
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
|
||||
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
|
||||
|
|
|
|||
5
map.c
5
map.c
|
|
@ -20,6 +20,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->bw = 1000;
|
||||
opt->max_gap = 10000;
|
||||
opt->max_chain_skip = 15;
|
||||
opt->min_seedcov_ratio = 0.0f;
|
||||
|
||||
opt->pri_ratio = 2.0f;
|
||||
opt->mask_level = 0.5f;
|
||||
|
|
@ -243,12 +244,12 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
if (opt->flag & MM_F_CIGAR) {
|
||||
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a);
|
||||
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
|
||||
if (!(opt->flag & MM_F_AVA)) {
|
||||
mm_update_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs);
|
||||
}
|
||||
}
|
||||
} else mm_filter_regs(b->km, opt, n_regs, regs);
|
||||
mm_set_mapq(*n_regs, regs);
|
||||
|
||||
// free
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ typedef struct {
|
|||
int max_chain_skip;
|
||||
int min_cnt;
|
||||
int min_chain_score;
|
||||
float min_seedcov_ratio;
|
||||
|
||||
float pri_ratio;
|
||||
float mask_level;
|
||||
|
|
|
|||
1
mmpriv.h
1
mmpriv.h
|
|
@ -42,6 +42,7 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
|
|||
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue