r488: parameter to control max fragment length
This commit is contained in:
parent
f150257a0d
commit
61e56c941d
14
main.c
14
main.c
|
|
@ -6,7 +6,7 @@
|
||||||
#include "mmpriv.h"
|
#include "mmpriv.h"
|
||||||
#include "getopt.h"
|
#include "getopt.h"
|
||||||
|
|
||||||
#define MM_VERSION "2.2-r487-dirty"
|
#define MM_VERSION "2.2-r488-dirty"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
@ -64,7 +64,7 @@ static inline int64_t mm_parse_num(const char *str)
|
||||||
|
|
||||||
int main(int argc, char *argv[])
|
int main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
const char *opt_str = "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:h";
|
const char *opt_str = "aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:";
|
||||||
mm_mapopt_t opt;
|
mm_mapopt_t opt;
|
||||||
mm_idxopt_t ipt;
|
mm_idxopt_t ipt;
|
||||||
int i, c, n_threads = 3, long_idx, max_gap_ref = 0;
|
int i, c, n_threads = 3, long_idx, max_gap_ref = 0;
|
||||||
|
|
@ -98,6 +98,7 @@ int main(int argc, char *argv[])
|
||||||
else if (c == 'v') mm_verbose = atoi(optarg);
|
else if (c == 'v') mm_verbose = atoi(optarg);
|
||||||
else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg);
|
else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg);
|
||||||
else if (c == 'G') max_gap_ref = (int)mm_parse_num(optarg);
|
else if (c == 'G') max_gap_ref = (int)mm_parse_num(optarg);
|
||||||
|
else if (c == 'F') opt.max_frag_len = (int)mm_parse_num(optarg);
|
||||||
else if (c == 'N') opt.best_n = atoi(optarg);
|
else if (c == 'N') opt.best_n = atoi(optarg);
|
||||||
else if (c == 'p') opt.pri_ratio = atof(optarg);
|
else if (c == 'p') opt.pri_ratio = atof(optarg);
|
||||||
else if (c == 'M') opt.mask_level = atof(optarg);
|
else if (c == 'M') opt.mask_level = atof(optarg);
|
||||||
|
|
@ -180,10 +181,8 @@ int main(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (max_gap_ref > 0) {
|
if (max_gap_ref > 0) {
|
||||||
if (opt.flag & MM_F_FRAG_MODE)
|
opt.max_gap_ref = max_gap_ref;
|
||||||
opt.max_gap_ref = max_gap_ref;
|
if (opt.flag & MM_F_SPLICE) opt.bw = max_gap_ref; // in the splice mode, this also changes the bandwidth
|
||||||
if (opt.flag & MM_F_SPLICE)
|
|
||||||
opt.max_gap_ref = opt.bw = max_gap_ref;
|
|
||||||
}
|
}
|
||||||
if ((opt.flag & MM_F_OUT_SAM) && (opt.flag & MM_F_OUT_CS_LONG)) {
|
if ((opt.flag & MM_F_OUT_SAM) && (opt.flag & MM_F_OUT_CS_LONG)) {
|
||||||
opt.flag &= ~MM_F_OUT_CS_LONG;
|
opt.flag &= ~MM_F_OUT_CS_LONG;
|
||||||
|
|
@ -203,7 +202,8 @@ int main(int argc, char *argv[])
|
||||||
fprintf(fp_help, " Mapping:\n");
|
fprintf(fp_help, " Mapping:\n");
|
||||||
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
fprintf(fp_help, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
|
||||||
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
fprintf(fp_help, " -g NUM stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
|
||||||
fprintf(fp_help, " -G NUM max intron length (-x splice) [200k]; or insert size (-x sr) [1000] []\n");
|
fprintf(fp_help, " -G NUM max reference skip length [-xsplice:200k]\n");
|
||||||
|
fprintf(fp_help, " -F NUM max fragment length in the fragment mode [-xsr:800]\n");
|
||||||
fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
|
fprintf(fp_help, " -r NUM bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
|
||||||
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
fprintf(fp_help, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
||||||
fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
fprintf(fp_help, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
||||||
|
|
|
||||||
31
map.c
31
map.c
|
|
@ -83,14 +83,14 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
||||||
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND;
|
mo->flag |= MM_F_SR | MM_F_FRAG_MODE | MM_F_NO_PRINT_2ND;
|
||||||
mo->pe_ori = 0<<1|1; // FR
|
mo->pe_ori = 0<<1|1; // FR
|
||||||
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1;
|
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1;
|
||||||
|
mo->max_frag_len = 800;
|
||||||
mo->max_gap = 200;
|
mo->max_gap = 200;
|
||||||
mo->max_gap_ref = 1000;
|
mo->bw = 100;
|
||||||
mo->pri_ratio = 0.5f;
|
mo->pri_ratio = 0.5f;
|
||||||
mo->min_cnt = 2;
|
mo->min_cnt = 2;
|
||||||
mo->min_chain_score = 25;
|
mo->min_chain_score = 25;
|
||||||
mo->min_dp_max = 40;
|
mo->min_dp_max = 40;
|
||||||
mo->best_n = 20;
|
mo->best_n = 20;
|
||||||
mo->bw = 100;
|
|
||||||
mo->mid_occ = 1000;
|
mo->mid_occ = 1000;
|
||||||
mo->max_occ = 5000;
|
mo->max_occ = 5000;
|
||||||
mo->mini_batch_size = 50000000;
|
mo->mini_batch_size = 50000000;
|
||||||
|
|
@ -235,12 +235,12 @@ static mm128_t *collect_seed_hits(const mm_mapopt_t *opt, int max_occ, const mm_
|
||||||
return a;
|
return a;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void chain_post(const mm_mapopt_t *opt, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||||
{
|
{
|
||||||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||||
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
|
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
|
||||||
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||||
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, opt->max_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
|
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
|
||||||
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR) && !(opt->flag & MM_F_NO_LJOIN))
|
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR) && !(opt->flag & MM_F_NO_LJOIN))
|
||||||
mm_join_long(km, opt, qlen, n_regs, regs, a);
|
mm_join_long(km, opt, qlen, n_regs, regs, a);
|
||||||
}
|
}
|
||||||
|
|
@ -260,7 +260,8 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
|
||||||
|
|
||||||
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **seqs, int *n_regs, mm_reg1_t **regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||||
{
|
{
|
||||||
int i, j, max_gap_ref, rep_len, qlen_sum, n_regs0;
|
int i, j, rep_len, qlen_sum, n_regs0;
|
||||||
|
int max_chain_gap_qry, max_chain_gap_ref, is_splice = !!(opt->flag & MM_F_SPLICE);
|
||||||
uint32_t hash;
|
uint32_t hash;
|
||||||
int64_t n_a;
|
int64_t n_a;
|
||||||
uint64_t *u;
|
uint64_t *u;
|
||||||
|
|
@ -287,8 +288,18 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
i == 0? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
i == 0? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
||||||
}
|
}
|
||||||
|
|
||||||
max_gap_ref = opt->max_gap_ref >= 0? opt->max_gap_ref : opt->max_gap;
|
// set max chaining gap on the query and the reference sequence
|
||||||
a = mm_chain_dp(max_gap_ref, opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, !!(opt->flag&MM_F_SPLICE), n_segs, n_a, a, &n_regs0, &u, b->km);
|
if (opt->flag & MM_F_SR)
|
||||||
|
max_chain_gap_qry = qlen_sum > opt->max_gap? qlen_sum : opt->max_gap;
|
||||||
|
else max_chain_gap_qry = opt->max_gap;
|
||||||
|
if (opt->max_gap_ref > 0) {
|
||||||
|
max_chain_gap_ref = opt->max_gap_ref; // always honor mm_mapopt_t::max_gap_ref if set
|
||||||
|
} else if (opt->max_frag_len > 0) {
|
||||||
|
max_chain_gap_ref = opt->max_frag_len - qlen_sum;
|
||||||
|
if (max_chain_gap_ref < opt->max_gap) max_chain_gap_ref = opt->max_gap;
|
||||||
|
} else max_chain_gap_ref = opt->max_gap;
|
||||||
|
|
||||||
|
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||||
|
|
||||||
if ((opt->flag & MM_F_SR) && rep_len > 0) {
|
if ((opt->flag & MM_F_SR) && rep_len > 0) {
|
||||||
int rechain = 0;
|
int rechain = 0;
|
||||||
|
|
@ -309,7 +320,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
kfree(b->km, u);
|
kfree(b->km, u);
|
||||||
a = collect_seed_hits(opt, opt->max_occ, mi, qname, qlen_sum, &n_a, &rep_len, b);
|
a = collect_seed_hits(opt, opt->max_occ, mi, qname, qlen_sum, &n_a, &rep_len, b);
|
||||||
radix_sort_128x(a, a + n_a);
|
radix_sort_128x(a, a + n_a);
|
||||||
a = mm_chain_dp(max_gap_ref, opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, !!(opt->flag&MM_F_SPLICE), n_segs, n_a, a, &n_regs0, &u, b->km);
|
a = mm_chain_dp(max_chain_gap_ref, max_chain_gap_qry, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, is_splice, n_segs, n_a, a, &n_regs0, &u, b->km);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -321,7 +332,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
|
fprintf(stderr, "CN\t%d\t%s\t%d\t%c\t%d\t%d\t%d\n", j, mi->seq[a[i].x<<1>>33].name, (int32_t)a[i].x, "+-"[a[i].x>>63], (int32_t)a[i].y, (int32_t)(a[i].y>>32&0xff),
|
||||||
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
i == regs0[j].as? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
||||||
|
|
||||||
chain_post(opt, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
|
chain_post(opt, max_chain_gap_ref, mi, b->km, qlen_sum, n_segs, qlens, &n_regs0, regs0, a);
|
||||||
|
|
||||||
if (n_segs == 1) { // uni-segment
|
if (n_segs == 1) { // uni-segment
|
||||||
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
regs0 = align_regs(opt, mi, b->km, qlens[0], seqs[0], &n_regs0, regs0, a);
|
||||||
|
|
@ -338,7 +349,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
|
||||||
}
|
}
|
||||||
mm_seg_free(b->km, n_segs, seg);
|
mm_seg_free(b->km, n_segs, seg);
|
||||||
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
if (n_segs == 2 && opt->pe_ori >= 0 && (opt->flag&MM_F_CIGAR))
|
||||||
mm_pair(b->km, max_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs); // pairing
|
mm_pair(b->km, max_chain_gap_ref, opt->pe_bonus, opt->a * 2 + opt->b, opt->a, qlens, n_regs, regs); // pairing
|
||||||
}
|
}
|
||||||
|
|
||||||
kfree(b->km, a);
|
kfree(b->km, a);
|
||||||
|
|
|
||||||
|
|
@ -89,6 +89,7 @@ typedef struct {
|
||||||
|
|
||||||
int bw; // bandwidth
|
int bw; // bandwidth
|
||||||
int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window
|
int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window
|
||||||
|
int max_frag_len;
|
||||||
int max_chain_skip;
|
int max_chain_skip;
|
||||||
int min_cnt; // min number of minimizers on each chain
|
int min_cnt; // min number of minimizers on each chain
|
||||||
int min_chain_score; // min chaining score
|
int min_chain_score; // min chaining score
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue