Merge branch 'dev' into 'master'
Fixed bugs when reporting secondary alignments See merge request !1
This commit is contained in:
commit
6ead8af104
6
Makefile
6
Makefile
|
|
@ -1,9 +1,9 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
|
||||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES= -I.
|
||||
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o hit.o sdust.o \
|
||||
index.o format.o map.o ksw2_extz2_sse.o
|
||||
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o misc.o bseq.o sketch.o sdust.o \
|
||||
index.o chain.o align.o hit.o map.o format.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
|
|
|||
6
align.c
6
align.c
|
|
@ -239,11 +239,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
r->p->dp_score += ez->max;
|
||||
re1 = rs + (ez->max_t + 1);
|
||||
qe1 = qs + (ez->max_q + 1);
|
||||
if (r->cnt - (j + 1) >= opt->min_cnt) {
|
||||
if (r->cnt - (j + 1) >= opt->min_cnt)
|
||||
mm_split_reg(r, r2, j + 1, qlen, a);
|
||||
if (j + 1 < opt->min_cnt)
|
||||
r2->id = r->id, r->id = -1;
|
||||
}
|
||||
break;
|
||||
} else r->p->dp_score += ez->score;
|
||||
rs = re, qs = qe;
|
||||
|
|
@ -308,5 +305,6 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
kfree(km, qseq0[0]); kfree(km, qseq0[1]);
|
||||
kfree(km, ez.cigar);
|
||||
mm_filter_regs(km, opt, n_regs_, regs);
|
||||
mm_hit_sort_by_dp(km, n_regs_, regs);
|
||||
return regs;
|
||||
}
|
||||
|
|
|
|||
113
hit.c
113
hit.c
|
|
@ -3,6 +3,22 @@
|
|||
#include "mmpriv.h"
|
||||
#include "kalloc.h"
|
||||
|
||||
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
|
||||
{
|
||||
int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff);
|
||||
r->rev = a[k].x>>63;
|
||||
r->rid = a[k].x<<1>>33;
|
||||
r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary
|
||||
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
|
||||
if (!r->rev) {
|
||||
r->qs = (int32_t)a[k].y + 1 - q_span;
|
||||
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
|
||||
} else {
|
||||
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
|
||||
r->qe = qlen - ((int32_t)a[k].y + 1 - q_span);
|
||||
}
|
||||
}
|
||||
|
||||
mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits
|
||||
{
|
||||
mm128_t *z, tmp;
|
||||
|
|
@ -59,60 +75,62 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compu
|
|||
{
|
||||
int i, j, k, *w;
|
||||
if (n <= 0) return;
|
||||
for (i = 0; i < n; ++i) r[i].id = i;
|
||||
w = (int*)kmalloc(km, n * sizeof(int));
|
||||
w[0] = 0, r[0].parent = 0;
|
||||
for (i = 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
mm_reg1_t *ri = &r[i];
|
||||
int si = ri->qs, ei = ri->qe;
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
mm_reg1_t *rp = &r[w[j]];
|
||||
int sj = rp->qs, ej = rp->qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
ri->parent = rp->parent;
|
||||
rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score;
|
||||
if (rp->p && ri->p)
|
||||
rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (j == k) w[k++] = i, r[i].parent = i;
|
||||
if (j == k) w[k++] = i, ri->parent = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
}
|
||||
|
||||
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r) // due to changes to r.{qs,qe} after DP extension
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
|
||||
{
|
||||
int i, j, k, *w, n_pri = 0;
|
||||
if (n <= 0) return;
|
||||
for (i = 0; i < n; ++i)
|
||||
if (r[i].id == r[i].parent) ++n_pri;
|
||||
if (n_pri <= 1) return;
|
||||
w = (int*)kmalloc(km, n_pri * sizeof(int));
|
||||
for (i = j = 0; i < n; ++i) // find the first primary
|
||||
if (r[i].id == r[i].parent) break;
|
||||
for (w[0] = i, i = i + 1, k = 1; i < n; ++i) {
|
||||
int si = r[i].qs, ei = r[i].qe;
|
||||
if (r[i].id != r[i].parent) continue; // only check primary
|
||||
for (j = 0; j < k; ++j) {
|
||||
int sj = r[w[j]].qs, ej = r[w[j]].qe;
|
||||
int min = ej - sj < ei - si? ej - sj : ei - si;
|
||||
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
|
||||
if (ol > mask_level * min) {
|
||||
r[i].parent = r[w[j]].parent;
|
||||
if (r[w[j]].subsc < r[i].score)
|
||||
r[w[j]].subsc = r[i].score;
|
||||
break;
|
||||
}
|
||||
int32_t i, n_aux, n = *n_regs;
|
||||
uint64_t *aux;
|
||||
mm_reg1_t *t;
|
||||
|
||||
if (n <= 1) return;
|
||||
aux = (uint64_t*)kmalloc(km, n * 8);
|
||||
t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t));
|
||||
for (i = n_aux = 0; i < n; ++i) {
|
||||
if (r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
|
||||
assert(r[i].p);
|
||||
aux[n_aux++] = (uint64_t)r[i].p->dp_max << 32 | i;
|
||||
} else if (r[i].p) {
|
||||
kfree(km, r[i].p);
|
||||
r[i].p = 0;
|
||||
}
|
||||
if (j == k) w[k++] = i;
|
||||
}
|
||||
kfree(km, w);
|
||||
radix_sort_64(aux, aux + n_aux);
|
||||
for (i = n_aux - 1; i >= 0; --i)
|
||||
t[n_aux - 1 - i] = r[(int32_t)aux[i]];
|
||||
memcpy(r, t, sizeof(mm_reg1_t) * n_aux);
|
||||
*n_regs = n_aux;
|
||||
kfree(km, aux);
|
||||
kfree(km, t);
|
||||
}
|
||||
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id
|
||||
{
|
||||
int *tmp, i, max_id = -1, n_tmp, n_pri;
|
||||
if (n_regs <= 0) return;
|
||||
for (i = 0; i < n_regs; ++i)
|
||||
for (i = 0; i < n_regs; ++i) // NB: doesn't work if mm_reg1_t::id is negative
|
||||
max_id = max_id > regs[i].id? max_id : regs[i].id;
|
||||
n_tmp = max_id + 1;
|
||||
tmp = (int*)kmalloc(km, n_tmp * sizeof(int));
|
||||
|
|
@ -134,13 +152,13 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,
|
|||
kfree(km, tmp);
|
||||
}
|
||||
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r)
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r)
|
||||
{
|
||||
if (pri_ratio > 0.0f && *n_ > 0) {
|
||||
int i, k, n = *n_;
|
||||
int i, k, n = *n_, n_2nd = 0;
|
||||
for (i = k = 0; i < n; ++i)
|
||||
if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio)
|
||||
r[k++] = r[i];
|
||||
if (r[i].parent == i) r[k++] = r[i];
|
||||
else if (r[i].score >= r[r[i].parent].score * pri_ratio && n_2nd++ < best_n) r[k++] = r[i];
|
||||
else if (r[i].p) free(r[i].p);
|
||||
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
|
||||
*n_ = k;
|
||||
|
|
@ -148,7 +166,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1
|
|||
}
|
||||
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs)
|
||||
{
|
||||
{ // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out
|
||||
int i, k;
|
||||
for (i = k = 0; i < *n_regs; ++i) {
|
||||
mm_reg1_t *r = ®s[i];
|
||||
|
|
@ -169,7 +187,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
|
|||
}
|
||||
}
|
||||
*n_regs = k;
|
||||
mm_sync_regs(km, k, regs);
|
||||
}
|
||||
|
||||
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
|
|
@ -192,9 +209,9 @@ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
|||
return as;
|
||||
}
|
||||
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a)
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_reg1_t *regs, mm128_t *a)
|
||||
{
|
||||
int i, n_aux;
|
||||
int i, n_aux, n_regs = *n_regs_, n_drop = 0;
|
||||
uint64_t *aux;
|
||||
|
||||
if (n_regs < 2) return; // nothing to join
|
||||
|
|
@ -228,8 +245,22 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg
|
|||
r0->cnt += r1->cnt, r0->score += r1->score;
|
||||
mm_reg_set_coor(r0, qlen, a);
|
||||
r1->cnt = 0;
|
||||
r1->parent = r0->id;
|
||||
++n_drop;
|
||||
}
|
||||
kfree(km, aux);
|
||||
|
||||
if (n_drop > 0) { // then fix the hits hierarchy
|
||||
for (i = 0; i < n_regs; ++i) { // adjust the mm_reg1_t::parent
|
||||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent >= 0 && r->id != r->parent) { // fix for secondary hits only
|
||||
if (regs[r->parent].parent >= 0 && regs[r->parent].parent != r->parent)
|
||||
r->parent = regs[r->parent].parent;
|
||||
}
|
||||
}
|
||||
mm_filter_regs(km, opt, n_regs_, regs);
|
||||
mm_sync_regs(km, *n_regs_, regs);
|
||||
}
|
||||
}
|
||||
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs)
|
||||
|
|
@ -239,7 +270,9 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs)
|
|||
mm_reg1_t *r = ®s[i];
|
||||
if (r->parent == r->id) {
|
||||
int mapq;
|
||||
mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score));
|
||||
if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0)
|
||||
mapq = (int)(30.0 * (1. - (float)(r->p->dp_max2 * r->subsc) / (r->p->dp_max * r->score)) * logf(r->score));
|
||||
else mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score));
|
||||
mapq = mapq > 0? mapq : 0;
|
||||
r->mapq = mapq < 60? mapq : 60;
|
||||
} else r->mapq = 0;
|
||||
|
|
|
|||
22
main.c
22
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r141-pre"
|
||||
#define MM_VERSION "2.0-r151-pre"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -45,6 +45,8 @@ static struct option long_options[] = {
|
|||
{ "bucket-bits", required_argument, 0, 0 },
|
||||
{ "mb-size", required_argument, 0, 0 },
|
||||
{ "int-rname", no_argument, 0, 0 },
|
||||
{ "no-kalloc", no_argument, 0, 0 },
|
||||
{ "print-qname", no_argument, 0, 0 },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
{ "min-count", required_argument, 0, 'n' },
|
||||
{ "min-chain-score",required_argument, 0, 'm' },
|
||||
|
|
@ -68,7 +70,7 @@ int main(int argc, char *argv[])
|
|||
mm_realtime0 = realtime();
|
||||
mm_mapopt_init(&opt);
|
||||
|
||||
while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:", long_options, &long_idx)) >= 0) {
|
||||
while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:", long_options, &long_idx)) >= 0) {
|
||||
if (c == 'w') w = atoi(optarg);
|
||||
else if (c == 'k') k = atoi(optarg);
|
||||
else if (c == 'H') is_hpc = 1;
|
||||
|
|
@ -78,11 +80,12 @@ int main(int argc, char *argv[])
|
|||
else if (c == 't') n_threads = atoi(optarg);
|
||||
else if (c == 'v') mm_verbose = atoi(optarg);
|
||||
else if (c == 'g') opt.max_gap = atoi(optarg);
|
||||
else if (c == 'N') opt.best_n = atoi(optarg);
|
||||
else if (c == 'p') opt.pri_ratio = atof(optarg);
|
||||
else if (c == 'D') opt.min_seedcov_ratio = atof(optarg);
|
||||
else if (c == 'M') opt.mask_level = atof(optarg);
|
||||
else if (c == 'c') opt.flag |= MM_F_CIGAR;
|
||||
else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
|
||||
else if (c == 'T') opt.sdust_thres = atoi(optarg);
|
||||
else if (c == 'n') opt.min_cnt = atoi(optarg);
|
||||
|
|
@ -93,8 +96,10 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'E') opt.e = atoi(optarg);
|
||||
else if (c == 'z') opt.zdrop = atoi(optarg);
|
||||
else if (c == 's') opt.min_dp_max = atoi(optarg);
|
||||
else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // bucket-bits
|
||||
else if (c == 0 && long_idx == 2) keep_name = 0; // int-rname
|
||||
else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // --bucket-bits
|
||||
else if (c == 0 && long_idx == 2) keep_name = 0; // --int-rname
|
||||
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
|
||||
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
|
||||
else if (c == 'V') {
|
||||
puts(MM_VERSION);
|
||||
return 0;
|
||||
|
|
@ -140,11 +145,12 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
|
||||
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
|
||||
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
|
||||
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n");
|
||||
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
|
||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
|
||||
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
|
||||
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
|
||||
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
||||
fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
|
||||
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
|
||||
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
|
||||
fprintf(stderr, " Alignment:\n");
|
||||
|
|
@ -160,7 +166,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
|
||||
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
|
||||
fprintf(stderr, " -V show version number\n");
|
||||
fprintf(stderr, "\nSee minimap2.1 for detailed description of the command-line options.\n");
|
||||
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
18
map.c
18
map.c
|
|
@ -22,8 +22,9 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->max_chain_skip = 15;
|
||||
opt->min_seedcov_ratio = 0.0f;
|
||||
|
||||
opt->pri_ratio = 2.0f;
|
||||
opt->mask_level = 0.5f;
|
||||
opt->pri_ratio = 0.8f;
|
||||
opt->best_n = 5;
|
||||
|
||||
opt->max_join_long = 20000;
|
||||
opt->max_join_short = 2000;
|
||||
|
|
@ -63,7 +64,7 @@ mm_tbuf_t *mm_tbuf_init(void)
|
|||
{
|
||||
mm_tbuf_t *b;
|
||||
b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t));
|
||||
if (mm_verbose < 10) b->km = km_init();
|
||||
if (!(mm_dbg_flag & 1)) b->km = km_init();
|
||||
b->sdb = sdust_buf_init(b->km);
|
||||
return b;
|
||||
}
|
||||
|
|
@ -240,16 +241,16 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
*n_regs = n_u;
|
||||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||
mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs);
|
||||
mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs);
|
||||
mm_join_long(b->km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
if (opt->flag & MM_F_CIGAR) {
|
||||
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
|
||||
if (!(opt->flag & MM_F_AVA)) {
|
||||
mm_update_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs);
|
||||
mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs);
|
||||
}
|
||||
} else mm_filter_regs(b->km, opt, n_regs, regs);
|
||||
}
|
||||
mm_set_mapq(*n_regs, regs);
|
||||
|
||||
// free
|
||||
|
|
@ -261,7 +262,6 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
|
||||
{
|
||||
mm_reg1_t *regs;
|
||||
if (mm_verbose >= 11) fprintf(stderr, "===> %s <===\n", qname);
|
||||
b->mini.n = 0;
|
||||
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
|
||||
if (opt->sdust_thres > 0)
|
||||
|
|
@ -294,6 +294,8 @@ typedef struct {
|
|||
static void worker_for(void *_data, long i, int tid) // kt_for() callback
|
||||
{
|
||||
step_t *step = (step_t*)_data;
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
|
||||
fprintf(stderr, "Processing query %s on thread %d\n", step->seq[i].name, tid);
|
||||
step->reg[i] = mm_map(step->p->mi, step->seq[i].l_seq, step->seq[i].seq, &step->n_reg[i], step->buf[tid], step->p->opt, step->seq[i].name);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ typedef struct {
|
|||
|
||||
typedef struct {
|
||||
uint32_t capacity;
|
||||
int32_t dp_score, dp_max;
|
||||
int32_t dp_score, dp_max, dp_max2;
|
||||
uint32_t blen;
|
||||
uint32_t n_diff, n_ambi;
|
||||
uint32_t n_cigar;
|
||||
|
|
@ -77,8 +77,9 @@ typedef struct {
|
|||
int min_chain_score;
|
||||
float min_seedcov_ratio;
|
||||
|
||||
float pri_ratio;
|
||||
float mask_level;
|
||||
float pri_ratio;
|
||||
int best_n;
|
||||
|
||||
int max_join_long, max_join_short;
|
||||
int min_join_flank_sc;
|
||||
|
|
@ -92,7 +93,7 @@ typedef struct {
|
|||
int mid_occ;
|
||||
} mm_mapopt_t;
|
||||
|
||||
extern int mm_verbose;
|
||||
extern int mm_verbose, mm_dbg_flag;
|
||||
extern double mm_realtime0;
|
||||
|
||||
struct mm_tbuf_s;
|
||||
|
|
|
|||
30
minimap2.1
30
minimap2.1
|
|
@ -1,4 +1,4 @@
|
|||
.TH minimap2 1 "1 July 2017" "minimap2-2.0-r141-pre" "Bioinformatics tools"
|
||||
.TH minimap2 1 "1 July 2017" "minimap2-2.0-r145-pre" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
minimap2 - mapping and alignment between collections of DNA sequences
|
||||
|
|
@ -142,21 +142,27 @@ not using
|
|||
.BR -H )
|
||||
minus base-2 logarithm gap penalty. It is computed with dynamic programming.
|
||||
.TP
|
||||
.B -S
|
||||
.B -X
|
||||
Perform all-vs-all mapping. In this mode, if the query sequence name is
|
||||
lexicographically larger than the target sequence name, the hits between them
|
||||
will be suppressed; if the query sequence name is the same as the target name,
|
||||
diagonal minimizer hits will also be suppressed.
|
||||
.TP
|
||||
.BI -p \ FLOAT
|
||||
Minimal secondary-to-primary score ratio to output secondary mappings [2].
|
||||
Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
|
||||
Between two chains overlaping over half of the shorter chain (controled by
|
||||
.BR --mask-level ),
|
||||
the chain with a lower score is secondary to the chain with a higher score.
|
||||
If the ratio of the scores is below
|
||||
.IR FLOAT ,
|
||||
the secondary chain will not be outputted or extended with DP alignment later.
|
||||
The default value suppresses all secondary chains.
|
||||
.TP
|
||||
.BI -N \ INT
|
||||
Output at most
|
||||
.I INT
|
||||
secondary alignments [5]. This option has no effect when
|
||||
.B -X
|
||||
is applied.
|
||||
.TP
|
||||
.BI -D \ FLOAT
|
||||
Discard a chain if the fraction of matching bases over the length of
|
||||
|
|
@ -175,7 +181,7 @@ are:
|
|||
.RS
|
||||
.TP 8
|
||||
.B ava10k
|
||||
PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -Sw5 -p0 -m100 -D.05)
|
||||
PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -D.05)
|
||||
.TP
|
||||
.B map10k
|
||||
PacBio/Oxford Nanopore read to reference mapping (-Hk19)
|
||||
|
|
@ -220,14 +226,24 @@ by default.
|
|||
Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag.
|
||||
.TP
|
||||
.BI -t \ INT
|
||||
Number of threads [3]. Minimap2 uses at most three threads when collecting
|
||||
minimizers on target sequences, and uses up to
|
||||
Number of threads [3]. Minimap2 uses at most three threads when indexing target
|
||||
sequences, and uses up to
|
||||
.IR INT +1
|
||||
threads when mapping (the extra thread is for I/O, which is frequently idle and
|
||||
takes little CPU time).
|
||||
.TP
|
||||
.B -V
|
||||
Print version number to stdout
|
||||
.SS Miscellaneous options
|
||||
.TP 10
|
||||
.B --no-kalloc
|
||||
Use the libc default allocator instead of the kalloc thread-local allocator.
|
||||
This debugging option is mostly used with Valgrind to detect invalid memory
|
||||
accesses. Minimap2 runs slower with this option, especially in the
|
||||
multi-threading mode.
|
||||
.TP
|
||||
.B --print-qname
|
||||
Print query names to stderr, mostly to see which query is crashing minimap2.
|
||||
.SH OUTPUT FORMAT
|
||||
.PP
|
||||
Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by
|
||||
|
|
|
|||
1
misc.c
1
misc.c
|
|
@ -3,6 +3,7 @@
|
|||
#include "minimap.h"
|
||||
|
||||
int mm_verbose = 3;
|
||||
int mm_dbg_flag = 0;
|
||||
double mm_realtime0;
|
||||
|
||||
double cputime()
|
||||
|
|
|
|||
25
mmpriv.h
25
mmpriv.h
|
|
@ -8,6 +8,9 @@
|
|||
#define MM_PARENT_UNSET (-1)
|
||||
#define MM_PARENT_TMP_PRI (-2)
|
||||
|
||||
#define MM_DBG_NO_KALLOC 0x1
|
||||
#define MM_DBG_PRINT_QNAME 0x2
|
||||
|
||||
#ifndef kroundup32
|
||||
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||
#endif
|
||||
|
|
@ -40,30 +43,14 @@ mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a);
|
|||
void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
|
||||
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
|
||||
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r);
|
||||
void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r);
|
||||
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
|
||||
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
|
||||
void mm_set_mapq(int n_regs, mm_reg1_t *regs);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
|
||||
{
|
||||
int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff);
|
||||
r->rev = a[k].x>>63;
|
||||
r->rid = a[k].x<<1>>33;
|
||||
r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary
|
||||
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
|
||||
if (!r->rev) {
|
||||
r->qs = (int32_t)a[k].y + 1 - q_span;
|
||||
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
|
||||
} else {
|
||||
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
|
||||
r->qe = qlen - ((int32_t)a[k].y + 1 - q_span);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue