Merge branch 'dev' into 'master'

Fixed bugs when reporting secondary alignments

See merge request !1
This commit is contained in:
Heng Li 2017-07-03 16:17:08 +00:00
commit 6ead8af104
9 changed files with 136 additions and 92 deletions

View File

@ -1,9 +1,9 @@
CC= gcc CC= gcc
CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function CFLAGS= -g -Wall -O2 -Wc++-compat
CPPFLAGS= -DHAVE_KALLOC CPPFLAGS= -DHAVE_KALLOC
INCLUDES= -I. INCLUDES= -I.
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o hit.o sdust.o \ OBJS= kthread.o kalloc.o ksw2_extz2_sse.o misc.o bseq.o sketch.o sdust.o \
index.o format.o map.o ksw2_extz2_sse.o index.o chain.o align.o hit.o map.o format.o
PROG= minimap2 PROG= minimap2
PROG_EXTRA= sdust PROG_EXTRA= sdust
LIBS= -lm -lz -lpthread LIBS= -lm -lz -lpthread

View File

@ -239,11 +239,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
r->p->dp_score += ez->max; r->p->dp_score += ez->max;
re1 = rs + (ez->max_t + 1); re1 = rs + (ez->max_t + 1);
qe1 = qs + (ez->max_q + 1); qe1 = qs + (ez->max_q + 1);
if (r->cnt - (j + 1) >= opt->min_cnt) { if (r->cnt - (j + 1) >= opt->min_cnt)
mm_split_reg(r, r2, j + 1, qlen, a); mm_split_reg(r, r2, j + 1, qlen, a);
if (j + 1 < opt->min_cnt)
r2->id = r->id, r->id = -1;
}
break; break;
} else r->p->dp_score += ez->score; } else r->p->dp_score += ez->score;
rs = re, qs = qe; rs = re, qs = qe;
@ -308,5 +305,6 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
kfree(km, qseq0[0]); kfree(km, qseq0[1]); kfree(km, qseq0[0]); kfree(km, qseq0[1]);
kfree(km, ez.cigar); kfree(km, ez.cigar);
mm_filter_regs(km, opt, n_regs_, regs); mm_filter_regs(km, opt, n_regs_, regs);
mm_hit_sort_by_dp(km, n_regs_, regs);
return regs; return regs;
} }

113
hit.c
View File

@ -3,6 +3,22 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "kalloc.h" #include "kalloc.h"
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
{
int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff);
r->rev = a[k].x>>63;
r->rid = a[k].x<<1>>33;
r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
if (!r->rev) {
r->qs = (int32_t)a[k].y + 1 - q_span;
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
} else {
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
r->qe = qlen - ((int32_t)a[k].y + 1 - q_span);
}
}
mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a) // convert chains to hits
{ {
mm128_t *z, tmp; mm128_t *z, tmp;
@ -59,60 +75,62 @@ void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r) // and compu
{ {
int i, j, k, *w; int i, j, k, *w;
if (n <= 0) return; if (n <= 0) return;
for (i = 0; i < n; ++i) r[i].id = i;
w = (int*)kmalloc(km, n * sizeof(int)); w = (int*)kmalloc(km, n * sizeof(int));
w[0] = 0, r[0].parent = 0; w[0] = 0, r[0].parent = 0;
for (i = 1, k = 1; i < n; ++i) { for (i = 1, k = 1; i < n; ++i) {
int si = r[i].qs, ei = r[i].qe; mm_reg1_t *ri = &r[i];
int si = ri->qs, ei = ri->qe;
for (j = 0; j < k; ++j) { for (j = 0; j < k; ++j) {
int sj = r[w[j]].qs, ej = r[w[j]].qe; mm_reg1_t *rp = &r[w[j]];
int sj = rp->qs, ej = rp->qe;
int min = ej - sj < ei - si? ej - sj : ei - si; int min = ej - sj < ei - si? ej - sj : ei - si;
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si); int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
if (ol > mask_level * min) { if (ol > mask_level * min) {
r[i].parent = r[w[j]].parent; ri->parent = rp->parent;
if (r[w[j]].subsc < r[i].score) rp->subsc = rp->subsc > ri->score? rp->subsc : ri->score;
r[w[j]].subsc = r[i].score; if (rp->p && ri->p)
rp->p->dp_max2 = rp->p->dp_max2 > ri->p->dp_max? rp->p->dp_max2 : ri->p->dp_max;
break; break;
} }
} }
if (j == k) w[k++] = i, r[i].parent = i; if (j == k) w[k++] = i, ri->parent = i;
} }
kfree(km, w); kfree(km, w);
} }
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r) // due to changes to r.{qs,qe} after DP extension void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r)
{ {
int i, j, k, *w, n_pri = 0; int32_t i, n_aux, n = *n_regs;
if (n <= 0) return; uint64_t *aux;
for (i = 0; i < n; ++i) mm_reg1_t *t;
if (r[i].id == r[i].parent) ++n_pri;
if (n_pri <= 1) return; if (n <= 1) return;
w = (int*)kmalloc(km, n_pri * sizeof(int)); aux = (uint64_t*)kmalloc(km, n * 8);
for (i = j = 0; i < n; ++i) // find the first primary t = (mm_reg1_t*)kmalloc(km, n * sizeof(mm_reg1_t));
if (r[i].id == r[i].parent) break; for (i = n_aux = 0; i < n; ++i) {
for (w[0] = i, i = i + 1, k = 1; i < n; ++i) { if (r[i].cnt > 0) { // squeeze out elements with cnt==0 (soft deleted)
int si = r[i].qs, ei = r[i].qe; assert(r[i].p);
if (r[i].id != r[i].parent) continue; // only check primary aux[n_aux++] = (uint64_t)r[i].p->dp_max << 32 | i;
for (j = 0; j < k; ++j) { } else if (r[i].p) {
int sj = r[w[j]].qs, ej = r[w[j]].qe; kfree(km, r[i].p);
int min = ej - sj < ei - si? ej - sj : ei - si; r[i].p = 0;
int ol = si < sj? (ei < sj? 0 : ei < ej? ei - sj : ej - sj) : (ej < si? 0 : ej < ei? ej - si : ei - si);
if (ol > mask_level * min) {
r[i].parent = r[w[j]].parent;
if (r[w[j]].subsc < r[i].score)
r[w[j]].subsc = r[i].score;
break;
}
} }
if (j == k) w[k++] = i;
} }
kfree(km, w); radix_sort_64(aux, aux + n_aux);
for (i = n_aux - 1; i >= 0; --i)
t[n_aux - 1 - i] = r[(int32_t)aux[i]];
memcpy(r, t, sizeof(mm_reg1_t) * n_aux);
*n_regs = n_aux;
kfree(km, aux);
kfree(km, t);
} }
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,parent} in sync; also reset id
{ {
int *tmp, i, max_id = -1, n_tmp, n_pri; int *tmp, i, max_id = -1, n_tmp, n_pri;
if (n_regs <= 0) return; if (n_regs <= 0) return;
for (i = 0; i < n_regs; ++i) for (i = 0; i < n_regs; ++i) // NB: doesn't work if mm_reg1_t::id is negative
max_id = max_id > regs[i].id? max_id : regs[i].id; max_id = max_id > regs[i].id? max_id : regs[i].id;
n_tmp = max_id + 1; n_tmp = max_id + 1;
tmp = (int*)kmalloc(km, n_tmp * sizeof(int)); tmp = (int*)kmalloc(km, n_tmp * sizeof(int));
@ -134,13 +152,13 @@ void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs) // keep mm_reg1_t::{id,
kfree(km, tmp); kfree(km, tmp);
} }
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r) void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r)
{ {
if (pri_ratio > 0.0f && *n_ > 0) { if (pri_ratio > 0.0f && *n_ > 0) {
int i, k, n = *n_; int i, k, n = *n_, n_2nd = 0;
for (i = k = 0; i < n; ++i) for (i = k = 0; i < n; ++i)
if (r[i].parent == i || r[i].score >= r[r[i].parent].score * pri_ratio) if (r[i].parent == i) r[k++] = r[i];
r[k++] = r[i]; else if (r[i].score >= r[r[i].parent].score * pri_ratio && n_2nd++ < best_n) r[k++] = r[i];
else if (r[i].p) free(r[i].p); else if (r[i].p) free(r[i].p);
if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync() if (k != n) mm_sync_regs(km, k, r); // removing hits requires sync()
*n_ = k; *n_ = k;
@ -148,7 +166,7 @@ void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1
} }
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs) void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs)
{ { // NB: after this call, mm_reg1_t::parent can be -1 if its parent filtered out
int i, k; int i, k;
for (i = k = 0; i < *n_regs; ++i) { for (i = k = 0; i < *n_regs; ++i) {
mm_reg1_t *r = &regs[i]; mm_reg1_t *r = &regs[i];
@ -169,7 +187,6 @@ void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *re
} }
} }
*n_regs = k; *n_regs = k;
mm_sync_regs(km, k, regs);
} }
int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a) int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
@ -192,9 +209,9 @@ int mm_squeeze_a(void *km, int n_regs, mm_reg1_t *regs, mm128_t *a)
return as; return as;
} }
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a) void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs_, mm_reg1_t *regs, mm128_t *a)
{ {
int i, n_aux; int i, n_aux, n_regs = *n_regs_, n_drop = 0;
uint64_t *aux; uint64_t *aux;
if (n_regs < 2) return; // nothing to join if (n_regs < 2) return; // nothing to join
@ -228,8 +245,22 @@ void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg
r0->cnt += r1->cnt, r0->score += r1->score; r0->cnt += r1->cnt, r0->score += r1->score;
mm_reg_set_coor(r0, qlen, a); mm_reg_set_coor(r0, qlen, a);
r1->cnt = 0; r1->cnt = 0;
r1->parent = r0->id;
++n_drop;
} }
kfree(km, aux); kfree(km, aux);
if (n_drop > 0) { // then fix the hits hierarchy
for (i = 0; i < n_regs; ++i) { // adjust the mm_reg1_t::parent
mm_reg1_t *r = &regs[i];
if (r->parent >= 0 && r->id != r->parent) { // fix for secondary hits only
if (regs[r->parent].parent >= 0 && regs[r->parent].parent != r->parent)
r->parent = regs[r->parent].parent;
}
}
mm_filter_regs(km, opt, n_regs_, regs);
mm_sync_regs(km, *n_regs_, regs);
}
} }
void mm_set_mapq(int n_regs, mm_reg1_t *regs) void mm_set_mapq(int n_regs, mm_reg1_t *regs)
@ -239,7 +270,9 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs)
mm_reg1_t *r = &regs[i]; mm_reg1_t *r = &regs[i];
if (r->parent == r->id) { if (r->parent == r->id) {
int mapq; int mapq;
mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score)); if (r->p && r->p->dp_max2 > 0 && r->p->dp_max > 0)
mapq = (int)(30.0 * (1. - (float)(r->p->dp_max2 * r->subsc) / (r->p->dp_max * r->score)) * logf(r->score));
else mapq = (int)(30.0 * (1. - (float)r->subsc / r->score) * logf(r->score));
mapq = mapq > 0? mapq : 0; mapq = mapq > 0? mapq : 0;
r->mapq = mapq < 60? mapq : 60; r->mapq = mapq < 60? mapq : 60;
} else r->mapq = 0; } else r->mapq = 0;

22
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#define MM_VERSION "2.0-r141-pre" #define MM_VERSION "2.0-r151-pre"
void liftrlimit() void liftrlimit()
{ {
@ -45,6 +45,8 @@ static struct option long_options[] = {
{ "bucket-bits", required_argument, 0, 0 }, { "bucket-bits", required_argument, 0, 0 },
{ "mb-size", required_argument, 0, 0 }, { "mb-size", required_argument, 0, 0 },
{ "int-rname", no_argument, 0, 0 }, { "int-rname", no_argument, 0, 0 },
{ "no-kalloc", no_argument, 0, 0 },
{ "print-qname", no_argument, 0, 0 },
{ "version", no_argument, 0, 'V' }, { "version", no_argument, 0, 'V' },
{ "min-count", required_argument, 0, 'n' }, { "min-count", required_argument, 0, 'n' },
{ "min-chain-score",required_argument, 0, 'm' }, { "min-chain-score",required_argument, 0, 'm' },
@ -68,7 +70,7 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime(); mm_realtime0 = realtime();
mm_mapopt_init(&opt); mm_mapopt_init(&opt);
while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:ST:s:x:Hcp:M:n:z:A:B:O:E:m:D:", long_options, &long_idx)) >= 0) { while ((c = getopt_long(argc, argv, "aw:k:t:r:f:Vv:g:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:D:N:", long_options, &long_idx)) >= 0) {
if (c == 'w') w = atoi(optarg); if (c == 'w') w = atoi(optarg);
else if (c == 'k') k = atoi(optarg); else if (c == 'k') k = atoi(optarg);
else if (c == 'H') is_hpc = 1; else if (c == 'H') is_hpc = 1;
@ -78,11 +80,12 @@ int main(int argc, char *argv[])
else if (c == 't') n_threads = atoi(optarg); else if (c == 't') n_threads = atoi(optarg);
else if (c == 'v') mm_verbose = atoi(optarg); else if (c == 'v') mm_verbose = atoi(optarg);
else if (c == 'g') opt.max_gap = atoi(optarg); else if (c == 'g') opt.max_gap = atoi(optarg);
else if (c == 'N') opt.best_n = atoi(optarg);
else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'D') opt.min_seedcov_ratio = atof(optarg); else if (c == 'D') opt.min_seedcov_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'c') opt.flag |= MM_F_CIGAR; else if (c == 'c') opt.flag |= MM_F_CIGAR;
else if (c == 'S') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF;
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR; else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
else if (c == 'T') opt.sdust_thres = atoi(optarg); else if (c == 'T') opt.sdust_thres = atoi(optarg);
else if (c == 'n') opt.min_cnt = atoi(optarg); else if (c == 'n') opt.min_cnt = atoi(optarg);
@ -93,8 +96,10 @@ int main(int argc, char *argv[])
else if (c == 'E') opt.e = atoi(optarg); else if (c == 'E') opt.e = atoi(optarg);
else if (c == 'z') opt.zdrop = atoi(optarg); else if (c == 'z') opt.zdrop = atoi(optarg);
else if (c == 's') opt.min_dp_max = atoi(optarg); else if (c == 's') opt.min_dp_max = atoi(optarg);
else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // bucket-bits else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // --bucket-bits
else if (c == 0 && long_idx == 2) keep_name = 0; // int-rname else if (c == 0 && long_idx == 2) keep_name = 0; // --int-rname
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
else if (c == 'V') { else if (c == 'V') {
puts(MM_VERSION); puts(MM_VERSION);
return 0; return 0;
@ -140,11 +145,12 @@ int main(int argc, char *argv[])
fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt); fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score); fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy // fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
fprintf(stderr, " -S skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio); fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava10k: -Hk19 -Sw5 -p0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n"); fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n"); fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
fprintf(stderr, " Alignment:\n"); fprintf(stderr, " Alignment:\n");
@ -160,7 +166,7 @@ int main(int argc, char *argv[])
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(stderr, " -V show version number\n"); fprintf(stderr, " -V show version number\n");
fprintf(stderr, "\nSee minimap2.1 for detailed description of the command-line options.\n"); fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
return 1; return 1;
} }

18
map.c
View File

@ -22,8 +22,9 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->max_chain_skip = 15; opt->max_chain_skip = 15;
opt->min_seedcov_ratio = 0.0f; opt->min_seedcov_ratio = 0.0f;
opt->pri_ratio = 2.0f;
opt->mask_level = 0.5f; opt->mask_level = 0.5f;
opt->pri_ratio = 0.8f;
opt->best_n = 5;
opt->max_join_long = 20000; opt->max_join_long = 20000;
opt->max_join_short = 2000; opt->max_join_short = 2000;
@ -63,7 +64,7 @@ mm_tbuf_t *mm_tbuf_init(void)
{ {
mm_tbuf_t *b; mm_tbuf_t *b;
b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t)); b = (mm_tbuf_t*)calloc(1, sizeof(mm_tbuf_t));
if (mm_verbose < 10) b->km = km_init(); if (!(mm_dbg_flag & 1)) b->km = km_init();
b->sdb = sdust_buf_init(b->km); b->sdb = sdust_buf_init(b->km);
return b; return b;
} }
@ -240,16 +241,16 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
*n_regs = n_u; *n_regs = n_u;
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
mm_set_parent(b->km, opt->mask_level, *n_regs, regs); mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs);
mm_join_long(b->km, opt, qlen, *n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle mm_join_long(b->km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
} }
if (opt->flag & MM_F_CIGAR) { if (opt->flag & MM_F_CIGAR) {
regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() regs = mm_align_skeleton(b->km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_AVA)) { if (!(opt->flag & MM_F_AVA)) {
mm_update_parent(b->km, opt->mask_level, *n_regs, regs); mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, n_regs, regs); mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, opt->best_n, n_regs, regs);
} }
} else mm_filter_regs(b->km, opt, n_regs, regs); }
mm_set_mapq(*n_regs, regs); mm_set_mapq(*n_regs, regs);
// free // free
@ -261,7 +262,6 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname) mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *qname)
{ {
mm_reg1_t *regs; mm_reg1_t *regs;
if (mm_verbose >= 11) fprintf(stderr, "===> %s <===\n", qname);
b->mini.n = 0; b->mini.n = 0;
mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini); mm_sketch(b->km, seq, l_seq, mi->w, mi->k, 0, mi->is_hpc, &b->mini);
if (opt->sdust_thres > 0) if (opt->sdust_thres > 0)
@ -294,6 +294,8 @@ typedef struct {
static void worker_for(void *_data, long i, int tid) // kt_for() callback static void worker_for(void *_data, long i, int tid) // kt_for() callback
{ {
step_t *step = (step_t*)_data; step_t *step = (step_t*)_data;
if (mm_dbg_flag & MM_DBG_PRINT_QNAME)
fprintf(stderr, "Processing query %s on thread %d\n", step->seq[i].name, tid);
step->reg[i] = mm_map(step->p->mi, step->seq[i].l_seq, step->seq[i].seq, &step->n_reg[i], step->buf[tid], step->p->opt, step->seq[i].name); step->reg[i] = mm_map(step->p->mi, step->seq[i].l_seq, step->seq[i].seq, &step->n_reg[i], step->buf[tid], step->p->opt, step->seq[i].name);
} }

View File

@ -45,7 +45,7 @@ typedef struct {
typedef struct { typedef struct {
uint32_t capacity; uint32_t capacity;
int32_t dp_score, dp_max; int32_t dp_score, dp_max, dp_max2;
uint32_t blen; uint32_t blen;
uint32_t n_diff, n_ambi; uint32_t n_diff, n_ambi;
uint32_t n_cigar; uint32_t n_cigar;
@ -77,8 +77,9 @@ typedef struct {
int min_chain_score; int min_chain_score;
float min_seedcov_ratio; float min_seedcov_ratio;
float pri_ratio;
float mask_level; float mask_level;
float pri_ratio;
int best_n;
int max_join_long, max_join_short; int max_join_long, max_join_short;
int min_join_flank_sc; int min_join_flank_sc;
@ -92,7 +93,7 @@ typedef struct {
int mid_occ; int mid_occ;
} mm_mapopt_t; } mm_mapopt_t;
extern int mm_verbose; extern int mm_verbose, mm_dbg_flag;
extern double mm_realtime0; extern double mm_realtime0;
struct mm_tbuf_s; struct mm_tbuf_s;

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "1 July 2017" "minimap2-2.0-r141-pre" "Bioinformatics tools" .TH minimap2 1 "1 July 2017" "minimap2-2.0-r145-pre" "Bioinformatics tools"
.SH NAME .SH NAME
.PP .PP
minimap2 - mapping and alignment between collections of DNA sequences minimap2 - mapping and alignment between collections of DNA sequences
@ -142,21 +142,27 @@ not using
.BR -H ) .BR -H )
minus base-2 logarithm gap penalty. It is computed with dynamic programming. minus base-2 logarithm gap penalty. It is computed with dynamic programming.
.TP .TP
.B -S .B -X
Perform all-vs-all mapping. In this mode, if the query sequence name is Perform all-vs-all mapping. In this mode, if the query sequence name is
lexicographically larger than the target sequence name, the hits between them lexicographically larger than the target sequence name, the hits between them
will be suppressed; if the query sequence name is the same as the target name, will be suppressed; if the query sequence name is the same as the target name,
diagonal minimizer hits will also be suppressed. diagonal minimizer hits will also be suppressed.
.TP .TP
.BI -p \ FLOAT .BI -p \ FLOAT
Minimal secondary-to-primary score ratio to output secondary mappings [2]. Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
Between two chains overlaping over half of the shorter chain (controled by Between two chains overlaping over half of the shorter chain (controled by
.BR --mask-level ), .BR --mask-level ),
the chain with a lower score is secondary to the chain with a higher score. the chain with a lower score is secondary to the chain with a higher score.
If the ratio of the scores is below If the ratio of the scores is below
.IR FLOAT , .IR FLOAT ,
the secondary chain will not be outputted or extended with DP alignment later. the secondary chain will not be outputted or extended with DP alignment later.
The default value suppresses all secondary chains. .TP
.BI -N \ INT
Output at most
.I INT
secondary alignments [5]. This option has no effect when
.B -X
is applied.
.TP .TP
.BI -D \ FLOAT .BI -D \ FLOAT
Discard a chain if the fraction of matching bases over the length of Discard a chain if the fraction of matching bases over the length of
@ -175,7 +181,7 @@ are:
.RS .RS
.TP 8 .TP 8
.B ava10k .B ava10k
PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -Sw5 -p0 -m100 -D.05) PacBio/Oxford Nanopore all-vs-all overlap mapping (-Hk19 -w5 -Xp0 -m100 -D.05)
.TP .TP
.B map10k .B map10k
PacBio/Oxford Nanopore read to reference mapping (-Hk19) PacBio/Oxford Nanopore read to reference mapping (-Hk19)
@ -220,14 +226,24 @@ by default.
Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag. Generate CIGAR. In PAF, the CIGAR is written to the `cg' custom tag.
.TP .TP
.BI -t \ INT .BI -t \ INT
Number of threads [3]. Minimap2 uses at most three threads when collecting Number of threads [3]. Minimap2 uses at most three threads when indexing target
minimizers on target sequences, and uses up to sequences, and uses up to
.IR INT +1 .IR INT +1
threads when mapping (the extra thread is for I/O, which is frequently idle and threads when mapping (the extra thread is for I/O, which is frequently idle and
takes little CPU time). takes little CPU time).
.TP .TP
.B -V .B -V
Print version number to stdout Print version number to stdout
.SS Miscellaneous options
.TP 10
.B --no-kalloc
Use the libc default allocator instead of the kalloc thread-local allocator.
This debugging option is mostly used with Valgrind to detect invalid memory
accesses. Minimap2 runs slower with this option, especially in the
multi-threading mode.
.TP
.B --print-qname
Print query names to stderr, mostly to see which query is crashing minimap2.
.SH OUTPUT FORMAT .SH OUTPUT FORMAT
.PP .PP
Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by Minimap2 outputs mapping positions in the Pairwise mApping Format (PAF) by

1
misc.c
View File

@ -3,6 +3,7 @@
#include "minimap.h" #include "minimap.h"
int mm_verbose = 3; int mm_verbose = 3;
int mm_dbg_flag = 0;
double mm_realtime0; double mm_realtime0;
double cputime() double cputime()

View File

@ -8,6 +8,9 @@
#define MM_PARENT_UNSET (-1) #define MM_PARENT_UNSET (-1)
#define MM_PARENT_TMP_PRI (-2) #define MM_PARENT_TMP_PRI (-2)
#define MM_DBG_NO_KALLOC 0x1
#define MM_DBG_PRINT_QNAME 0x2
#ifndef kroundup32 #ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif #endif
@ -40,30 +43,14 @@ mm_reg1_t *mm_gen_regs(void *km, int qlen, int n_u, uint64_t *u, mm128_t *a);
void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a); void mm_split_reg(mm_reg1_t *r, mm_reg1_t *r2, int n, int qlen, mm128_t *a);
void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs); void mm_sync_regs(void *km, int n_regs, mm_reg1_t *regs);
void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r); void mm_set_parent(void *km, float mask_level, int n, mm_reg1_t *r);
void mm_update_parent(void *km, float mask_level, int n, mm_reg1_t *r); void mm_select_sub(void *km, float mask_level, float pri_ratio, int best_n, int *n_, mm_reg1_t *r);
void mm_select_sub(void *km, float mask_level, float pri_ratio, int *n_, mm_reg1_t *r);
void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs); void mm_filter_regs(void *km, const mm_mapopt_t *opt, int *n_regs, mm_reg1_t *regs);
void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int n_regs, mm_reg1_t *regs, mm128_t *a); void mm_join_long(void *km, const mm_mapopt_t *opt, int qlen, int *n_regs, mm_reg1_t *regs, mm128_t *a);
void mm_hit_sort_by_dp(void *km, int *n_regs, mm_reg1_t *r);
void mm_set_mapq(int n_regs, mm_reg1_t *regs); void mm_set_mapq(int n_regs, mm_reg1_t *regs);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif
static inline void mm_reg_set_coor(mm_reg1_t *r, int32_t qlen, const mm128_t *a)
{
int32_t k = r->as, q_span = (int32_t)(a[k].y>>32&0xff);
r->rev = a[k].x>>63;
r->rid = a[k].x<<1>>33;
r->rs = (int32_t)a[k].x + 1 > q_span? (int32_t)a[k].x + 1 - q_span : 0; // NB: target span may be shorter, so this test is necessary
r->re = (int32_t)a[k + r->cnt - 1].x + 1;
if (!r->rev) {
r->qs = (int32_t)a[k].y + 1 - q_span;
r->qe = (int32_t)a[k + r->cnt - 1].y + 1;
} else {
r->qs = qlen - ((int32_t)a[k + r->cnt - 1].y + 1);
r->qe = qlen - ((int32_t)a[k].y + 1 - q_span);
}
}
#endif #endif