r287: refined CLI options and manpage
This commit is contained in:
parent
0f4c823b0c
commit
d240318741
8
align.c
8
align.c
|
|
@ -135,7 +135,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|||
for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); fputc('\n', stderr);
|
||||
for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr);
|
||||
}
|
||||
if (opt->flag & MM_F_CDNA)
|
||||
if (opt->flag & MM_F_SPLICE)
|
||||
ksw_extd2_noins_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, 0, -1, opt->zdrop, flag, ez);
|
||||
else if (opt->q == opt->q2 && opt->e == opt->e2)
|
||||
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, opt->zdrop, flag, ez);
|
||||
|
|
@ -259,7 +259,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
bw = (int)(opt->bw * 1.5 + 1.);
|
||||
|
||||
r2->cnt = 0;
|
||||
if (!(opt->flag & MM_F_CDNA))
|
||||
if (!(opt->flag & MM_F_SPLICE))
|
||||
mm_fix_bad_ends(r, a, opt->bw, &as1, &cnt1);
|
||||
else as1 = r->as, cnt1 = r->cnt;
|
||||
mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10);
|
||||
|
|
@ -362,7 +362,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
assert(re1 - rs1 <= re0 - rs0);
|
||||
if (r->p) {
|
||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||
mm_update_extra(r->p, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, (opt->flag&MM_F_CDNA)? opt->q2 : 0);
|
||||
mm_update_extra(r->p, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, (opt->flag&MM_F_SPLICE)? opt->q2 : 0);
|
||||
}
|
||||
|
||||
kfree(km, tseq);
|
||||
|
|
@ -403,7 +403,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
if (ez->n_cigar == 0) goto end_align1_inv; // should never be here
|
||||
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
|
||||
r_inv->p->dp_score = ez->max;
|
||||
mm_update_extra(r_inv->p, qseq + q_off, tseq + t_off, mat, opt->q, opt->e, (opt->flag&MM_F_CDNA)? opt->q2 : 0);
|
||||
mm_update_extra(r_inv->p, qseq + q_off, tseq + t_off, mat, opt->q, opt->e, (opt->flag&MM_F_SPLICE)? opt->q2 : 0);
|
||||
r_inv->id = -1;
|
||||
r_inv->parent = MM_PARENT_UNSET;
|
||||
r_inv->inv = 1;
|
||||
|
|
|
|||
43
main.c
43
main.c
|
|
@ -8,7 +8,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r286-dirty"
|
||||
#define MM_VERSION "2.0-r287-dirty"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -31,6 +31,8 @@ static struct option long_options[] = {
|
|||
{ "max-chain-skip", required_argument, 0, 0 },
|
||||
{ "min-dp-len", required_argument, 0, 0 },
|
||||
{ "print-aln-seq", no_argument, 0, 0 },
|
||||
{ "splice", no_argument, 0, 0 },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
{ "min-count", required_argument, 0, 'n' },
|
||||
{ "min-chain-score",required_argument, 0, 'm' },
|
||||
|
|
@ -40,10 +42,21 @@ static struct option long_options[] = {
|
|||
{ 0, 0, 0, 0}
|
||||
};
|
||||
|
||||
static inline int64_t mm_parse_num(const char *str)
|
||||
{
|
||||
double x;
|
||||
char *p;
|
||||
x = strtod(optarg, &p);
|
||||
if (*p == 'G' || *p == 'g') x *= 1e9;
|
||||
else if (*p == 'M' || *p == 'm') x *= 1e6;
|
||||
else if (*p == 'K' || *p == 'k') x *= 1e3;
|
||||
return (int64_t)(x + .499);
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
mm_mapopt_t opt;
|
||||
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx, idx_par_set = 0;
|
||||
int i, c, k = 15, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx, idx_par_set = 0, max_intron_len = 0;
|
||||
int minibatch_size = 200000000;
|
||||
uint64_t batch_size = 4000000000ULL;
|
||||
mm_bseq_file_t *fp = 0;
|
||||
|
|
@ -59,12 +72,12 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'k') k = atoi(optarg), idx_par_set = 1;
|
||||
else if (c == 'H') is_hpc = 1, idx_par_set = 1;
|
||||
else if (c == 'd') fnw = optarg; // the above are indexing related options, except -I
|
||||
else if (c == 'r') opt.bw = atoi(optarg);
|
||||
else if (c == 'r') opt.bw = (int)mm_parse_num(optarg);
|
||||
else if (c == 'f') opt.mid_occ_frac = atof(optarg);
|
||||
else if (c == 't') n_threads = atoi(optarg);
|
||||
else if (c == 'v') mm_verbose = atoi(optarg);
|
||||
else if (c == 'g') opt.max_gap = atoi(optarg);
|
||||
else if (c == 'G') opt.max_gap_ref = atoi(optarg);
|
||||
else if (c == 'g') opt.max_gap = (int)mm_parse_num(optarg);
|
||||
else if (c == 'G') max_intron_len = (int)mm_parse_num(optarg);
|
||||
else if (c == 'N') opt.best_n = atoi(optarg);
|
||||
else if (c == 'p') opt.pri_ratio = atof(optarg);
|
||||
else if (c == 'M') opt.mask_level = atof(optarg);
|
||||
|
|
@ -80,6 +93,8 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'B') opt.b = atoi(optarg);
|
||||
else if (c == 'z') opt.zdrop = atoi(optarg);
|
||||
else if (c == 's') opt.min_dp_max = atoi(optarg);
|
||||
else if (c == 'I') batch_size = mm_parse_num(optarg);
|
||||
else if (c == 'K') minibatch_size = (int)mm_parse_num(optarg);
|
||||
else if (c == 0 && long_idx == 0) bucket_bits = atoi(optarg); // --bucket-bits
|
||||
else if (c == 0 && long_idx == 2) keep_name = 0; // --int-rname
|
||||
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
|
||||
|
|
@ -89,6 +104,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
|
||||
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
|
||||
else if (c == 0 && long_idx == 9) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ; // --print-aln-seq
|
||||
else if (c == 0 && long_idx ==10) opt.flag |= MM_F_SPLICE; // --splice
|
||||
else if (c == 'V') {
|
||||
puts(MM_VERSION);
|
||||
return 0;
|
||||
|
|
@ -98,15 +114,6 @@ int main(int argc, char *argv[])
|
|||
} else if (c == 'E') {
|
||||
opt.e = opt.e2 = strtol(optarg, &s, 10);
|
||||
if (*s == ',') opt.e2 = strtol(s + 1, &s, 10);
|
||||
} else if (c == 'I' || c == 'K') {
|
||||
double x;
|
||||
char *p;
|
||||
x = strtod(optarg, &p);
|
||||
if (*p == 'G' || *p == 'g') x *= 1e9;
|
||||
else if (*p == 'M' || *p == 'm') x *= 1e6;
|
||||
else if (*p == 'K' || *p == 'k') x *= 1e3;
|
||||
if (c == 'I') batch_size = (uint64_t)(x + .499);
|
||||
else minibatch_size = (uint64_t)(x + .499);
|
||||
} else if (c == 'x') {
|
||||
if (strcmp(optarg, "ava-ont") == 0) {
|
||||
opt.flag |= MM_F_AVA | MM_F_NO_SELF;
|
||||
|
|
@ -130,9 +137,9 @@ int main(int argc, char *argv[])
|
|||
k = 19, w = 19;
|
||||
opt.a = 1, opt.b = 9, opt.q = 16, opt.q2 = 41, opt.e = 2, opt.e2 = 1, opt.zdrop = 200;
|
||||
opt.min_dp_max = 200;
|
||||
} else if (strcmp(optarg, "cdna") == 0) {
|
||||
} else if (strcmp(optarg, "splice") == 0 || strcmp(optarg, "cdna") == 0) {
|
||||
k = 15, w = 5;
|
||||
opt.flag |= MM_F_CDNA;
|
||||
opt.flag |= MM_F_SPLICE;
|
||||
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 100000;
|
||||
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0;
|
||||
opt.zdrop = 200;
|
||||
|
|
@ -143,6 +150,8 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
}
|
||||
if (w < 0) w = (int)(.6666667 * k + .499);
|
||||
if ((opt.flag & MM_F_SPLICE) && max_intron_len > 0)
|
||||
opt.max_gap_ref = opt.bw = max_intron_len;
|
||||
|
||||
if (argc == optind) {
|
||||
fprintf(stderr, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
|
||||
|
|
@ -163,6 +172,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
|
||||
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
|
||||
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
|
||||
fprintf(stderr, " -G NUM max intron length (only effective following -x splice) [100k]\n");
|
||||
fprintf(stderr, " Alignment:\n");
|
||||
fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
|
||||
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
|
||||
|
|
@ -187,6 +197,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(stderr, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
|
||||
fprintf(stderr, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (PacBio read overlap)\n");
|
||||
fprintf(stderr, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 -K500m --max-chain-skip 25 (ONT read overlap)\n");
|
||||
fprintf(stderr, " splice: -k15 -w5 --splice -g2000 -G100k -A1 -B2 -O2,32 -E1,0 -z200 (long-read spliced aln)\n");
|
||||
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
|
||||
return 1;
|
||||
}
|
||||
|
|
|
|||
6
map.c
6
map.c
|
|
@ -245,7 +245,7 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
i == 0? 0 : ((int32_t)a[i].y - (int32_t)a[i-1].y) - ((int32_t)a[i].x - (int32_t)a[i-1].x));
|
||||
|
||||
max_gap_ref = opt->max_gap_ref >= 0? opt->max_gap_ref : opt->max_gap;
|
||||
n_u = mm_chain_dp(max_gap_ref, opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, !!(opt->flag&MM_F_CDNA), n_a, a, &u, b->km);
|
||||
n_u = mm_chain_dp(max_gap_ref, opt->max_gap, opt->bw, opt->max_chain_skip, opt->min_cnt, opt->min_chain_score, !!(opt->flag&MM_F_SPLICE), n_a, a, &u, b->km);
|
||||
regs = mm_gen_regs(b->km, qlen, n_u, u, a);
|
||||
*n_regs = n_u;
|
||||
|
||||
|
|
@ -258,7 +258,7 @@ mm_reg1_t *mm_map_frag(const mm_mapopt_t *opt, const mm_idx_t *mi, mm_tbuf_t *b,
|
|||
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap
|
||||
mm_set_parent(b->km, opt->mask_level, *n_regs, regs);
|
||||
mm_select_sub(b->km, opt->mask_level, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
|
||||
if (!(opt->flag & MM_F_CDNA))
|
||||
if (!(opt->flag & MM_F_SPLICE))
|
||||
mm_join_long(b->km, opt, qlen, n_regs, regs, a); // TODO: this can be applied to all-vs-all in principle
|
||||
}
|
||||
if (opt->flag & MM_F_CIGAR) {
|
||||
|
|
@ -347,7 +347,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
int intron_thres = -1;
|
||||
for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]);
|
||||
free(s->buf);
|
||||
if (p->opt->flag & MM_F_CDNA)
|
||||
if (p->opt->flag & MM_F_SPLICE)
|
||||
intron_thres = mm_min_intron_len(p->opt->q, p->opt->e, p->opt->q2);
|
||||
if ((p->opt->flag & MM_F_OUT_CS) && !(mm_dbg_flag & MM_DBG_NO_KALLOC)) km = km_init();
|
||||
for (i = 0; i < s->n_seq; ++i) {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@
|
|||
#define MM_F_NO_QUAL 0x10
|
||||
#define MM_F_OUT_CG 0x20
|
||||
#define MM_F_OUT_CS 0x40
|
||||
#define MM_F_CDNA 0x80
|
||||
#define MM_F_SPLICE 0x80
|
||||
|
||||
#define MM_IDX_MAGIC "MMI\2"
|
||||
|
||||
|
|
|
|||
28
minimap2.1
28
minimap2.1
|
|
@ -162,6 +162,12 @@ secondary alignments [5]. This option has no effect when
|
|||
.B -X
|
||||
is applied.
|
||||
.TP
|
||||
.BI -G \ NUM
|
||||
Maximal intron length in the splice mode. This option also changes the
|
||||
bandwidth to
|
||||
.IR NUM .
|
||||
Increasing this option slows down spliced alignment.
|
||||
.TP
|
||||
.BI --max-chain-skip \ INT
|
||||
A heuristics that stops chaining early [50]. Minimap2 uses dynamic programming
|
||||
for chaining. The time complexity is quadratic in the number of seeds. This
|
||||
|
|
@ -277,13 +283,13 @@ Long assembly to reference mapping
|
|||
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200
|
||||
.BR -z200 ).
|
||||
Up to 10% sequence divergence.
|
||||
.TP 8
|
||||
.TP
|
||||
.B ava-pb
|
||||
PacBio all-vs-all overlap mapping
|
||||
.RB ( -Hk19
|
||||
.B -w5 -Xp0 -m100 -K500m -g10000 --max-chain-skip
|
||||
.BR 25 ).
|
||||
.TP 8
|
||||
.TP
|
||||
.B ava-ont
|
||||
Oxford Nanopore all-vs-all overlap mapping
|
||||
.RB ( -k15
|
||||
|
|
@ -292,6 +298,20 @@ Oxford Nanopore all-vs-all overlap mapping
|
|||
Similarly, the major difference from
|
||||
.B ava-pb
|
||||
is that this preset is not using HPC minimizers.
|
||||
.TP
|
||||
.B splice
|
||||
Long-read spliced alignment
|
||||
.RB ( -k15
|
||||
.B -w5 --splice -g2000 -G100k -A1 -B2 -O2,32 -E1,0
|
||||
.BR -z200 ).
|
||||
As of now, minimap2 only finds approximate exon boundaries. The true boundaries
|
||||
are usually within 10bp around the reported positions. In the splice mode,
|
||||
1) long deletions are taken as introns and represented as the
|
||||
.RB ` N '
|
||||
CIGAR operator; 2) long insertions are disabled; 3) deletion and insertion gap
|
||||
costs are different during chaining; 4) the computation of the
|
||||
.RB ` ms '
|
||||
tag ignores introns to demote hits to pseudogenes.
|
||||
.RE
|
||||
.SS Miscellaneous options
|
||||
.TP 10
|
||||
|
|
@ -367,6 +387,10 @@ Minimap2 does not work well with Illumina short reads as of now.
|
|||
*
|
||||
Minimap2 requires SSE2 instructions to compile. It is possible to add
|
||||
non-SSE2 support, but it would make minimap2 slower by several times.
|
||||
.TP
|
||||
*
|
||||
In the splice mode, minimap2 is unable to find the precise exon boundaries.
|
||||
The true bounraries are usually within 10bp around the reported locations.
|
||||
.SH SEE ALSO
|
||||
.PP
|
||||
miniasm(1), minimap(1), bwa(1).
|
||||
|
|
|
|||
Loading…
Reference in New Issue