r671: cleanup command line options

This commit is contained in:
Heng Li 2018-01-31 13:59:52 -05:00
parent 46d6349af4
commit da6947cfa3
5 changed files with 94 additions and 64 deletions

56
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "getopt.h" #include "getopt.h"
#define MM_VERSION "2.7-r670-dirty" #define MM_VERSION "2.7-r671-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>
@ -28,7 +28,7 @@ static struct option long_options[] = {
{ "seed", required_argument, 0, 0 }, { "seed", required_argument, 0, 0 },
{ "no-kalloc", no_argument, 0, 0 }, { "no-kalloc", no_argument, 0, 0 },
{ "print-qname", no_argument, 0, 0 }, { "print-qname", no_argument, 0, 0 },
{ "no-self", no_argument, 0, 0 }, { "no-self", no_argument, 0, 'D' },
{ "print-seeds", no_argument, 0, 0 }, { "print-seeds", no_argument, 0, 0 },
{ "max-chain-skip", required_argument, 0, 0 }, { "max-chain-skip", required_argument, 0, 0 },
{ "min-dp-len", required_argument, 0, 0 }, { "min-dp-len", required_argument, 0, 0 },
@ -37,17 +37,19 @@ static struct option long_options[] = {
{ "cost-non-gt-ag", required_argument, 0, 'C' }, { "cost-non-gt-ag", required_argument, 0, 'C' },
{ "no-long-join", no_argument, 0, 0 }, { "no-long-join", no_argument, 0, 0 },
{ "sr", no_argument, 0, 0 }, { "sr", no_argument, 0, 0 },
{ "frag", optional_argument, 0, 0 }, { "frag", required_argument, 0, 0 },
{ "secondary", optional_argument, 0, 0 }, { "secondary", required_argument, 0, 0 },
{ "cs", optional_argument, 0, 0 }, { "cs", optional_argument, 0, 0 },
{ "end-bonus", required_argument, 0, 0 }, { "end-bonus", required_argument, 0, 0 },
{ "no-pairing", no_argument, 0, 0 }, { "no-pairing", no_argument, 0, 0 },
{ "splice-flank", optional_argument, 0, 0 }, { "splice-flank", required_argument, 0, 0 },
{ "idx-no-seq", no_argument, 0, 0 }, { "idx-no-seq", no_argument, 0, 0 },
{ "end-seed-pen", required_argument, 0, 0 }, // 21 { "end-seed-pen", required_argument, 0, 0 }, // 21
{ "for-only", no_argument, 0, 0 }, // 22 { "for-only", no_argument, 0, 0 }, // 22
{ "rev-only", no_argument, 0, 0 }, // 23 { "rev-only", no_argument, 0, 0 }, // 23
{ "heap-sort", optional_argument, 0, 0 }, // 24 { "heap-sort", required_argument, 0, 0 }, // 24
{ "all-chain", no_argument, 0, 'P' },
{ "dual", required_argument, 0, 0 }, // 26
{ "help", no_argument, 0, 'h' }, { "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' }, { "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' }, { "version", no_argument, 0, 'V' },
@ -70,9 +72,22 @@ static inline int64_t mm_parse_num(const char *str)
return (int64_t)(x + .499); return (int64_t)(x + .499);
} }
static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set)
{
if (yes_to_set) {
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag &= ~flag;
else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
} else {
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag &= ~flag;
else if (strcmp(arg, "no") == 0 || strcmp(arg, "n") == 0) opt->flag |= flag;
else fprintf(stderr, "[WARNING]\033[1;31m option '--%s' only accepts 'yes' or 'no'.\033[0m\n", long_options[long_idx].name);
}
}
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
const char *opt_str = "2aSw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:"; const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:";
mm_mapopt_t opt; mm_mapopt_t opt;
mm_idxopt_t ipt; mm_idxopt_t ipt;
int i, c, n_threads = 3, long_idx; int i, c, n_threads = 3, long_idx;
@ -111,7 +126,9 @@ int main(int argc, char *argv[])
else if (c == 'p') opt.pri_ratio = atof(optarg); else if (c == 'p') opt.pri_ratio = atof(optarg);
else if (c == 'M') opt.mask_level = atof(optarg); else if (c == 'M') opt.mask_level = atof(optarg);
else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR; else if (c == 'c') opt.flag |= MM_F_OUT_CG | MM_F_CIGAR;
else if (c == 'X') opt.flag |= MM_F_AVA | MM_F_NO_SELF; else if (c == 'D') opt.flag |= MM_F_NO_DIAG;
else if (c == 'P') opt.flag |= MM_F_ALL_CHAINS;
else if (c == 'X') opt.flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN; // -D -P --no-long-join --dual=no
else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR; else if (c == 'a') opt.flag |= MM_F_OUT_SAM | MM_F_CIGAR;
else if (c == 'Q') opt.flag |= MM_F_NO_QUAL; else if (c == 'Q') opt.flag |= MM_F_NO_QUAL;
else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP; else if (c == 'Y') opt.flag |= MM_F_SOFTCLIP;
@ -133,7 +150,6 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed else if (c == 0 && long_idx == 2) opt.seed = atoi(optarg); // --seed
else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc else if (c == 0 && long_idx == 3) mm_dbg_flag |= MM_DBG_NO_KALLOC; // --no-kalloc
else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname else if (c == 0 && long_idx == 4) mm_dbg_flag |= MM_DBG_PRINT_QNAME; // --print-qname
else if (c == 0 && long_idx == 5) opt.flag |= MM_F_NO_SELF; // --no-self
else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed else if (c == 0 && long_idx == 6) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_SEED, n_threads = 1; // --print-seed
else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip else if (c == 0 && long_idx == 7) opt.max_chain_skip = atoi(optarg); // --max-chain-skip
else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len else if (c == 0 && long_idx == 8) opt.min_ksw_len = atoi(optarg); // --min-dp-len
@ -148,13 +164,9 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only else if (c == 0 && long_idx ==22) opt.flag |= MM_F_FOR_ONLY; // --for-only
else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only else if (c == 0 && long_idx ==23) opt.flag |= MM_F_REV_ONLY; // --rev-only
else if (c == 0 && long_idx == 14) { // --frag else if (c == 0 && long_idx == 14) { // --frag
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
opt.flag |= MM_F_FRAG_MODE;
else opt.flag &= ~MM_F_FRAG_MODE;
} else if (c == 0 && long_idx == 15) { // --secondary } else if (c == 0 && long_idx == 15) { // --secondary
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) yes_or_no(&opt, MM_F_NO_PRINT_2ND, long_idx, optarg, 0);
opt.flag &= ~MM_F_NO_PRINT_2ND;
else opt.flag |= MM_F_NO_PRINT_2ND;
} else if (c == 0 && long_idx == 16) { // --cs } else if (c == 0 && long_idx == 16) { // --cs
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR; opt.flag |= MM_F_OUT_CS | MM_F_CIGAR;
if (optarg == 0 || strcmp(optarg, "short") == 0) { if (optarg == 0 || strcmp(optarg, "short") == 0) {
@ -167,13 +179,11 @@ int main(int argc, char *argv[])
fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n"); fprintf(stderr, "[WARNING]\033[1;31m --cs only takes 'short' or 'long'. Invalid values are assumed to be 'short'.\033[0m\n");
} }
} else if (c == 0 && long_idx == 19) { // --splice-flank } else if (c == 0 && long_idx == 19) { // --splice-flank
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) yes_or_no(&opt, MM_F_SPLICE_FLANK, long_idx, optarg, 1);
opt.flag |= MM_F_SPLICE_FLANK;
else opt.flag &= ~MM_F_SPLICE_FLANK;
} else if (c == 0 && long_idx == 24) { // --heap-sort } else if (c == 0 && long_idx == 24) { // --heap-sort
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0) yes_or_no(&opt, MM_F_HEAP_SORT, long_idx, optarg, 1);
opt.flag |= MM_F_HEAP_SORT; } else if (c == 0 && long_idx == 26) { // --dual
else opt.flag &= ~MM_F_HEAP_SORT; yes_or_no(&opt, MM_F_NO_DUAL, long_idx, optarg, 0);
} else if (c == 'S') { } else if (c == 'S') {
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG; opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
if (mm_verbose >= 2) if (mm_verbose >= 2)
@ -261,8 +271,8 @@ int main(int argc, char *argv[])
fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n"); fprintf(fp_help, " map-ont: -k15 (Oxford Nanopore vs reference mapping)\n");
fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n"); fprintf(fp_help, " asm5: -k19 -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 (asm to ref mapping; break at 5%% div.)\n");
fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n"); fprintf(fp_help, " asm10: -k19 -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 (asm to ref mapping; break at 10%% div.)\n");
fprintf(fp_help, " ava-pb: -Hk19 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n"); fprintf(fp_help, " ava-pb: -Hk19 -Xw5 -m100 -g10000 --max-chain-skip 25 (PacBio read overlap)\n");
fprintf(fp_help, " ava-ont: -k15 -w5 -Xp0 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n"); fprintf(fp_help, " ava-ont: -k15 -Xw5 -m100 -g10000 --max-chain-skip 25 (ONT read overlap)\n");
fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n"); fprintf(fp_help, " splice: long-read spliced alignment (see minimap2.1 for details)\n");
fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n"); fprintf(fp_help, " sr: short single-end reads without splicing (see minimap2.1 for details)\n");
fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); fprintf(fp_help, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");

12
map.c
View File

@ -116,15 +116,15 @@ static mm_match_t *collect_matches(void *km, int *_n_m, int max_occ, const mm_id
static inline int skip_seed(int flag, uint64_t r, const mm_match_t *q, const char *qname, int qlen, const mm_idx_t *mi, int *is_self) static inline int skip_seed(int flag, uint64_t r, const mm_match_t *q, const char *qname, int qlen, const mm_idx_t *mi, int *is_self)
{ {
*is_self = 0; *is_self = 0;
if (qname && (flag & (MM_F_NO_SELF|MM_F_AVA))) { if (qname && (flag & (MM_F_NO_DIAG|MM_F_NO_DUAL))) {
const mm_idx_seq_t *s = &mi->seq[r>>32]; const mm_idx_seq_t *s = &mi->seq[r>>32];
int cmp; int cmp;
cmp = strcmp(qname, s->name); cmp = strcmp(qname, s->name);
if ((flag&MM_F_NO_SELF) && cmp == 0 && s->len == qlen) { if ((flag&MM_F_NO_DIAG) && cmp == 0 && s->len == qlen) {
if ((uint32_t)r>>1 == (q->q_pos>>1)) return 1; // avoid the diagnonal anchors if ((uint32_t)r>>1 == (q->q_pos>>1)) return 1; // avoid the diagnonal anchors
if ((r&1) == (q->q_pos&1)) *is_self = 1; // this flag is used to avoid spurious extension on self chain if ((r&1) == (q->q_pos&1)) *is_self = 1; // this flag is used to avoid spurious extension on self chain
} }
if ((flag&MM_F_AVA) && cmp > 0) // all-vs-all mode: map once if ((flag&MM_F_NO_DUAL) && cmp > 0) // all-vs-all mode: map once
return 1; return 1;
} }
if (flag & (MM_F_FOR_ONLY|MM_F_REV_ONLY)) { if (flag & (MM_F_FOR_ONLY|MM_F_REV_ONLY)) {
@ -237,11 +237,11 @@ static mm128_t *collect_seed_hits(void *km, const mm_mapopt_t *opt, int max_occ,
static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a) static void chain_post(const mm_mapopt_t *opt, int max_chain_gap_ref, const mm_idx_t *mi, void *km, int qlen, int n_segs, const int *qlens, int *n_regs, mm_reg1_t *regs, mm128_t *a)
{ {
if (!(opt->flag & MM_F_AVA)) { // don't choose primary mapping(s) for read overlap if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b); mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); if (n_segs <= 1) mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs); else mm_select_sub_multi(km, opt->pri_ratio, 0.2f, 0.7f, max_chain_gap_ref, mi->k*2, opt->best_n, n_segs, qlens, n_regs, regs);
if (!(opt->flag & MM_F_SPLICE) && !(opt->flag & MM_F_SR) && !(opt->flag & MM_F_NO_LJOIN)) if (!(opt->flag & (MM_F_SPLICE|MM_F_SR|MM_F_NO_LJOIN))) // long join not working well without primary chains
mm_join_long(km, opt, qlen, n_regs, regs, a); mm_join_long(km, opt, qlen, n_regs, regs, a);
} }
} }
@ -250,7 +250,7 @@ static mm_reg1_t *align_regs(const mm_mapopt_t *opt, const mm_idx_t *mi, void *k
{ {
if (!(opt->flag & MM_F_CIGAR)) return regs; if (!(opt->flag & MM_F_CIGAR)) return regs;
regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs() regs = mm_align_skeleton(km, opt, mi, qlen, seq, n_regs, regs, a); // this calls mm_filter_regs()
if (!(opt->flag & MM_F_AVA)) { if (!(opt->flag & MM_F_ALL_CHAINS)) { // don't choose primary mapping(s)
mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b); mm_set_parent(km, opt->mask_level, *n_regs, regs, opt->a * 2 + opt->b);
mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs); mm_select_sub(km, opt->pri_ratio, mi->k*2, opt->best_n, n_regs, regs);
mm_set_sam_pri(*n_regs, regs); mm_set_sam_pri(*n_regs, regs);

View File

@ -5,20 +5,20 @@
#include <stdio.h> #include <stdio.h>
#include <sys/types.h> #include <sys/types.h>
#define MM_F_NO_SELF 0x001 #define MM_F_NO_DIAG 0x001 // no exact diagonal hit
#define MM_F_AVA 0x002 #define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
#define MM_F_CIGAR 0x004 #define MM_F_CIGAR 0x004
#define MM_F_OUT_SAM 0x008 #define MM_F_OUT_SAM 0x008
#define MM_F_NO_QUAL 0x010 #define MM_F_NO_QUAL 0x010
#define MM_F_OUT_CG 0x020 #define MM_F_OUT_CG 0x020
#define MM_F_OUT_CS 0x040 #define MM_F_OUT_CS 0x040
#define MM_F_SPLICE 0x080 // splice mode #define MM_F_SPLICE 0x080 // splice mode
#define MM_F_SPLICE_FOR 0x100 // match GT-AG #define MM_F_SPLICE_FOR 0x100 // match GT-AG
#define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG #define MM_F_SPLICE_REV 0x200 // match CT-AC, the reverse complement of GT-AG
#define MM_F_NO_LJOIN 0x400 #define MM_F_NO_LJOIN 0x400
#define MM_F_OUT_CS_LONG 0x800 #define MM_F_OUT_CS_LONG 0x800
#define MM_F_SR 0x1000 #define MM_F_SR 0x1000
#define MM_F_FRAG_MODE 0x2000 #define MM_F_FRAG_MODE 0x2000
#define MM_F_NO_PRINT_2ND 0x4000 #define MM_F_NO_PRINT_2ND 0x4000
#define MM_F_2_IO_THREADS 0x8000 #define MM_F_2_IO_THREADS 0x8000
#define MM_F_LONG_CIGAR 0x10000 #define MM_F_LONG_CIGAR 0x10000
@ -28,6 +28,7 @@
#define MM_F_FOR_ONLY 0x100000 #define MM_F_FOR_ONLY 0x100000
#define MM_F_REV_ONLY 0x200000 #define MM_F_REV_ONLY 0x200000
#define MM_F_HEAP_SORT 0x400000 #define MM_F_HEAP_SORT 0x400000
#define MM_F_ALL_CHAINS 0x800000
#define MM_I_HPC 0x1 #define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2 #define MM_I_NO_SEQ 0x2

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "26 January 2018" "minimap2-2.7-dirty (r664)" "Bioinformatics tools" .TH minimap2 1 "31 January 2018" "minimap2-2.8-dirty (r671)" "Bioinformatics tools"
.SH NAME .SH NAME
.PP .PP
minimap2 - mapping and alignment between collections of DNA sequences minimap2 - mapping and alignment between collections of DNA sequences
@ -129,7 +129,7 @@ Ignore top
fraction of most frequent minimizers [0.0002] fraction of most frequent minimizers [0.0002]
.TP .TP
.BI -g \ INT .BI -g \ INT
Stop chain enlongation if there are no minimizers in Stop chain enlongation if there are no minimizers within
.IR INT -bp .IR INT -bp
[10000]. [10000].
.TP .TP
@ -148,11 +148,28 @@ Discard chains with chaining score
[40]. Chaining score equals the approximate number of matching bases minus a [40]. Chaining score equals the approximate number of matching bases minus a
concave gap penalty. It is computed with dynamic programming. concave gap penalty. It is computed with dynamic programming.
.TP .TP
.B -D
If query sequence name/length are identical to the target name/length, ignore
diagonal anchors. This option also reduces DP-based extension along the
diagonal.
.TP
.B -P
Retain all chains and don't attempt to set primary chains. Options
.B -p
and
.B -N
have no effect when this option is in use.
.TP
.BR --dual = yes | no
During chaining, whether to skip pairs wherein the query name is
lexicographically greater than the target name [yes]
.TP
.B -X .B -X
Perform all-vs-all mapping. In this mode, if the query sequence name is Equivalent to
lexicographically larger than the target sequence name, the hits between them .RB ' -DP
will be suppressed; if the query sequence name is the same as the target name, .BR --dual = no
diagonal minimizer hits will also be suppressed. .BR --no-long-join '.
Primarily used for all-vs-all read overlapping.
.TP .TP
.BI -p \ FLOAT .BI -p \ FLOAT
Minimal secondary-to-primary score ratio to output secondary mappings [0.8]. Minimal secondary-to-primary score ratio to output secondary mappings [0.8].
@ -162,6 +179,9 @@ the chain with a lower score is secondary to the chain with a higher score.
If the ratio of the scores is below If the ratio of the scores is below
.IR FLOAT , .IR FLOAT ,
the secondary chain will not be outputted or extended with DP alignment later. the secondary chain will not be outputted or extended with DP alignment later.
This option has no effect when
.B -X
is applied.
.TP .TP
.BI -N \ INT .BI -N \ INT
Output at most Output at most
@ -179,7 +199,7 @@ Increasing this option slows down spliced alignment. [200k]
.TP .TP
.BI -F \ NUM .BI -F \ NUM
Maximum fragment length (aka insert size; effective with Maximum fragment length (aka insert size; effective with
.BR -xsr / --frag) .BR -xsr / --frag = yes )
[800] [800]
.TP .TP
.BI -M \ FLOAT .BI -M \ FLOAT
@ -209,7 +229,7 @@ applies a second round of chaining with a higher minimizer occurrence threshold
if no good chain is found. In addition, minimap2 attempts to patch gaps between if no good chain is found. In addition, minimap2 attempts to patch gaps between
seeds with ungapped alignment. seeds with ungapped alignment.
.TP .TP
.BR --frag [= no | yes ] .BR --frag = no | yes
Whether to enable the fragment mode [no] Whether to enable the fragment mode [no]
.TP .TP
.B --for-only .B --for-only
@ -220,7 +240,7 @@ strand of the reference and the second read to the reverse stand.
.B --rev-only .B --rev-only
Only map to the reverse complement strand of the reference sequences. Only map to the reverse complement strand of the reference sequences.
.TP .TP
.BR --heap-sort [= no | yes ] .BR --heap-sort = no | yes
If yes, sort anchors with heap merge, instead of radix sort. Heap merge is If yes, sort anchors with heap merge, instead of radix sort. Heap merge is
faster for short reads, but slower for long reads. [no] faster for short reads, but slower for long reads. [no]
.SS Alignment options .SS Alignment options
@ -272,14 +292,13 @@ no attempt to match GT-AG [n]
.BI --end-bonus \ INT .BI --end-bonus \ INT
Score bonus when alignment extends to the end of the query sequence [0]. Score bonus when alignment extends to the end of the query sequence [0].
.TP .TP
.BR --splice-flank [= yes | no ] .BR --splice-flank = yes | no
Assume the next base to a Assume the next base to a
.B GT .B GT
donor site tends to be A/G (91% in human and 92% in mouse) and the preceding donor site tends to be A/G (91% in human and 92% in mouse) and the preceding
base to a base to a
.B AG .B AG
acceptor tends to be C/T [yes with acceptor tends to be C/T [no].
.BR --splice ].
This trend is evolutionarily conservative, all the way to S. cerevisiae This trend is evolutionarily conservative, all the way to S. cerevisiae
(PMID:18688272). Specifying this option generally leads to higher junction (PMID:18688272). Specifying this option generally leads to higher junction
accuracy by several percents, so it is applied by default with accuracy by several percents, so it is applied by default with
@ -369,7 +388,7 @@ K/M/G/k/m/g suffix is accepted. A large
helps load balancing in the multi-threading mode, at the cost of increased helps load balancing in the multi-threading mode, at the cost of increased
memory. memory.
.TP .TP
.BR --secondary [= yes | no ] .BR --secondary = yes | no
Whether to output secondary alignments [yes] Whether to output secondary alignments [yes]
.TP .TP
.B --version .B --version
@ -416,13 +435,13 @@ Up to 10% sequence divergence.
.B ava-pb .B ava-pb
PacBio all-vs-all overlap mapping PacBio all-vs-all overlap mapping
.RB ( -Hk19 .RB ( -Hk19
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip .B -Xw5 -m100 -g10000 --max-chain-skip
.BR 25 ). .BR 25 ).
.TP .TP
.B ava-ont .B ava-ont
Oxford Nanopore all-vs-all overlap mapping Oxford Nanopore all-vs-all overlap mapping
.RB ( -k15 .RB ( -k15
.B -w5 -Xp0 -m100 -g10000 --max-chain-skip .B -Xw5 -m100 -g10000 --max-chain-skip
.BR 25 ). .BR 25 ).
Similarly, the major difference from Similarly, the major difference from
.B ava-pb .B ava-pb
@ -444,8 +463,8 @@ tag ignores introns to demote hits to pseudogenes.
.B sr .B sr
Short single-end reads without splicing Short single-end reads without splicing
.RB ( -k21 .RB ( -k21
.B -w11 --sr --frag -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20 .B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
.B -s40 -g200 -2K50m --heap-sort .B -s40 -g200 -2K50m --heap-sort=yes
.BR --secondary=no ). .BR --secondary=no ).
.RE .RE
.SS Miscellaneous options .SS Miscellaneous options
@ -539,8 +558,8 @@ where seed positions may be suboptimal. This should not be a big concern
because even the optimal alignment may be wrong in such regions. because even the optimal alignment may be wrong in such regions.
.TP .TP
* *
Minimap2 requires SSE2 instructions to compile. It is possible to add Minimap2 requires SSE2 or NEON instructions to compile. It is possible to add
non-SSE2 support, but it would make minimap2 slower by several times. non-SSE2/NEON support, but it would make minimap2 slower by several times.
.SH SEE ALSO .SH SEE ALSO
.PP .PP
miniasm(1), minimap(1), bwa(1). miniasm(1), minimap(1), bwa(1).

View File

@ -58,11 +58,11 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mm_mapopt_init(mo); mm_mapopt_init(mo);
} else if (strcmp(preset, "ava-ont") == 0) { } else if (strcmp(preset, "ava-ont") == 0) {
io->flag = 0, io->k = 15, io->w = 5; io->flag = 0, io->k = 15, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
} else if (strcmp(preset, "ava-pb") == 0) { } else if (strcmp(preset, "ava-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19, io->w = 5; io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
mo->flag |= MM_F_AVA | MM_F_NO_SELF; mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25; mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) { } else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
io->flag |= MM_I_HPC, io->k = 19; io->flag |= MM_I_HPC, io->k = 19;