r1034: changed multiple defaults; updated manpage
This commit is contained in:
parent
bbb4f97e52
commit
6c96078ed0
5
main.c
5
main.c
|
|
@ -7,7 +7,7 @@
|
||||||
#include "mmpriv.h"
|
#include "mmpriv.h"
|
||||||
#include "ketopt.h"
|
#include "ketopt.h"
|
||||||
|
|
||||||
#define MM_VERSION "2.18-r1028-dirty"
|
#define MM_VERSION "2.18-r1034-dirty"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
@ -339,7 +339,8 @@ int main(int argc, char *argv[])
|
||||||
fprintf(fp_help, " --version show version number\n");
|
fprintf(fp_help, " --version show version number\n");
|
||||||
fprintf(fp_help, " Preset:\n");
|
fprintf(fp_help, " Preset:\n");
|
||||||
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
||||||
fprintf(fp_help, " - map-pb/map-ont - PacBio/Nanopore vs reference mapping\n");
|
fprintf(fp_help, " - map-pb/map-ont - PacBio CLR/Nanopore vs reference mapping\n");
|
||||||
|
fprintf(fp_help, " - map-hifi - PacBio HiFi reads vs reference mapping\n");
|
||||||
fprintf(fp_help, " - ava-pb/ava-ont - PacBio/Nanopore read overlap\n");
|
fprintf(fp_help, " - ava-pb/ava-ont - PacBio/Nanopore read overlap\n");
|
||||||
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
||||||
fprintf(fp_help, " - splice/splice:hq - long-read/Pacbio-CCS spliced alignment\n");
|
fprintf(fp_help, " - splice/splice:hq - long-read/Pacbio-CCS spliced alignment\n");
|
||||||
|
|
|
||||||
98
minimap2.1
98
minimap2.1
|
|
@ -145,18 +145,25 @@ or
|
||||||
.B -xsr
|
.B -xsr
|
||||||
mode, which sets the threshold for a second round of seeding.
|
mode, which sets the threshold for a second round of seeding.
|
||||||
.TP
|
.TP
|
||||||
.BI --min-occ-floor \ INT
|
.BI -U \ INT1 [, INT2 ]
|
||||||
Force minimap2 to always use k-mers occurring
|
Lower and upper bounds of k-mer occurrences [10,1000000]. The final k-mer occurrence threshold is
|
||||||
.I INT
|
.RI max{ INT1 ,\ min{ INT2 ,
|
||||||
times or less [0]. In effect, the max occurrence threshold is set to
|
.BR -f }}.
|
||||||
the
|
This option prevents excessively small or large
|
||||||
.RI max{ INT ,
|
.B -f
|
||||||
.BR -f }.
|
estimated from the input reference. It deprecates
|
||||||
|
.B --min-occ-floor
|
||||||
|
in earlier versions of minimap2.
|
||||||
.TP
|
.TP
|
||||||
.BI -g \ INT
|
.BI -e \ INT
|
||||||
|
Sample a high-frequency minimizer every
|
||||||
|
.I INT
|
||||||
|
basepairs [500].
|
||||||
|
.TP
|
||||||
|
.BI -g \ NUM
|
||||||
Stop chain elongation if there are no minimizers within
|
Stop chain elongation if there are no minimizers within
|
||||||
.IR INT -bp
|
.IR NUM -bp
|
||||||
[10000].
|
[10k].
|
||||||
.TP
|
.TP
|
||||||
.BI -r \ INT
|
.BI -r \ INT
|
||||||
Bandwidth used in chaining and DP-based alignment [500]. This option
|
Bandwidth used in chaining and DP-based alignment [500]. This option
|
||||||
|
|
@ -234,6 +241,10 @@ Mark as secondary a chain that overlaps with a better chain by
|
||||||
.I FLOAT
|
.I FLOAT
|
||||||
or more of the shorter chain [0.5]
|
or more of the shorter chain [0.5]
|
||||||
.TP
|
.TP
|
||||||
|
.BR --rmq = no | yes
|
||||||
|
Use the minigraph chaining algorithm [no]. The minigraph algorithm is better
|
||||||
|
for aligning contigs through long INDELs.
|
||||||
|
.TP
|
||||||
.B --hard-mask-level
|
.B --hard-mask-level
|
||||||
Honor option
|
Honor option
|
||||||
.B -M
|
.B -M
|
||||||
|
|
@ -412,7 +423,7 @@ alignment.
|
||||||
.BI --cap-sw-mem \ NUM
|
.BI --cap-sw-mem \ NUM
|
||||||
Skip alignment if the DP matrix size is above
|
Skip alignment if the DP matrix size is above
|
||||||
.IR NUM .
|
.IR NUM .
|
||||||
Set 0 to disable [0].
|
Set 0 to disable [100m].
|
||||||
.SS Input/output options
|
.SS Input/output options
|
||||||
.TP 10
|
.TP 10
|
||||||
.B -a
|
.B -a
|
||||||
|
|
@ -523,66 +534,47 @@ Available
|
||||||
.I STR
|
.I STR
|
||||||
are:
|
are:
|
||||||
.RS
|
.RS
|
||||||
.TP 9
|
.TP 10
|
||||||
.B map-pb
|
|
||||||
PacBio/Oxford Nanopore read to reference mapping
|
|
||||||
.RB ( -Hk19 )
|
|
||||||
.TP
|
|
||||||
.B map-ont
|
.B map-ont
|
||||||
Slightly more sensitive for Oxford Nanopore to reference mapping
|
Align noisy long reads of ~10% error rate to a reference genome. This is the
|
||||||
.RB ( -k15 ).
|
default mode.
|
||||||
For PacBio reads, HPC minimizers consistently leads to faster performance and
|
|
||||||
more sensitive results in comparison to normal minimizers. For Oxford Nanopore
|
|
||||||
data, normal minimizers are better, though not much. The effectiveness of HPC
|
|
||||||
is determined by the sequencing error mode.
|
|
||||||
.TP
|
.TP
|
||||||
.B map-hifi
|
.B map-hifi
|
||||||
PacBio HiFi reads to reference mapping
|
Align PacBio high-fidelity (HiFi) reads to a reference genome
|
||||||
.RB ( -k19
|
.RB ( -k19
|
||||||
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -e100 -g10k
|
.B -w19 -U50,500 -A1 -B4 -O6,26 -E2,1
|
||||||
.BR -U100,500 ).
|
.BR -s200 ).
|
||||||
|
.TP
|
||||||
|
.B map-pb
|
||||||
|
Align older PacBio continuous long (CLR) reads to a reference genome
|
||||||
|
.RB ( -Hk19 ).
|
||||||
.TP
|
.TP
|
||||||
.B asm5
|
.B asm5
|
||||||
Long assembly to reference mapping
|
Long assembly to reference mapping
|
||||||
.RB ( -k19
|
.RB ( -k19
|
||||||
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 -N50
|
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B19 -O39,81 -E3,1 -s200 -z200
|
||||||
.BR --min-occ-floor=100 ).
|
.BR -N50 ).
|
||||||
Typically, the alignment will not extend to regions with 5% or higher sequence
|
Typically, the alignment will not extend to regions with 5% or higher sequence
|
||||||
divergence. Only use this preset if the average divergence is far below 5%.
|
divergence. Only use this preset if the average divergence is far below 5%.
|
||||||
.TP
|
.TP
|
||||||
.B asm10
|
.B asm10
|
||||||
Long assembly to reference mapping
|
Long assembly to reference mapping
|
||||||
.RB ( -k19
|
.RB ( -k19
|
||||||
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 -N50
|
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B9 -O16,41 -E2,1 -s200 -z200
|
||||||
.BR --min-occ-floor=100 ).
|
.BR -N50 ).
|
||||||
Up to 10% sequence divergence.
|
Up to 10% sequence divergence.
|
||||||
.TP
|
.TP
|
||||||
.B asm20
|
.B asm20
|
||||||
Long assembly to reference mapping
|
Long assembly to reference mapping
|
||||||
.RB ( -k19
|
.RB ( -k19
|
||||||
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -z200 -N50
|
.B -w10 -U50,500 --rmq -r100k --no-long-join -A1 -B4 -O6,26 -E2,1 -s200 -z200
|
||||||
.BR --min-occ-floor=100 ).
|
.BR -N50 ).
|
||||||
Up to 20% sequence divergence.
|
Up to 20% sequence divergence.
|
||||||
.TP
|
.TP
|
||||||
.B ava-pb
|
|
||||||
PacBio all-vs-all overlap mapping
|
|
||||||
.RB ( -Hk19
|
|
||||||
.B -Xw5 -m100 -g10000 --max-chain-skip
|
|
||||||
.BR 25 ).
|
|
||||||
.TP
|
|
||||||
.B ava-ont
|
|
||||||
Oxford Nanopore all-vs-all overlap mapping
|
|
||||||
.RB ( -k15
|
|
||||||
.B -Xw5 -m100 -g10000 -r2000 --max-chain-skip
|
|
||||||
.BR 25 ).
|
|
||||||
Similarly, the major difference from
|
|
||||||
.B ava-pb
|
|
||||||
is that this preset is not using HPC minimizers.
|
|
||||||
.TP
|
|
||||||
.B splice
|
.B splice
|
||||||
Long-read spliced alignment
|
Long-read spliced alignment
|
||||||
.RB ( -k15
|
.RB ( -k15
|
||||||
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9
|
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
|
||||||
.BR --splice-flank=yes ).
|
.BR --splice-flank=yes ).
|
||||||
In the splice mode, 1) long deletions are taken as introns and represented as
|
In the splice mode, 1) long deletions are taken as introns and represented as
|
||||||
the
|
the
|
||||||
|
|
@ -604,6 +596,18 @@ Short single-end reads without splicing
|
||||||
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
|
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
|
||||||
.B -s40 -g200 -2K50m --heap-sort=yes
|
.B -s40 -g200 -2K50m --heap-sort=yes
|
||||||
.BR --secondary=no ).
|
.BR --secondary=no ).
|
||||||
|
.TP
|
||||||
|
.B ava-pb
|
||||||
|
PacBio CLR all-vs-all overlap mapping
|
||||||
|
.RB ( -Hk19
|
||||||
|
.B -Xw5 -e0
|
||||||
|
.BR -m100 ).
|
||||||
|
.TP
|
||||||
|
.B ava-ont
|
||||||
|
Oxford Nanopore all-vs-all overlap mapping
|
||||||
|
.RB ( -k15
|
||||||
|
.B -Xw5 -e0 -m100
|
||||||
|
.BR -r2k ).
|
||||||
.RE
|
.RE
|
||||||
.SS Miscellaneous options
|
.SS Miscellaneous options
|
||||||
.TP 10
|
.TP 10
|
||||||
|
|
|
||||||
25
options.c
25
options.c
|
|
@ -16,7 +16,8 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
||||||
memset(opt, 0, sizeof(mm_mapopt_t));
|
memset(opt, 0, sizeof(mm_mapopt_t));
|
||||||
opt->seed = 11;
|
opt->seed = 11;
|
||||||
opt->mid_occ_frac = 2e-4f;
|
opt->mid_occ_frac = 2e-4f;
|
||||||
opt->max_mid_occ = 1000000000;
|
opt->min_mid_occ = 10;
|
||||||
|
opt->max_mid_occ = 1000000;
|
||||||
opt->sdust_thres = 0; // no SDUST masking
|
opt->sdust_thres = 0; // no SDUST masking
|
||||||
|
|
||||||
opt->min_cnt = 3;
|
opt->min_cnt = 3;
|
||||||
|
|
@ -63,12 +64,13 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||||
{
|
{
|
||||||
if ((opt->flag & MM_F_SPLICE_FOR) || (opt->flag & MM_F_SPLICE_REV))
|
if ((opt->flag & MM_F_SPLICE_FOR) || (opt->flag & MM_F_SPLICE_REV))
|
||||||
opt->flag |= MM_F_SPLICE;
|
opt->flag |= MM_F_SPLICE;
|
||||||
if (opt->mid_occ <= 0)
|
if (opt->mid_occ <= 0) {
|
||||||
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
||||||
if (opt->mid_occ < opt->min_mid_occ)
|
if (opt->mid_occ < opt->min_mid_occ)
|
||||||
opt->mid_occ = opt->min_mid_occ;
|
opt->mid_occ = opt->min_mid_occ;
|
||||||
if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
|
if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
|
||||||
opt->mid_occ = opt->max_mid_occ;
|
opt->mid_occ = opt->max_mid_occ;
|
||||||
|
}
|
||||||
if (mm_verbose >= 3)
|
if (mm_verbose >= 3)
|
||||||
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
||||||
}
|
}
|
||||||
|
|
@ -88,26 +90,27 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
||||||
} else if (strcmp(preset, "ava-ont") == 0) {
|
} else if (strcmp(preset, "ava-ont") == 0) {
|
||||||
io->flag = 0, io->k = 15, io->w = 5;
|
io->flag = 0, io->k = 15, io->w = 5;
|
||||||
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
||||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
|
||||||
mo->bw = 2000;
|
mo->bw = 2000;
|
||||||
|
mo->occ_dist = 0;
|
||||||
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
||||||
io->flag |= MM_I_HPC, io->k = 19;
|
io->flag |= MM_I_HPC, io->k = 19;
|
||||||
} else if (strcmp(preset, "ava-pb") == 0) {
|
} else if (strcmp(preset, "ava-pb") == 0) {
|
||||||
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
|
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
|
||||||
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
||||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
|
||||||
|
mo->occ_dist = 0;
|
||||||
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
|
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
|
||||||
io->flag = 0, io->k = 19, io->w = 19;
|
io->flag = 0, io->k = 19, io->w = 19;
|
||||||
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
|
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
|
||||||
mo->max_gap = 10000;
|
|
||||||
mo->occ_dist = 500;
|
mo->occ_dist = 500;
|
||||||
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
|
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
||||||
mo->min_dp_max = 200;
|
mo->min_dp_max = 200;
|
||||||
} else if (strncmp(preset, "asm", 3) == 0) {
|
} else if (strncmp(preset, "asm", 3) == 0) {
|
||||||
io->flag = 0, io->k = 19, io->w = 19;
|
io->flag = 0, io->k = 19, io->w = 19;
|
||||||
mo->bw = 100000;
|
mo->bw = 100000;
|
||||||
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
|
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
|
||||||
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
|
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
||||||
mo->min_dp_max = 200;
|
mo->min_dp_max = 200;
|
||||||
mo->best_n = 50;
|
mo->best_n = 50;
|
||||||
if (strcmp(preset, "asm5") == 0) {
|
if (strcmp(preset, "asm5") == 0) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue