r1034: changed multiple defaults; updated manpage
This commit is contained in:
parent
bbb4f97e52
commit
6c96078ed0
5
main.c
5
main.c
|
|
@ -7,7 +7,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "ketopt.h"
|
||||
|
||||
#define MM_VERSION "2.18-r1028-dirty"
|
||||
#define MM_VERSION "2.18-r1034-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -339,7 +339,8 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " --version show version number\n");
|
||||
fprintf(fp_help, " Preset:\n");
|
||||
fprintf(fp_help, " -x STR preset (always applied before other options; see minimap2.1 for details) []\n");
|
||||
fprintf(fp_help, " - map-pb/map-ont - PacBio/Nanopore vs reference mapping\n");
|
||||
fprintf(fp_help, " - map-pb/map-ont - PacBio CLR/Nanopore vs reference mapping\n");
|
||||
fprintf(fp_help, " - map-hifi - PacBio HiFi reads vs reference mapping\n");
|
||||
fprintf(fp_help, " - ava-pb/ava-ont - PacBio/Nanopore read overlap\n");
|
||||
fprintf(fp_help, " - asm5/asm10/asm20 - asm-to-ref mapping, for ~0.1/1/5%% sequence divergence\n");
|
||||
fprintf(fp_help, " - splice/splice:hq - long-read/Pacbio-CCS spliced alignment\n");
|
||||
|
|
|
|||
98
minimap2.1
98
minimap2.1
|
|
@ -145,18 +145,25 @@ or
|
|||
.B -xsr
|
||||
mode, which sets the threshold for a second round of seeding.
|
||||
.TP
|
||||
.BI --min-occ-floor \ INT
|
||||
Force minimap2 to always use k-mers occurring
|
||||
.I INT
|
||||
times or less [0]. In effect, the max occurrence threshold is set to
|
||||
the
|
||||
.RI max{ INT ,
|
||||
.BR -f }.
|
||||
.BI -U \ INT1 [, INT2 ]
|
||||
Lower and upper bounds of k-mer occurrences [10,1000000]. The final k-mer occurrence threshold is
|
||||
.RI max{ INT1 ,\ min{ INT2 ,
|
||||
.BR -f }}.
|
||||
This option prevents excessively small or large
|
||||
.B -f
|
||||
estimated from the input reference. It deprecates
|
||||
.B --min-occ-floor
|
||||
in earlier versions of minimap2.
|
||||
.TP
|
||||
.BI -g \ INT
|
||||
.BI -e \ INT
|
||||
Sample a high-frequency minimizer every
|
||||
.I INT
|
||||
basepairs [500].
|
||||
.TP
|
||||
.BI -g \ NUM
|
||||
Stop chain elongation if there are no minimizers within
|
||||
.IR INT -bp
|
||||
[10000].
|
||||
.IR NUM -bp
|
||||
[10k].
|
||||
.TP
|
||||
.BI -r \ INT
|
||||
Bandwidth used in chaining and DP-based alignment [500]. This option
|
||||
|
|
@ -234,6 +241,10 @@ Mark as secondary a chain that overlaps with a better chain by
|
|||
.I FLOAT
|
||||
or more of the shorter chain [0.5]
|
||||
.TP
|
||||
.BR --rmq = no | yes
|
||||
Use the minigraph chaining algorithm [no]. The minigraph algorithm is better
|
||||
for aligning contigs through long INDELs.
|
||||
.TP
|
||||
.B --hard-mask-level
|
||||
Honor option
|
||||
.B -M
|
||||
|
|
@ -412,7 +423,7 @@ alignment.
|
|||
.BI --cap-sw-mem \ NUM
|
||||
Skip alignment if the DP matrix size is above
|
||||
.IR NUM .
|
||||
Set 0 to disable [0].
|
||||
Set 0 to disable [100m].
|
||||
.SS Input/output options
|
||||
.TP 10
|
||||
.B -a
|
||||
|
|
@ -523,66 +534,47 @@ Available
|
|||
.I STR
|
||||
are:
|
||||
.RS
|
||||
.TP 9
|
||||
.B map-pb
|
||||
PacBio/Oxford Nanopore read to reference mapping
|
||||
.RB ( -Hk19 )
|
||||
.TP
|
||||
.TP 10
|
||||
.B map-ont
|
||||
Slightly more sensitive for Oxford Nanopore to reference mapping
|
||||
.RB ( -k15 ).
|
||||
For PacBio reads, HPC minimizers consistently lead to faster performance and
|
||||
more sensitive results in comparison to normal minimizers. For Oxford Nanopore
|
||||
data, normal minimizers are better, though not much. The effectiveness of HPC
|
||||
is determined by the sequencing error mode.
|
||||
Align noisy long reads of ~10% error rate to a reference genome. This is the
|
||||
default mode.
|
||||
.TP
|
||||
.B map-hifi
|
||||
PacBio HiFi reads to reference mapping
|
||||
Align PacBio high-fidelity (HiFi) reads to a reference genome
|
||||
.RB ( -k19
|
||||
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -e100 -g10k
|
||||
.BR -U100,500 ).
|
||||
.B -w19 -U50,500 -A1 -B4 -O6,26 -E2,1
|
||||
.BR -s200 ).
|
||||
.TP
|
||||
.B map-pb
|
||||
Align older PacBio continuous long (CLR) reads to a reference genome
|
||||
.RB ( -Hk19 ).
|
||||
.TP
|
||||
.B asm5
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w19 -A1 -B19 -O39,81 -E3,1 -s200 -z200 -N50
|
||||
.BR --min-occ-floor=100 ).
|
||||
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B19 -O39,81 -E3,1 -s200 -z200
|
||||
.BR -N50 ).
|
||||
Typically, the alignment will not extend to regions with 5% or higher sequence
|
||||
divergence. Only use this preset if the average divergence is far below 5%.
|
||||
.TP
|
||||
.B asm10
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w19 -A1 -B9 -O16,41 -E2,1 -s200 -z200 -N50
|
||||
.BR --min-occ-floor=100 ).
|
||||
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B9 -O16,41 -E2,1 -s200 -z200
|
||||
.BR -N50 ).
|
||||
Up to 10% sequence divergence.
|
||||
.TP
|
||||
.B asm20
|
||||
Long assembly to reference mapping
|
||||
.RB ( -k19
|
||||
.B -w10 -A1 -B4 -O6,26 -E2,1 -s200 -z200 -N50
|
||||
.BR --min-occ-floor=100 ).
|
||||
.B -w10 -U50,500 --rmq -r100k --no-long-join -A1 -B4 -O6,26 -E2,1 -s200 -z200
|
||||
.BR -N50 ).
|
||||
Up to 20% sequence divergence.
|
||||
.TP
|
||||
.B ava-pb
|
||||
PacBio all-vs-all overlap mapping
|
||||
.RB ( -Hk19
|
||||
.B -Xw5 -m100 -g10000 --max-chain-skip
|
||||
.BR 25 ).
|
||||
.TP
|
||||
.B ava-ont
|
||||
Oxford Nanopore all-vs-all overlap mapping
|
||||
.RB ( -k15
|
||||
.B -Xw5 -m100 -g10000 -r2000 --max-chain-skip
|
||||
.BR 25 ).
|
||||
Similarly, the major difference from
|
||||
.B ava-pb
|
||||
is that this preset is not using HPC minimizers.
|
||||
.TP
|
||||
.B splice
|
||||
Long-read spliced alignment
|
||||
.RB ( -k15
|
||||
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9
|
||||
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
|
||||
.BR --splice-flank=yes ).
|
||||
In the splice mode, 1) long deletions are taken as introns and represented as
|
||||
the
|
||||
|
|
@ -604,6 +596,18 @@ Short single-end reads without splicing
|
|||
.B -w11 --sr --frag=yes -A2 -B8 -O12,32 -E2,1 -r50 -p.5 -N20 -f1000,5000 -n2 -m20
|
||||
.B -s40 -g200 -2K50m --heap-sort=yes
|
||||
.BR --secondary=no ).
|
||||
.TP
|
||||
.B ava-pb
|
||||
PacBio CLR all-vs-all overlap mapping
|
||||
.RB ( -Hk19
|
||||
.B -Xw5 -e0
|
||||
.BR -m100 ).
|
||||
.TP
|
||||
.B ava-ont
|
||||
Oxford Nanopore all-vs-all overlap mapping
|
||||
.RB ( -k15
|
||||
.B -Xw5 -e0 -m100
|
||||
.BR -r2k ).
|
||||
.RE
|
||||
.SS Miscellaneous options
|
||||
.TP 10
|
||||
|
|
|
|||
25
options.c
25
options.c
|
|
@ -16,7 +16,8 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
memset(opt, 0, sizeof(mm_mapopt_t));
|
||||
opt->seed = 11;
|
||||
opt->mid_occ_frac = 2e-4f;
|
||||
opt->max_mid_occ = 1000000000;
|
||||
opt->min_mid_occ = 10;
|
||||
opt->max_mid_occ = 1000000;
|
||||
opt->sdust_thres = 0; // no SDUST masking
|
||||
|
||||
opt->min_cnt = 3;
|
||||
|
|
@ -63,12 +64,13 @@ void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
|||
{
|
||||
if ((opt->flag & MM_F_SPLICE_FOR) || (opt->flag & MM_F_SPLICE_REV))
|
||||
opt->flag |= MM_F_SPLICE;
|
||||
if (opt->mid_occ <= 0)
|
||||
if (opt->mid_occ <= 0) {
|
||||
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
||||
if (opt->mid_occ < opt->min_mid_occ)
|
||||
opt->mid_occ = opt->min_mid_occ;
|
||||
if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
|
||||
opt->mid_occ = opt->max_mid_occ;
|
||||
if (opt->mid_occ < opt->min_mid_occ)
|
||||
opt->mid_occ = opt->min_mid_occ;
|
||||
if (opt->max_mid_occ > opt->min_mid_occ && opt->mid_occ > opt->max_mid_occ)
|
||||
opt->mid_occ = opt->max_mid_occ;
|
||||
}
|
||||
if (mm_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s::%.3f*%.2f] mid_occ = %d\n", __func__, realtime() - mm_realtime0, cputime() / (realtime() - mm_realtime0), opt->mid_occ);
|
||||
}
|
||||
|
|
@ -88,26 +90,27 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|||
} else if (strcmp(preset, "ava-ont") == 0) {
|
||||
io->flag = 0, io->k = 15, io->w = 5;
|
||||
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
|
||||
mo->bw = 2000;
|
||||
mo->occ_dist = 0;
|
||||
} else if (strcmp(preset, "map10k") == 0 || strcmp(preset, "map-pb") == 0) {
|
||||
io->flag |= MM_I_HPC, io->k = 19;
|
||||
} else if (strcmp(preset, "ava-pb") == 0) {
|
||||
io->flag |= MM_I_HPC, io->k = 19, io->w = 5;
|
||||
mo->flag |= MM_F_ALL_CHAINS | MM_F_NO_DIAG | MM_F_NO_DUAL | MM_F_NO_LJOIN;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_gap = 10000, mo->max_chain_skip = 25;
|
||||
mo->min_chain_score = 100, mo->pri_ratio = 0.0f, mo->max_chain_skip = 25;
|
||||
mo->occ_dist = 0;
|
||||
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
|
||||
io->flag = 0, io->k = 19, io->w = 19;
|
||||
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
|
||||
mo->max_gap = 10000;
|
||||
mo->occ_dist = 500;
|
||||
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
|
||||
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
||||
mo->min_dp_max = 200;
|
||||
} else if (strncmp(preset, "asm", 3) == 0) {
|
||||
io->flag = 0, io->k = 19, io->w = 19;
|
||||
mo->bw = 100000;
|
||||
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
|
||||
mo->min_mid_occ = 100, mo->max_mid_occ = 500;
|
||||
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
|
||||
mo->min_dp_max = 200;
|
||||
mo->best_n = 50;
|
||||
if (strcmp(preset, "asm5") == 0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue