r1052: default -g to 5k

This commit is contained in:
Heng Li 2021-05-24 16:46:16 -04:00
parent 34a41197d7
commit 41d7ccb191
3 changed files with 11 additions and 13 deletions

2
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.18-r1051-dirty"
#define MM_VERSION "2.18-r1052-dirty"
#ifdef __linux__
#include <sys/resource.h>

View File

@ -165,8 +165,8 @@ Stop chain elongation if there are no minimizers within
.IR NUM -bp
[10k].
.TP
.BI -r \ INT
Bandwidth used in chaining and DP-based alignment [500]. This option
.BI -r \ NUM
Bandwidth used in chaining and DP-based alignment [500,20k]. This option
approximately controls the maximum gap size.
.TP
.BI -n \ INT
@ -279,10 +279,6 @@ Disable the long gap patching heuristic. When this option is applied, the
maximum alignment gap is mostly controlled by
.BR -r .
.TP
.BI --lj-min-ratio \ FLOAT
Fraction of query sequence length required to bridge a long gap [0.5]. A
smaller value helps to recover longer gaps, at the cost of more false gaps.
.TP
.B --splice
Enable the splice alignment mode.
.TP
@ -542,7 +538,7 @@ default mode.
.B map-hifi
Align PacBio high-fidelity (HiFi) reads to a reference genome
.RB ( -k19
.B -w19 -U50,500 -A1 -B4 -O6,26 -E2,1
.B -w19 -U50,500 -g10k -A1 -B4 -O6,26 -E2,1
.BR -s200 ).
.TP
.B map-pb
@ -552,7 +548,7 @@ Align older PacBio continuous long (CLR) reads to a reference genome
.B asm5
Long assembly to reference mapping
.RB ( -k19
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B19 -O39,81 -E3,1 -s200 -z200
.B -w19 -U50,500 --rmq -r100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
.BR -N50 ).
Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%.
@ -560,21 +556,21 @@ divergence. Only use this preset if the average divergence is far below 5%.
.B asm10
Long assembly to reference mapping
.RB ( -k19
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B9 -O16,41 -E2,1 -s200 -z200
.B -w19 -U50,500 --rmq -r100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
.BR -N50 ).
Up to 10% sequence divergence.
.TP
.B asm20
Long assembly to reference mapping
.RB ( -k19
.B -w10 -U50,500 --rmq -r100k --no-long-join -A1 -B4 -O6,26 -E2,1 -s200 -z200
.B -w10 -U50,500 --rmq -r100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
.BR -N50 ).
Up to 20% sequence divergence.
.TP
.B splice
Long-read spliced alignment
.RB ( -k15
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
.B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
.BR --splice-flank=yes ).
In the splice mode, 1) long deletions are taken as introns and represented as
the

View File

@ -23,7 +23,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->min_cnt = 3;
opt->min_chain_score = 40;
opt->bw = 500, opt->bw_long = 20000;
opt->max_gap = 10000;
opt->max_gap = 5000;
opt->max_gap_ref = -1;
opt->max_chain_skip = 25;
opt->max_chain_iter = 5000;
@ -100,6 +100,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mo->occ_dist = 0;
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->max_gap = 10000;
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
mo->occ_dist = 500;
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
@ -107,6 +108,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
} else if (strncmp(preset, "asm", 3) == 0) {
io->flag = 0, io->k = 19, io->w = 19;
mo->bw = mo->bw_long = 100000;
mo->max_gap = 10000;
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
mo->min_mid_occ = 50, mo->max_mid_occ = 500;
mo->min_dp_max = 200;