r1052: default -g to 5k

This commit is contained in:
Heng Li 2021-05-24 16:46:16 -04:00
parent 34a41197d7
commit 41d7ccb191
3 changed files with 11 additions and 13 deletions

2
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h" #include "mmpriv.h"
#include "ketopt.h" #include "ketopt.h"
#define MM_VERSION "2.18-r1051-dirty" #define MM_VERSION "2.18-r1052-dirty"
#ifdef __linux__ #ifdef __linux__
#include <sys/resource.h> #include <sys/resource.h>

View File

@ -165,8 +165,8 @@ Stop chain elongation if there are no minimizers within
.IR NUM -bp .IR NUM -bp
[10k]. [10k].
.TP .TP
.BI -r \ INT .BI -r \ NUM
Bandwidth used in chaining and DP-based alignment [500]. This option Bandwidth used in chaining and DP-based alignment [500,20k]. This option
approximately controls the maximum gap size. approximately controls the maximum gap size.
.TP .TP
.BI -n \ INT .BI -n \ INT
@ -279,10 +279,6 @@ Disable the long gap patching heuristic. When this option is applied, the
maximum alignment gap is mostly controlled by maximum alignment gap is mostly controlled by
.BR -r . .BR -r .
.TP .TP
.BI --lj-min-ratio \ FLOAT
Fraction of query sequence length required to bridge a long gap [0.5]. A
smaller value helps to recover longer gaps, at the cost of more false gaps.
.TP
.B --splice .B --splice
Enable the splice alignment mode. Enable the splice alignment mode.
.TP .TP
@ -542,7 +538,7 @@ default mode.
.B map-hifi .B map-hifi
Align PacBio high-fidelity (HiFi) reads to a reference genome Align PacBio high-fidelity (HiFi) reads to a reference genome
.RB ( -k19 .RB ( -k19
.B -w19 -U50,500 -A1 -B4 -O6,26 -E2,1 .B -w19 -U50,500 -g10k -A1 -B4 -O6,26 -E2,1
.BR -s200 ). .BR -s200 ).
.TP .TP
.B map-pb .B map-pb
@ -552,7 +548,7 @@ Align older PacBio continuous long (CLR) reads to a reference genome
.B asm5 .B asm5
Long assembly to reference mapping Long assembly to reference mapping
.RB ( -k19 .RB ( -k19
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B19 -O39,81 -E3,1 -s200 -z200 .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B19 -O39,81 -E3,1 -s200 -z200
.BR -N50 ). .BR -N50 ).
Typically, the alignment will not extend to regions with 5% or higher sequence Typically, the alignment will not extend to regions with 5% or higher sequence
divergence. Only use this preset if the average divergence is far below 5%. divergence. Only use this preset if the average divergence is far below 5%.
@ -560,21 +556,21 @@ divergence. Only use this preset if the average divergence is far below 5%.
.B asm10 .B asm10
Long assembly to reference mapping Long assembly to reference mapping
.RB ( -k19 .RB ( -k19
.B -w19 -U50,500 --rmq -r100k --no-long-join -A1 -B9 -O16,41 -E2,1 -s200 -z200 .B -w19 -U50,500 --rmq -r100k -g10k -A1 -B9 -O16,41 -E2,1 -s200 -z200
.BR -N50 ). .BR -N50 ).
Up to 10% sequence divergence. Up to 10% sequence divergence.
.TP .TP
.B asm20 .B asm20
Long assembly to reference mapping Long assembly to reference mapping
.RB ( -k19 .RB ( -k19
.B -w10 -U50,500 --rmq -r100k --no-long-join -A1 -B4 -O6,26 -E2,1 -s200 -z200 .B -w10 -U50,500 --rmq -r100k -g10k -A1 -B4 -O6,26 -E2,1 -s200 -z200
.BR -N50 ). .BR -N50 ).
Up to 20% sequence divergence. Up to 20% sequence divergence.
.TP .TP
.B splice .B splice
Long-read spliced alignment Long-read spliced alignment
.RB ( -k15 .RB ( -k15
.B -w5 --splice -g2000 -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0 .B -w5 --splice -g2k -G200k -A1 -B2 -O2,32 -E1,0 -C9 -z200 -ub --junc-bonus=9 --cap-sw-mem=0
.BR --splice-flank=yes ). .BR --splice-flank=yes ).
In the splice mode, 1) long deletions are taken as introns and represented as In the splice mode, 1) long deletions are taken as introns and represented as
the the

View File

@ -23,7 +23,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->min_cnt = 3; opt->min_cnt = 3;
opt->min_chain_score = 40; opt->min_chain_score = 40;
opt->bw = 500, opt->bw_long = 20000; opt->bw = 500, opt->bw_long = 20000;
opt->max_gap = 10000; opt->max_gap = 5000;
opt->max_gap_ref = -1; opt->max_gap_ref = -1;
opt->max_chain_skip = 25; opt->max_chain_skip = 25;
opt->max_chain_iter = 5000; opt->max_chain_iter = 5000;
@ -100,6 +100,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
mo->occ_dist = 0; mo->occ_dist = 0;
} else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) { } else if (strcmp(preset, "map-hifi") == 0 || strcmp(preset, "map-ccs") == 0) {
io->flag = 0, io->k = 19, io->w = 19; io->flag = 0, io->k = 19, io->w = 19;
mo->max_gap = 10000;
mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1; mo->a = 1, mo->b = 4, mo->q = 6, mo->q2 = 26, mo->e = 2, mo->e2 = 1;
mo->occ_dist = 500; mo->occ_dist = 500;
mo->min_mid_occ = 50, mo->max_mid_occ = 500; mo->min_mid_occ = 50, mo->max_mid_occ = 500;
@ -107,6 +108,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
} else if (strncmp(preset, "asm", 3) == 0) { } else if (strncmp(preset, "asm", 3) == 0) {
io->flag = 0, io->k = 19, io->w = 19; io->flag = 0, io->k = 19, io->w = 19;
mo->bw = mo->bw_long = 100000; mo->bw = mo->bw_long = 100000;
mo->max_gap = 10000;
mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN; mo->flag |= MM_F_RMQ | MM_F_NO_LJOIN;
mo->min_mid_occ = 50, mo->max_mid_occ = 500; mo->min_mid_occ = 50, mo->max_mid_occ = 500;
mo->min_dp_max = 200; mo->min_dp_max = 200;