r894: added --max-qlen to filter out long queries

This commit is contained in:
Heng Li 2018-12-12 12:27:32 -05:00
parent eef1cee9b7
commit ea2b1c5b2a
4 changed files with 14 additions and 2 deletions

4
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.14-r892-dirty"
#define MM_VERSION "2.14-r894-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -61,6 +61,7 @@ static ko_longopt_t long_options[] = {
{ "no-end-flt", ko_no_argument, 335 },
{ "hard-mask-level",ko_no_argument, 336 },
{ "cap-sw-mem", ko_required_argument, 337 },
{ "max-qlen", ko_required_argument, 338 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@ -192,6 +193,7 @@ int main(int argc, char *argv[])
else if (c == 335) opt.flag |= MM_F_NO_END_FLT; // --no-end-flt
else if (c == 336) opt.flag |= MM_F_HARD_MLEVEL; // --hard-mask-level
else if (c == 337) opt.max_sw_mat = mm_parse_num(o.arg); // --cap-sw-mem
else if (c == 338) opt.max_qlen = mm_parse_num(o.arg); // --max-qlen
else if (c == 314) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, o.longidx, o.arg, 1);
} else if (c == 315) { // --secondary

1
map.c
View File

@ -284,6 +284,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
qlen_sum += qlens[i], n_regs[i] = 0, regs[i] = 0;
if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return;
if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return;
hash = qname? __ac_X31_hash_string(qname) : 0;
hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed);

View File

@ -107,6 +107,8 @@ typedef struct {
int sdust_thres; // score threshold for SDUST; 0 to disable
int flag; // see MM_F_* macros
int max_qlen; // max query length; longer queries are skipped; 0 to disable
int bw; // bandwidth
int max_gap, max_gap_ref; // break a chain if there are no minimizers in a max_gap window
int max_frag_len;

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "5 November 2018" "minimap2-2.14-dirty (r891)" "Bioinformatics tools"
.TH minimap2 1 "12 December 2018" "minimap2-2.14-dirty (r894)" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
@ -458,6 +458,13 @@ memory.
.BR --secondary = yes | no
Whether to output secondary alignments [yes]
.TP
.BI --max-qlen \ NUM
Filter out query sequences longer than
.IR NUM .
.TP
.B --paf-no-hit
In PAF, output query name and length for an unmapped sequence.
.TP
.B --version
Print version number to stdout
.SS Preset options