r1117: added --no-hash-name for deterministic alignment

This commit is contained in:
Heng Li 2021-11-24 16:49:48 -05:00
parent fcaadc22b7
commit c8f0a35c40
4 changed files with 8 additions and 2 deletions

4
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.23-r1116-dirty"
#define MM_VERSION "2.23-r1117-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -77,6 +77,7 @@ static ko_longopt_t long_options[] = {
{ "q-occ-frac", ko_required_argument, 350 },
{ "chain-skip-scale",ko_required_argument,351 },
{ "print-chains", ko_no_argument, 352 },
{ "no-hash-name", ko_no_argument, 353 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@ -235,6 +236,7 @@ int main(int argc, char *argv[])
else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc
else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac
else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains
else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name
else if (c == 330) {
fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n");
} else if (c == 314) { // --frag

2
map.c
View File

@ -248,7 +248,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char **
if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return;
if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return;
hash = qname? __ac_X31_hash_string(qname) : 0;
hash = qname && !(opt->flag & MM_F_NO_HASH_NAME)? __ac_X31_hash_string(qname) : 0;
hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed);
hash = __ac_Wang_hash(hash);

View File

@ -39,6 +39,7 @@
#define MM_F_RMQ (0x80000000LL)
#define MM_F_QSTRAND (0x100000000LL)
#define MM_F_NO_INV (0x200000000LL)
#define MM_F_NO_HASH_NAME (0x400000000LL)
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2

View File

@ -318,6 +318,9 @@ faster for short reads, but slower for long reads. [no]
.B --no-pairing
Treat two reads in a pair as independent reads. The mate related fields in SAM
are still properly populated.
.TP
.B --no-hash-name
Produce the same alignment for identical sequences regardless of their sequence names.
.SS Alignment options
.TP 10
.BI -A \ INT