From c8f0a35c4078f2430712da8c4ad4e3a696fe51f0 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 24 Nov 2021 16:49:48 -0500 Subject: [PATCH] r1117: added --no-hash-name for deterministic output --- main.c | 4 +++- map.c | 2 +- minimap.h | 1 + minimap2.1 | 3 +++ 4 files changed, 8 insertions(+), 2 deletions(-) diff --git a/main.c b/main.c index 67d6a29..900bd08 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.23-r1116-dirty" +#define MM_VERSION "2.23-r1117-dirty" #ifdef __linux__ #include @@ -77,6 +77,7 @@ static ko_longopt_t long_options[] = { { "q-occ-frac", ko_required_argument, 350 }, { "chain-skip-scale",ko_required_argument,351 }, { "print-chains", ko_no_argument, 352 }, + { "no-hash-name", ko_no_argument, 353 }, { "help", ko_no_argument, 'h' }, { "max-intron-len", ko_required_argument, 'G' }, { "version", ko_no_argument, 'V' }, @@ -235,6 +236,7 @@ int main(int argc, char *argv[]) else if (c == 349) opt.cap_kalloc = mm_parse_num(o.arg); // --cap-kalloc else if (c == 350) opt.q_occ_frac = atof(o.arg); // --q-occ-frac else if (c == 352) mm_dbg_flag |= MM_DBG_PRINT_CHAIN; // --print-chains + else if (c == 353) opt.flag |= MM_F_NO_HASH_NAME; // --no-hash-name else if (c == 330) { fprintf(stderr, "[WARNING] \033[1;31m --lj-min-ratio has been deprecated.\033[0m\n"); } else if (c == 314) { // --frag diff --git a/map.c b/map.c index 5c31208..5311468 100644 --- a/map.c +++ b/map.c @@ -248,7 +248,7 @@ void mm_map_frag(const mm_idx_t *mi, int n_segs, const int *qlens, const char ** if (qlen_sum == 0 || n_segs <= 0 || n_segs > MM_MAX_SEG) return; if (opt->max_qlen > 0 && qlen_sum > opt->max_qlen) return; - hash = qname? __ac_X31_hash_string(qname) : 0; + hash = qname && !(opt->flag & MM_F_NO_HASH_NAME)? 
__ac_X31_hash_string(qname) : 0; hash ^= __ac_Wang_hash(qlen_sum) + __ac_Wang_hash(opt->seed); hash = __ac_Wang_hash(hash); diff --git a/minimap.h b/minimap.h index 5a0ff2f..13e12e0 100644 --- a/minimap.h +++ b/minimap.h @@ -39,6 +39,7 @@ #define MM_F_RMQ (0x80000000LL) #define MM_F_QSTRAND (0x100000000LL) #define MM_F_NO_INV (0x200000000LL) +#define MM_F_NO_HASH_NAME (0x400000000LL) #define MM_I_HPC 0x1 #define MM_I_NO_SEQ 0x2 diff --git a/minimap2.1 b/minimap2.1 index c321e60..7d0c2c5 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -318,6 +318,9 @@ faster for short reads, but slower for long reads. [no] .B --no-pairing Treat two reads in a pair as independent reads. The mate related fields in SAM are still properly populated. +.TP +.B --no-hash-name +Produce the same alignment for identical sequences regardless of their sequence names. .SS Alignment options .TP 10 .BI -A \ INT