diff --git a/align.c b/align.c index 4825c03..65b64f5 100644 --- a/align.c +++ b/align.c @@ -237,10 +237,11 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t r->p = p; } -static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx) +static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx, int log_gap) { uint32_t k, l; - int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0; + int32_t qshift, tshift, toff = 0, qoff = 0; + double s = 0.0, max = 0.0; mm_extra_t *p = r->p; if (p == 0) return; mm_fix_cigar(r, qseq, tseq, &qshift, &tshift); @@ -265,7 +266,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts for (l = 0; l < len; ++l) if (qseq[qoff + l] > 3) ++n_ambi; r->blen += len - n_ambi, p->n_ambi += n_ambi; - s -= q + e * len; + if (log_gap) s -= q + (double)e * mg_log2(1.0 + len); + else s -= q + e * len; if (s < 0) s = 0; qoff += len; } else if (op == MM_CIGAR_DEL) { @@ -273,14 +275,15 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts for (l = 0; l < len; ++l) if (tseq[toff + l] > 3) ++n_ambi; r->blen += len - n_ambi, p->n_ambi += n_ambi; - s -= q + e * len; + if (log_gap) s -= q + (double)e * mg_log2(1.0 + len); + else s -= q + e * len; if (s < 0) s = 0; toff += len; } else if (op == MM_CIGAR_N_SKIP) { toff += len; } } - p->dp_max = max; + p->dp_max = (int32_t)(max + .499); assert(qoff == r->qe - r->qs && toff == r->re - r->rs); if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned } @@ -813,7 +816,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int mm_idx_getseq(mi, rid, rs1, re1, tseq); qseq = &qseq0[r->rev][qs1]; } - mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX); + mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP)); if (rev && r->p->trans_strand) r->p->trans_strand ^= 3; // flip to the read strand } @@ -872,7 +875,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i } r_inv->rs = r1->re + t_off; r_inv->re = r_inv->rs + ez->max_t + 1; - mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX); + mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP)); ret = 1; end_align1_inv: kfree(km, tseq); diff --git a/lchain.c b/lchain.c index 220e3e9..a1615ac 100644 --- a/lchain.c +++ b/lchain.c @@ -6,16 +6,6 @@ #include "kalloc.h" #include "krmq.h" -static inline float mg_log2(float x) // NB: this doesn't work when x<2 -{ - union { float f; uint32_t i; } z = { x }; - float log_2 = ((z.i >> 23) & 255) - 128; - z.i &= ~(255 << 23); - z.i += 127 << 23; - log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f; - return log_2; -} - uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_) { mm128_t *z; diff --git a/main.c b/main.c index 063c945..53656f7 100644 --- a/main.c +++ b/main.c @@ -7,7 +7,7 @@ #include "mmpriv.h" #include "ketopt.h" -#define MM_VERSION "2.21-r1074-qs-dirty" +#define MM_VERSION "2.21-dev-r1076-dirty" #ifdef __linux__ #include @@ -73,6 +73,7 @@ static ko_longopt_t long_options[] = { { "mask-len", ko_required_argument, 346 }, { "rmq", ko_optional_argument, 347 }, { "qstrand", ko_no_argument, 348 }, + { "log-gap", ko_required_argument, 349 }, { "help", ko_no_argument, 'h' }, { "max-intron-len", ko_required_argument, 'G' }, { "version", ko_no_argument, 'V' }, @@ -101,7 +102,7 @@ static inline int64_t mm_parse_num(const char *str) return mm_parse_num2(str, 0); } -static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set) +static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set) { if (yes_to_set) { if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag; @@ -252,6 +253,8 @@ int main(int argc, char *argv[]) yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0); } else if (c == 347) { // --rmq yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1); + } else if (c == 349) { // --log-gap + yes_or_no(&opt, MM_F_NO_LOG_GAP, o.longidx, o.arg, 0); } else if (c == 'S') { opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG; if (mm_verbose >= 2) diff --git a/minimap.h b/minimap.h index f75e7de..f2cdfb4 100644 --- a/minimap.h +++ b/minimap.h @@ -36,9 +36,10 @@ #define MM_F_NO_END_FLT 0x10000000 #define MM_F_HARD_MLEVEL 0x20000000 #define MM_F_SAM_HIT_ONLY 0x40000000 -#define MM_F_RMQ 0x80000000LL +#define MM_F_RMQ (0x80000000LL) #define MM_F_QSTRAND (0x100000000LL) #define MM_F_NO_INV (0x200000000LL) +#define MM_F_NO_LOG_GAP (0x400000000LL) #define MM_I_HPC 0x1 #define MM_I_NO_SEQ 0x2 diff --git a/mmpriv.h b/mmpriv.h index cb9618b..6d0ed04 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -110,6 +110,16 @@ void mm_err_puts(const char *str); void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp); void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp); +static inline float mg_log2(float x) // NB: this doesn't work when x<2 +{ + union { float f; uint32_t i; } z = { x }; + float log_2 = ((z.i >> 23) & 255) - 128; + z.i &= ~(255 << 23); + z.i += 127 << 23; + log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f; + return log_2; +} + #ifdef __cplusplus } #endif