dev-r1076: log gap penalty

This commit is contained in:
Heng Li 2021-07-17 18:23:59 -04:00
parent 52fafe0fed
commit 2546999639
5 changed files with 27 additions and 20 deletions

17
align.c
View File

@ -237,10 +237,11 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
r->p = p;
}
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx)
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx, int log_gap)
{
uint32_t k, l;
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
int32_t qshift, tshift, toff = 0, qoff = 0;
double s = 0.0, max = 0.0;
mm_extra_t *p = r->p;
if (p == 0) return;
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
@ -265,7 +266,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
for (l = 0; l < len; ++l)
if (qseq[qoff + l] > 3) ++n_ambi;
r->blen += len - n_ambi, p->n_ambi += n_ambi;
s -= q + e * len;
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
else s -= q + e * len;
if (s < 0) s = 0;
qoff += len;
} else if (op == MM_CIGAR_DEL) {
@ -273,14 +275,15 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
for (l = 0; l < len; ++l)
if (tseq[toff + l] > 3) ++n_ambi;
r->blen += len - n_ambi, p->n_ambi += n_ambi;
s -= q + e * len;
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
else s -= q + e * len;
if (s < 0) s = 0;
toff += len;
} else if (op == MM_CIGAR_N_SKIP) {
toff += len;
}
}
p->dp_max = max;
p->dp_max = (int32_t)(max + .499);
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
}
@ -813,7 +816,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
mm_idx_getseq(mi, rid, rs1, re1, tseq);
qseq = &qseq0[r->rev][qs1];
}
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX);
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
if (rev && r->p->trans_strand)
r->p->trans_strand ^= 3; // flip to the read strand
}
@ -872,7 +875,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
}
r_inv->rs = r1->re + t_off;
r_inv->re = r_inv->rs + ez->max_t + 1;
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX);
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
ret = 1;
end_align1_inv:
kfree(km, tseq);

View File

@ -6,16 +6,6 @@
#include "kalloc.h"
#include "krmq.h"
static inline float mg_log2(float x) // NB: this doesn't work when x<2
{
union { float f; uint32_t i; } z = { x };
float log_2 = ((z.i >> 23) & 255) - 128;
z.i &= ~(255 << 23);
z.i += 127 << 23;
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
return log_2;
}
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
{
mm128_t *z;

7
main.c
View File

@ -7,7 +7,7 @@
#include "mmpriv.h"
#include "ketopt.h"
#define MM_VERSION "2.21-r1074-qs-dirty"
#define MM_VERSION "2.21-dev-r1076-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -73,6 +73,7 @@ static ko_longopt_t long_options[] = {
{ "mask-len", ko_required_argument, 346 },
{ "rmq", ko_optional_argument, 347 },
{ "qstrand", ko_no_argument, 348 },
{ "log-gap", ko_required_argument, 349 },
{ "help", ko_no_argument, 'h' },
{ "max-intron-len", ko_required_argument, 'G' },
{ "version", ko_no_argument, 'V' },
@ -101,7 +102,7 @@ static inline int64_t mm_parse_num(const char *str)
return mm_parse_num2(str, 0);
}
static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set)
static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set)
{
if (yes_to_set) {
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
@ -252,6 +253,8 @@ int main(int argc, char *argv[])
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
} else if (c == 347) { // --rmq
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
} else if (c == 349) { // --log-gap
yes_or_no(&opt, MM_F_NO_LOG_GAP, o.longidx, o.arg, 0);
} else if (c == 'S') {
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
if (mm_verbose >= 2)

View File

@ -36,9 +36,10 @@
#define MM_F_NO_END_FLT 0x10000000
#define MM_F_HARD_MLEVEL 0x20000000
#define MM_F_SAM_HIT_ONLY 0x40000000
#define MM_F_RMQ 0x80000000LL
#define MM_F_RMQ (0x80000000LL)
#define MM_F_QSTRAND (0x100000000LL)
#define MM_F_NO_INV (0x200000000LL)
#define MM_F_NO_LOG_GAP (0x400000000LL)
#define MM_I_HPC 0x1
#define MM_I_NO_SEQ 0x2

View File

@ -110,6 +110,16 @@ void mm_err_puts(const char *str);
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
static inline float mg_log2(float x) // NB: this doesn't work when x<2
{
union { float f; uint32_t i; } z = { x };
float log_2 = ((z.i >> 23) & 255) - 128;
z.i &= ~(255 << 23);
z.i += 127 << 23;
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
return log_2;
}
#ifdef __cplusplus
}
#endif