dev-r1076: log gap penalty
This commit is contained in:
parent
52fafe0fed
commit
2546999639
17
align.c
17
align.c
|
|
@ -237,10 +237,11 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
|||
r->p = p;
|
||||
}
|
||||
|
||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx)
|
||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx, int log_gap)
|
||||
{
|
||||
uint32_t k, l;
|
||||
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
||||
int32_t qshift, tshift, toff = 0, qoff = 0;
|
||||
double s = 0.0, max = 0.0;
|
||||
mm_extra_t *p = r->p;
|
||||
if (p == 0) return;
|
||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||
|
|
@ -265,7 +266,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
for (l = 0; l < len; ++l)
|
||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
|
||||
else s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
qoff += len;
|
||||
} else if (op == MM_CIGAR_DEL) {
|
||||
|
|
@ -273,14 +275,15 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
|||
for (l = 0; l < len; ++l)
|
||||
if (tseq[toff + l] > 3) ++n_ambi;
|
||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||
s -= q + e * len;
|
||||
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
|
||||
else s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
toff += len;
|
||||
} else if (op == MM_CIGAR_N_SKIP) {
|
||||
toff += len;
|
||||
}
|
||||
}
|
||||
p->dp_max = max;
|
||||
p->dp_max = (int32_t)(max + .499);
|
||||
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
||||
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
||||
}
|
||||
|
|
@ -813,7 +816,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||
qseq = &qseq0[r->rev][qs1];
|
||||
}
|
||||
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
|
||||
if (rev && r->p->trans_strand)
|
||||
r->p->trans_strand ^= 3; // flip to the read strand
|
||||
}
|
||||
|
|
@ -872,7 +875,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
}
|
||||
r_inv->rs = r1->re + t_off;
|
||||
r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
|
||||
ret = 1;
|
||||
end_align1_inv:
|
||||
kfree(km, tseq);
|
||||
|
|
|
|||
10
lchain.c
10
lchain.c
|
|
@ -6,16 +6,6 @@
|
|||
#include "kalloc.h"
|
||||
#include "krmq.h"
|
||||
|
||||
static inline float mg_log2(float x) // NB: this doesn't work when x<2
|
||||
{
|
||||
union { float f; uint32_t i; } z = { x };
|
||||
float log_2 = ((z.i >> 23) & 255) - 128;
|
||||
z.i &= ~(255 << 23);
|
||||
z.i += 127 << 23;
|
||||
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
|
||||
return log_2;
|
||||
}
|
||||
|
||||
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
|
||||
{
|
||||
mm128_t *z;
|
||||
|
|
|
|||
7
main.c
7
main.c
|
|
@ -7,7 +7,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "ketopt.h"
|
||||
|
||||
#define MM_VERSION "2.21-r1074-qs-dirty"
|
||||
#define MM_VERSION "2.21-dev-r1076-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -73,6 +73,7 @@ static ko_longopt_t long_options[] = {
|
|||
{ "mask-len", ko_required_argument, 346 },
|
||||
{ "rmq", ko_optional_argument, 347 },
|
||||
{ "qstrand", ko_no_argument, 348 },
|
||||
{ "log-gap", ko_required_argument, 349 },
|
||||
{ "help", ko_no_argument, 'h' },
|
||||
{ "max-intron-len", ko_required_argument, 'G' },
|
||||
{ "version", ko_no_argument, 'V' },
|
||||
|
|
@ -101,7 +102,7 @@ static inline int64_t mm_parse_num(const char *str)
|
|||
return mm_parse_num2(str, 0);
|
||||
}
|
||||
|
||||
static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set)
|
||||
static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set)
|
||||
{
|
||||
if (yes_to_set) {
|
||||
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
|
||||
|
|
@ -252,6 +253,8 @@ int main(int argc, char *argv[])
|
|||
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
||||
} else if (c == 347) { // --rmq
|
||||
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
||||
} else if (c == 349) { // --log-gap
|
||||
yes_or_no(&opt, MM_F_NO_LOG_GAP, o.longidx, o.arg, 0);
|
||||
} else if (c == 'S') {
|
||||
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
||||
if (mm_verbose >= 2)
|
||||
|
|
|
|||
|
|
@ -36,9 +36,10 @@
|
|||
#define MM_F_NO_END_FLT 0x10000000
|
||||
#define MM_F_HARD_MLEVEL 0x20000000
|
||||
#define MM_F_SAM_HIT_ONLY 0x40000000
|
||||
#define MM_F_RMQ 0x80000000LL
|
||||
#define MM_F_RMQ (0x80000000LL)
|
||||
#define MM_F_QSTRAND (0x100000000LL)
|
||||
#define MM_F_NO_INV (0x200000000LL)
|
||||
#define MM_F_NO_LOG_GAP (0x400000000LL)
|
||||
|
||||
#define MM_I_HPC 0x1
|
||||
#define MM_I_NO_SEQ 0x2
|
||||
|
|
|
|||
10
mmpriv.h
10
mmpriv.h
|
|
@ -110,6 +110,16 @@ void mm_err_puts(const char *str);
|
|||
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
|
||||
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
|
||||
|
||||
static inline float mg_log2(float x) // NB: this doesn't work when x<2
|
||||
{
|
||||
union { float f; uint32_t i; } z = { x };
|
||||
float log_2 = ((z.i >> 23) & 255) - 128;
|
||||
z.i &= ~(255 << 23);
|
||||
z.i += 127 << 23;
|
||||
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
|
||||
return log_2;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue