dev-r1076: log gap penalty
This commit is contained in:
parent
52fafe0fed
commit
2546999639
17
align.c
17
align.c
|
|
@ -237,10 +237,11 @@ static void mm_update_cigar_eqx(mm_reg1_t *r, const uint8_t *qseq, const uint8_t
|
||||||
r->p = p;
|
r->p = p;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx)
|
static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int is_eqx, int log_gap)
|
||||||
{
|
{
|
||||||
uint32_t k, l;
|
uint32_t k, l;
|
||||||
int32_t s = 0, max = 0, qshift, tshift, toff = 0, qoff = 0;
|
int32_t qshift, tshift, toff = 0, qoff = 0;
|
||||||
|
double s = 0.0, max = 0.0;
|
||||||
mm_extra_t *p = r->p;
|
mm_extra_t *p = r->p;
|
||||||
if (p == 0) return;
|
if (p == 0) return;
|
||||||
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
mm_fix_cigar(r, qseq, tseq, &qshift, &tshift);
|
||||||
|
|
@ -265,7 +266,8 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
||||||
for (l = 0; l < len; ++l)
|
for (l = 0; l < len; ++l)
|
||||||
if (qseq[qoff + l] > 3) ++n_ambi;
|
if (qseq[qoff + l] > 3) ++n_ambi;
|
||||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||||
s -= q + e * len;
|
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
|
||||||
|
else s -= q + e * len;
|
||||||
if (s < 0) s = 0;
|
if (s < 0) s = 0;
|
||||||
qoff += len;
|
qoff += len;
|
||||||
} else if (op == MM_CIGAR_DEL) {
|
} else if (op == MM_CIGAR_DEL) {
|
||||||
|
|
@ -273,14 +275,15 @@ static void mm_update_extra(mm_reg1_t *r, const uint8_t *qseq, const uint8_t *ts
|
||||||
for (l = 0; l < len; ++l)
|
for (l = 0; l < len; ++l)
|
||||||
if (tseq[toff + l] > 3) ++n_ambi;
|
if (tseq[toff + l] > 3) ++n_ambi;
|
||||||
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
r->blen += len - n_ambi, p->n_ambi += n_ambi;
|
||||||
s -= q + e * len;
|
if (log_gap) s -= q + (double)e * mg_log2(1.0 + len);
|
||||||
|
else s -= q + e * len;
|
||||||
if (s < 0) s = 0;
|
if (s < 0) s = 0;
|
||||||
toff += len;
|
toff += len;
|
||||||
} else if (op == MM_CIGAR_N_SKIP) {
|
} else if (op == MM_CIGAR_N_SKIP) {
|
||||||
toff += len;
|
toff += len;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
p->dp_max = max;
|
p->dp_max = (int32_t)(max + .499);
|
||||||
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
assert(qoff == r->qe - r->qs && toff == r->re - r->rs);
|
||||||
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
if (is_eqx) mm_update_cigar_eqx(r, qseq, tseq); // NB: it has to be called here as changes to qseq and tseq are not returned
|
||||||
}
|
}
|
||||||
|
|
@ -813,7 +816,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
||||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||||
qseq = &qseq0[r->rev][qs1];
|
qseq = &qseq0[r->rev][qs1];
|
||||||
}
|
}
|
||||||
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
mm_update_extra(r, qseq, tseq, mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
|
||||||
if (rev && r->p->trans_strand)
|
if (rev && r->p->trans_strand)
|
||||||
r->p->trans_strand ^= 3; // flip to the read strand
|
r->p->trans_strand ^= 3; // flip to the read strand
|
||||||
}
|
}
|
||||||
|
|
@ -872,7 +875,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
||||||
}
|
}
|
||||||
r_inv->rs = r1->re + t_off;
|
r_inv->rs = r1->re + t_off;
|
||||||
r_inv->re = r_inv->rs + ez->max_t + 1;
|
r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||||
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX);
|
mm_update_extra(r_inv, &qseq[q_off], &tseq[t_off], mat, opt->q, opt->e, opt->flag & MM_F_EQX, !(opt->flag&MM_F_NO_LOG_GAP));
|
||||||
ret = 1;
|
ret = 1;
|
||||||
end_align1_inv:
|
end_align1_inv:
|
||||||
kfree(km, tseq);
|
kfree(km, tseq);
|
||||||
|
|
|
||||||
10
lchain.c
10
lchain.c
|
|
@ -6,16 +6,6 @@
|
||||||
#include "kalloc.h"
|
#include "kalloc.h"
|
||||||
#include "krmq.h"
|
#include "krmq.h"
|
||||||
|
|
||||||
static inline float mg_log2(float x) // NB: this doesn't work when x<2
|
|
||||||
{
|
|
||||||
union { float f; uint32_t i; } z = { x };
|
|
||||||
float log_2 = ((z.i >> 23) & 255) - 128;
|
|
||||||
z.i &= ~(255 << 23);
|
|
||||||
z.i += 127 << 23;
|
|
||||||
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
|
|
||||||
return log_2;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
|
uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_t *p, int32_t *v, int32_t *t, int32_t min_cnt, int32_t min_sc, int32_t *n_u_, int32_t *n_v_)
|
||||||
{
|
{
|
||||||
mm128_t *z;
|
mm128_t *z;
|
||||||
|
|
|
||||||
7
main.c
7
main.c
|
|
@ -7,7 +7,7 @@
|
||||||
#include "mmpriv.h"
|
#include "mmpriv.h"
|
||||||
#include "ketopt.h"
|
#include "ketopt.h"
|
||||||
|
|
||||||
#define MM_VERSION "2.21-r1074-qs-dirty"
|
#define MM_VERSION "2.21-dev-r1076-dirty"
|
||||||
|
|
||||||
#ifdef __linux__
|
#ifdef __linux__
|
||||||
#include <sys/resource.h>
|
#include <sys/resource.h>
|
||||||
|
|
@ -73,6 +73,7 @@ static ko_longopt_t long_options[] = {
|
||||||
{ "mask-len", ko_required_argument, 346 },
|
{ "mask-len", ko_required_argument, 346 },
|
||||||
{ "rmq", ko_optional_argument, 347 },
|
{ "rmq", ko_optional_argument, 347 },
|
||||||
{ "qstrand", ko_no_argument, 348 },
|
{ "qstrand", ko_no_argument, 348 },
|
||||||
|
{ "log-gap", ko_required_argument, 349 },
|
||||||
{ "help", ko_no_argument, 'h' },
|
{ "help", ko_no_argument, 'h' },
|
||||||
{ "max-intron-len", ko_required_argument, 'G' },
|
{ "max-intron-len", ko_required_argument, 'G' },
|
||||||
{ "version", ko_no_argument, 'V' },
|
{ "version", ko_no_argument, 'V' },
|
||||||
|
|
@ -101,7 +102,7 @@ static inline int64_t mm_parse_num(const char *str)
|
||||||
return mm_parse_num2(str, 0);
|
return mm_parse_num2(str, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const char *arg, int yes_to_set)
|
static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const char *arg, int yes_to_set)
|
||||||
{
|
{
|
||||||
if (yes_to_set) {
|
if (yes_to_set) {
|
||||||
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
|
if (strcmp(arg, "yes") == 0 || strcmp(arg, "y") == 0) opt->flag |= flag;
|
||||||
|
|
@ -252,6 +253,8 @@ int main(int argc, char *argv[])
|
||||||
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
yes_or_no(&opt, MM_F_NO_DUAL, o.longidx, o.arg, 0);
|
||||||
} else if (c == 347) { // --rmq
|
} else if (c == 347) { // --rmq
|
||||||
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
yes_or_no(&opt, MM_F_RMQ, o.longidx, o.arg, 1);
|
||||||
|
} else if (c == 349) { // --log-gap
|
||||||
|
yes_or_no(&opt, MM_F_NO_LOG_GAP, o.longidx, o.arg, 0);
|
||||||
} else if (c == 'S') {
|
} else if (c == 'S') {
|
||||||
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
opt.flag |= MM_F_OUT_CS | MM_F_CIGAR | MM_F_OUT_CS_LONG;
|
||||||
if (mm_verbose >= 2)
|
if (mm_verbose >= 2)
|
||||||
|
|
|
||||||
|
|
@ -36,9 +36,10 @@
|
||||||
#define MM_F_NO_END_FLT 0x10000000
|
#define MM_F_NO_END_FLT 0x10000000
|
||||||
#define MM_F_HARD_MLEVEL 0x20000000
|
#define MM_F_HARD_MLEVEL 0x20000000
|
||||||
#define MM_F_SAM_HIT_ONLY 0x40000000
|
#define MM_F_SAM_HIT_ONLY 0x40000000
|
||||||
#define MM_F_RMQ 0x80000000LL
|
#define MM_F_RMQ (0x80000000LL)
|
||||||
#define MM_F_QSTRAND (0x100000000LL)
|
#define MM_F_QSTRAND (0x100000000LL)
|
||||||
#define MM_F_NO_INV (0x200000000LL)
|
#define MM_F_NO_INV (0x200000000LL)
|
||||||
|
#define MM_F_NO_LOG_GAP (0x400000000LL)
|
||||||
|
|
||||||
#define MM_I_HPC 0x1
|
#define MM_I_HPC 0x1
|
||||||
#define MM_I_NO_SEQ 0x2
|
#define MM_I_NO_SEQ 0x2
|
||||||
|
|
|
||||||
10
mmpriv.h
10
mmpriv.h
|
|
@ -110,6 +110,16 @@ void mm_err_puts(const char *str);
|
||||||
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
|
void mm_err_fwrite(const void *p, size_t size, size_t nitems, FILE *fp);
|
||||||
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
|
void mm_err_fread(void *p, size_t size, size_t nitems, FILE *fp);
|
||||||
|
|
||||||
|
static inline float mg_log2(float x) // NB: this doesn't work when x<2
|
||||||
|
{
|
||||||
|
union { float f; uint32_t i; } z = { x };
|
||||||
|
float log_2 = ((z.i >> 23) & 255) - 128;
|
||||||
|
z.i &= ~(255 << 23);
|
||||||
|
z.i += 127 << 23;
|
||||||
|
log_2 += (-0.34484843f * z.f + 2.02466578f) * z.f - 0.67487759f;
|
||||||
|
return log_2;
|
||||||
|
}
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue