r499: end bonus working; DP region needs improve!
This commit is contained in:
parent
ca632f907b
commit
7345621759
36
align.c
36
align.c
|
|
@ -186,7 +186,7 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
|
|||
}
|
||||
}
|
||||
|
||||
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int flag, ksw_extz_t *ez)
|
||||
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int end_bonus, int flag, ksw_extz_t *ez)
|
||||
{
|
||||
int zdrop = opt->zdrop;
|
||||
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
|
||||
|
|
@ -202,7 +202,7 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
|
|||
else if (opt->q == opt->q2 && opt->e == opt->e2)
|
||||
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, flag, ez);
|
||||
else
|
||||
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, flag, ez);
|
||||
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
|
||||
}
|
||||
|
||||
static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)
|
||||
|
|
@ -378,12 +378,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
rs0 = (int32_t)a[r->as].x + 1 - (int32_t)(a[r->as].y>>32&0xff);
|
||||
qs0 = (int32_t)a[r->as].y + 1 - (int32_t)(a[r->as].y>>32&0xff);
|
||||
if (r->as > 0 && a[r->as - 1].x>>32 == a[r->as].x>>32) {
|
||||
rs1 = (int32_t)a[r->as - 1].x + 1;
|
||||
qs1 = (int32_t)a[r->as - 1].y + 1;
|
||||
} else rs1 = qs1 = 0; // no adjacent previous chain on the same chr
|
||||
rs1 = (int32_t)a[r->as - 1].x + 1 - (int32_t)(a[r->as - 1].y>>32&0xff);
|
||||
} else rs1 = 0; // no adjacent previous chain on the same chr
|
||||
if (qs > 0 && rs > 0) { // actually this is always true
|
||||
l = qs < max_end_ext? qs : max_end_ext;
|
||||
qs1 = qs1 > qs - l? qs1 : qs - l; // choose between the max_end_ext bound and the previous seed bound
|
||||
qs1 = qs - l;
|
||||
qs0 = qs0 < qs1? qs0 : qs1; // at least include qs0
|
||||
l += l * opt->a > opt->q? (l * opt->a - opt->q) / opt->e : 0;
|
||||
l = l < max_end_ext? l : max_end_ext;
|
||||
|
|
@ -396,12 +395,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
re0 = (int32_t)a[r->as + r->cnt - 1].x + 1;
|
||||
qe0 = (int32_t)a[r->as + r->cnt - 1].y + 1;
|
||||
if (r->as + r->cnt < n_a && a[r->as + r->cnt].x>>32 == a[r->as + r->cnt - 1].x>>32) {
|
||||
re1 = (int32_t)a[r->as + r->cnt].x + 1 - (int32_t)(a[r->as + r->cnt].y>>32&0xff);
|
||||
qe1 = (int32_t)a[r->as + r->cnt].y + 1 - (int32_t)(a[r->as + r->cnt].y>>32&0xff);
|
||||
} else re1 = mi->seq[rid].len, qe1 = qlen; // no adjacent next chain on the same chr
|
||||
re1 = (int32_t)a[r->as + r->cnt].x + 1;
|
||||
} else re1 = mi->seq[rid].len; // no adjacent next chain on the same chr
|
||||
if (qe < qlen && re < mi->seq[rid].len) {
|
||||
l = qlen - qe < max_end_ext? qlen - qe : max_end_ext;
|
||||
qe1 = qe1 < qe + l? qe1 : qe + l; // choose between the max_end_ext bound and the next seed bound
|
||||
qe1 = qe + l;
|
||||
qe0 = qe0 > qe1? qe0 : qe1; // at least include qe0
|
||||
l += l * opt->a > opt->q? (l * opt->a - opt->q) / opt->e : 0;
|
||||
l = l < max_end_ext? l : max_end_ext;
|
||||
|
|
@ -418,13 +416,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
mm_idx_getseq(mi, rid, rs0, rs, tseq);
|
||||
mm_seq_rev(qs - qs0, qseq);
|
||||
mm_seq_rev(rs - rs0, tseq);
|
||||
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, opt->end_bonus, extra_flag|KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
|
||||
if (ez->n_cigar > 0) {
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
r->p->dp_score += ez->max;
|
||||
}
|
||||
rs1 = rs - (ez->max_t + 1);
|
||||
qs1 = qs - (ez->max_q + 1);
|
||||
rs1 = rs - (ez->reach_end? ez->mqe_t + 1 : ez->max_t + 1);
|
||||
qs1 = qs - (ez->reach_end? qs - qs0 : ez->max_q + 1);
|
||||
mm_seq_rev(qs - qs0, qseq);
|
||||
} else rs1 = rs, qs1 = qs;
|
||||
re1 = rs, qe1 = qs;
|
||||
|
|
@ -452,10 +450,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
}
|
||||
ez->cigar = ksw_push_cigar(km, &ez->n_cigar, &ez->m_cigar, ez->cigar, 0, qe - qs);
|
||||
} else { // perform normal gapped alignment
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, -1, extra_flag|KSW_EZ_APPROX_MAX, ez); // first pass: with approximate Z-drop
|
||||
}
|
||||
if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, extra_flag, ez); // second pass: lift approximate
|
||||
mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, -1, extra_flag, ez); // second pass: lift approximate
|
||||
if (ez->n_cigar > 0)
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle.
|
||||
|
|
@ -477,13 +475,13 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
if (!dropped && qe < qe0 && re < re0) { // right extension
|
||||
qseq = &qseq0[rev][qe];
|
||||
mm_idx_getseq(mi, rid, re, re0, tseq);
|
||||
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
|
||||
mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, opt->end_bonus, extra_flag|KSW_EZ_EXTZ_ONLY, ez);
|
||||
if (ez->n_cigar > 0) {
|
||||
mm_append_cigar(r, ez->n_cigar, ez->cigar);
|
||||
r->p->dp_score += ez->max;
|
||||
}
|
||||
re1 = re + (ez->max_t + 1);
|
||||
qe1 = qe + (ez->max_q + 1);
|
||||
re1 = re + (ez->reach_end? ez->mqe_t + 1 : ez->max_t + 1);
|
||||
qe1 = qe + (ez->reach_end? qe0 - qe : ez->max_q + 1);
|
||||
}
|
||||
assert(qe1 <= qlen);
|
||||
|
||||
|
|
@ -533,7 +531,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
mm_seq_rev(tl, tseq);
|
||||
if (score < opt->min_dp_max) goto end_align1_inv;
|
||||
q_off = ql - (q_off + 1), t_off = tl - (t_off + 1);
|
||||
mm_align_pair(km, opt, ql - q_off, qseq + q_off, tl - t_off, tseq + t_off, mat, (int)(opt->bw * 1.5), KSW_EZ_EXTZ_ONLY, ez);
|
||||
mm_align_pair(km, opt, ql - q_off, qseq + q_off, tl - t_off, tseq + t_off, mat, (int)(opt->bw * 1.5), -1, KSW_EZ_EXTZ_ONLY, ez);
|
||||
if (ez->n_cigar == 0) goto end_align1_inv; // should never be here
|
||||
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
|
||||
r_inv->p->dp_score = ez->max;
|
||||
|
|
|
|||
12
ksw2.h
12
ksw2.h
|
|
@ -26,6 +26,7 @@ typedef struct {
|
|||
int mte, mte_q; // max score when reaching the end of target
|
||||
int score; // max score reaching both ends; may be KSW_NEG_INF
|
||||
int m_cigar, n_cigar;
|
||||
int reach_end;
|
||||
uint32_t *cigar;
|
||||
} ksw_extz_t;
|
||||
|
||||
|
|
@ -46,14 +47,17 @@ typedef struct {
|
|||
* @param flag flag (see KSW_EZ_* macros)
|
||||
* @param ez (out) scores and cigar
|
||||
*/
|
||||
void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
|
||||
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
|
||||
void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
|
||||
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
|
||||
|
||||
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t gapo, int8_t gape, int8_t gapo2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
|
||||
|
|
@ -149,7 +153,7 @@ static inline void ksw_reset_extz(ksw_extz_t *ez)
|
|||
{
|
||||
ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1;
|
||||
ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF;
|
||||
ez->n_cigar = 0, ez->zdropped = 0;
|
||||
ez->n_cigar = 0, ez->zdropped = 0, ez->reach_end = 0;
|
||||
}
|
||||
|
||||
static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a, int b, int zdrop, int8_t e)
|
||||
|
|
|
|||
|
|
@ -64,18 +64,18 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
}
|
||||
|
||||
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
|
||||
{
|
||||
extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
|
||||
extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez);
|
||||
unsigned simd;
|
||||
simd = x86_simd();
|
||||
if (simd & SIMD_SSE4_1)
|
||||
ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
|
||||
ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
|
||||
else if (simd & SIMD_SSE2)
|
||||
ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
|
||||
ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, end_bonus, flag, ez);
|
||||
else abort();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -17,14 +17,14 @@
|
|||
#ifdef KSW_CPU_DISPATCH
|
||||
#ifdef __SSE4_1__
|
||||
void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
|
||||
#else
|
||||
void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
|
||||
#endif
|
||||
#else
|
||||
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int end_bonus, int flag, ksw_extz_t *ez)
|
||||
#endif // ~KSW_CPU_DISPATCH
|
||||
{
|
||||
#define __dp_code_block1 \
|
||||
|
|
@ -380,10 +380,14 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
if (!approx_max) kfree(km, H);
|
||||
if (with_cigar) { // backtrack
|
||||
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
|
||||
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
|
||||
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY)) {
|
||||
ksw_backtrack(km, 1, rev_cigar, 0, (uint8_t*)p, off, off_end, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
||||
else if (ez->max_t >= 0 && ez->max_q >= 0)
|
||||
} else if (!ez->zdropped && (flag&KSW_EZ_EXTZ_ONLY) && ez->mqe + end_bonus > ez->max) {
|
||||
ez->reach_end = 1;
|
||||
ksw_backtrack(km, 1, rev_cigar, 0, (uint8_t*)p, off, off_end, n_col_*16, ez->mqe_t, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
||||
} else if (ez->max_t >= 0 && ez->max_q >= 0) {
|
||||
ksw_backtrack(km, 1, rev_cigar, 0, (uint8_t*)p, off, off_end, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
|
||||
}
|
||||
kfree(km, mem2); kfree(km, off);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -6,7 +6,7 @@
|
|||
#include "mmpriv.h"
|
||||
#include "getopt.h"
|
||||
|
||||
#define MM_VERSION "2.2-r498-dirty"
|
||||
#define MM_VERSION "2.2-r499-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -40,6 +40,7 @@ static struct option long_options[] = {
|
|||
{ "frag", optional_argument, 0, 0 },
|
||||
{ "print-2nd", optional_argument, 0, 0 },
|
||||
{ "cs", optional_argument, 0, 0 },
|
||||
{ "end-bonus", required_argument, 0, 0 },
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
|
|
@ -130,6 +131,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx ==11) opt.noncan = atoi(optarg); // --cost-non-gt-ag
|
||||
else if (c == 0 && long_idx ==12) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
|
||||
else if (c == 0 && long_idx ==13) opt.flag |= MM_F_SR; // --sr
|
||||
else if (c == 0 && long_idx ==17) opt.end_bonus = atoi(optarg); // --end-bonus
|
||||
else if (c == 0 && long_idx == 14) { // --frag
|
||||
if (optarg == 0 || strcmp(optarg, "yes") == 0 || strcmp(optarg, "y") == 0)
|
||||
opt.flag |= MM_F_FRAG_MODE;
|
||||
|
|
|
|||
2
map.c
2
map.c
|
|
@ -33,6 +33,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
|
||||
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
||||
opt->zdrop = 400;
|
||||
opt->end_bonus = -1;
|
||||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||
opt->min_ksw_len = 200;
|
||||
opt->mini_batch_size = 200000000;
|
||||
|
|
@ -86,6 +87,7 @@ int mm_set_opt(const char *preset, mm_idxopt_t *io, mm_mapopt_t *mo)
|
|||
mo->pe_ori = 0<<1|1; // FR
|
||||
mo->a = 2, mo->b = 8, mo->q = 12, mo->e = 2, mo->q2 = 32, mo->e2 = 1;
|
||||
mo->zdrop = 100;
|
||||
mo->end_bonus = 11;
|
||||
mo->max_frag_len = 800;
|
||||
mo->max_gap = 100;
|
||||
mo->bw = 100;
|
||||
|
|
|
|||
|
|
@ -104,6 +104,7 @@ typedef struct {
|
|||
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
|
||||
int noncan; // cost of non-canonical splicing sites
|
||||
int zdrop; // break alignment if alignment score drops too fast along the diagonal
|
||||
int end_bonus;
|
||||
int min_dp_max; // drop an alignment if the score of the max scoring segment is below this threshold
|
||||
int min_ksw_len;
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue