r85: two-round z-drop

This commit is contained in:
Heng Li 2017-06-27 10:36:24 -04:00
parent 490a37a599
commit c02ff4662c
4 changed files with 39 additions and 35 deletions

View File

@ -188,12 +188,14 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
if (i == r->cnt - 1 || qe - qs >= opt->min_ksw_len || re - rs >= opt->min_ksw_len) {
qseq = &qseq0[rev][qs];
mm_idx_getseq(mi, rid, rs, re, tseq);
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez);
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_GLOBAL_ONLY, ez);
if (ez->zdropped || ez->score < 0)
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, 0, ez);
if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar);
mm_update_extra(r->p, qseq, tseq, ez->n_cigar, ez->cigar, 0);
}
if (ez->score == KSW_NEG_INF) { // truncated by Z-drop
if (ez->zdropped) { // truncated by Z-drop
int j;
for (j = i - 1; j >= 0; --j)
if ((int32_t)a[r->as + j].x < re + ez->max_t)

24
ksw2.h
View File

@ -14,7 +14,8 @@
#define KSW_EZ_REV_CIGAR 0x40 // reverse CIGAR in the output
typedef struct {
int max, max_q, max_t; // max extension score and coordinate
uint32_t max:31, zdropped:1;
int max_q, max_t; // max extension coordinate
int mqe, mqe_t; // max score when reaching the end of query
int mte, mte_q; // max score when reaching the end of target
int score; // max score reaching both ends; may be KSW_NEG_INF
@ -112,24 +113,11 @@ static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t
*m_cigar_ = m_cigar, *n_cigar_ = n_cigar, *cigar_ = cigar;
}
static inline int ksw_cigar2score(int8_t m, const int8_t *mat, int8_t q, int8_t e, const uint8_t *query, const uint8_t *target, int n_cigar, const uint32_t *cigar)
static inline void ksw_reset_extz(ksw_extz_t *ez)
{
int i, j, k, l, score;
for (k = 0, score = 0, i = j = 0; k < n_cigar; ++k) {
int op = cigar[k] & 0xf, len = cigar[k] >> 4;
if (op == 0) {
for (l = 0; l < len; ++l)
score += mat[target[i + l] * m + query[j + l]];
i += len, j += len;
} else if (op == 1) {
score -= q + len * e;
j += len;
} else if (op == 2) {
score -= q + len * e;
i += len;
}
}
return score;
ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1;
ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF;
ez->n_cigar = 0, ez->zdropped = 0;
}
#endif

View File

@ -8,6 +8,21 @@
#include <smmintrin.h>
#endif
static inline int apply_zdrop(ksw_extz_t *ez, int32_t H, int r, int t, int zdrop, int8_t e)
{
if (H > (int32_t)ez->max) {
ez->max = H, ez->max_t = t, ez->max_q = r - t;
} else if (t >= ez->max_t && r - t >= ez->max_q) {
int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l;
l = tl > ql? tl - ql : ql - tl;
if (ez->max - H > zdrop + l * e) {
ez->zdropped = 1;
return 1;
}
}
return 0;
}
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
{
#define __dp_code_block1 \
@ -52,9 +67,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
sc_mis_ = _mm_set1_epi8(mat[1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard
ez->max_q = ez->max_t = ez->mqe_t = ez->mte_q = -1;
ez->max = 0, ez->score = ez->mqe = ez->mte = KSW_NEG_INF;
ez->n_cigar = 0;
ksw_reset_extz(ez);
wl = wr = w;
tlen_ = (tlen + 15) / 16;
@ -238,14 +251,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
ez->mte = H[en0], ez->mte_q = r - en;
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
ez->mqe = H[st0], ez->mqe_t = st0;
if (max_H > ez->max) {
ez->max = max_H, ez->max_t = max_t, ez->max_q = r - max_t;
} else if (max_t >= ez->max_t && r - max_t >= ez->max_q) {
int tl = max_t - ez->max_t, ql = (r - max_t) - ez->max_q, l;
l = tl > ql? tl - ql : ql - tl;
if (ez->max - max_H > zdrop + l * e)
break;
}
if (apply_zdrop(ez, max_H, r, max_t, zdrop, e)) break;
if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H[tlen - 1];
if (flag & KSW_EZ_DYN_BAND & 0) { // FIXME: don't use - buggy!
@ -259,9 +265,17 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
}
} else {
if (r > 0) {
if (last_H0_t >= st0 && last_H0_t <= en0)
if (last_H0_t >= st0 && last_H0_t <= en0 && last_H0_t + 1 >= st0 && last_H0_t + 1 <= en0) {
int32_t d0 = v8[last_H0_t] - qe;
int32_t d1 = u8[last_H0_t + 1] - qe;
if (d0 > d1) H0 += d0;
else H0 += d1, ++last_H0_t;
} else if (last_H0_t >= st0 && last_H0_t <= en0) {
H0 += v8[last_H0_t] - qe;
else ++last_H0_t, H0 += u8[last_H0_t] - qe;
} else {
++last_H0_t, H0 += u8[last_H0_t] - qe;
}
if (apply_zdrop(ez, H0, r, last_H0_t, zdrop, e)) break;
} else H0 = v8[0] - qe - qe, last_H0_t = 0;
if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H0;
@ -273,7 +287,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
if (with_max) kfree(km, H);
if (with_cigar) { // backtrack
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
if (ez->score > KSW_NEG_INF && !(flag&KSW_EZ_EXTZ_ONLY))
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
else if (ez->max_t >= 0 && ez->max_q >= 0)
ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);

2
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r79-pre"
#define MM_VERSION "2.0-r85-pre"
void liftrlimit()
{