fixed a band boundary bug in ksw2
This commit is contained in:
parent
b4280d186f
commit
f42790398d
|
|
@ -19,7 +19,7 @@ Minimap2 is a fast sequence mapping and alignment program that can find
|
|||
overlaps between long noisy reads, or map long reads or their assemblies to a
|
||||
reference genome optionally with detailed alignment (i.e. CIGAR). At present,
|
||||
it works efficiently with query sequences from a few kilobases to ~100
|
||||
megabases in length at a error rate ~15%. Minimap2 outputs in the [PAF][paf] or
|
||||
megabases in length at an error rate ~15%. Minimap2 outputs in the [PAF][paf] or
|
||||
the [SAM format][sam]. On limited test data sets, minimap2 is over 20 times
|
||||
faster than most other long-read aligners.
|
||||
|
||||
|
|
|
|||
|
|
@ -42,14 +42,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
a2= _mm_sub_epi8(a2, tmp); \
|
||||
b2= _mm_sub_epi8(b2, tmp);
|
||||
|
||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, long_thres, long_diff;
|
||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc, long_thres, long_diff;
|
||||
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
||||
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
||||
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
||||
__m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_;
|
||||
__m128i *u, *v, *x, *y, *x2, *y2, *s, *p = 0;
|
||||
|
||||
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
||||
ksw_reset_extz(ez);
|
||||
if (m <= 1 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
||||
|
||||
if (q2 + e2 < q + e) t = q, q = q2, q2 = t, t = e, e = e2, e2 = t; // make sure q+e no larger than q2+e2
|
||||
|
||||
zero_ = _mm_set1_epi8(0);
|
||||
q_ = _mm_set1_epi8(q);
|
||||
|
|
@ -60,16 +63,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||
|
||||
ksw_reset_extz(ez);
|
||||
|
||||
wl = wr = w;
|
||||
tlen_ = (tlen + 15) / 16;
|
||||
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
||||
qlen_ = (qlen + 15) / 16;
|
||||
for (t = 1, max_sc = mat[0]; t < m * m; ++t)
|
||||
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
|
||||
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
||||
min_sc = min_sc < mat[t]? min_sc : mat[t];
|
||||
}
|
||||
if (-min_sc > 2 * (q + e)) return; // otherwise, we won't see any mismatches
|
||||
|
||||
long_thres = (q2 - q) / (e - e2) - 1;
|
||||
long_thres = e != e2? (q2 - q) / (e - e2) - 1 : 0;
|
||||
if (q2 + e2 + long_thres * e2 > q + e + long_thres * e)
|
||||
++long_thres;
|
||||
long_diff = long_thres * (e - e2) - (q2 - q) - e2;
|
||||
|
|
@ -164,6 +168,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
z = _mm_max_epi8(z, b);
|
||||
z = _mm_max_epi8(z, a2);
|
||||
z = _mm_max_epi8(z, b2);
|
||||
z = _mm_min_epi8(z, sc_mch_);
|
||||
__dp_code_block2; // save u[] and v[]; update a, b, a2 and b2
|
||||
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_));
|
||||
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_max_epi8(b, zero_), qe_));
|
||||
|
|
@ -178,6 +183,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
|
||||
tmp = _mm_cmpgt_epi8(b2, z);
|
||||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
||||
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||
__dp_code_block2;
|
||||
tmp = _mm_cmpgt_epi8(a, zero_);
|
||||
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_and_si128(tmp, a), qe_));
|
||||
|
|
@ -215,6 +222,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
z = _mm_max_epi8(z, a2);
|
||||
d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
|
||||
z = _mm_max_epi8(z, b2);
|
||||
z = _mm_min_epi8(z, sc_mch_);
|
||||
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
||||
tmp = _mm_cmpgt_epi8(a, z);
|
||||
d = _mm_and_si128(tmp, _mm_set1_epi8(1));
|
||||
|
|
@ -228,6 +236,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
tmp = _mm_cmpgt_epi8(b2, z);
|
||||
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
|
||||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
||||
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||
#endif
|
||||
__dp_code_block2;
|
||||
tmp = _mm_cmpgt_epi8(a, zero_);
|
||||
|
|
@ -270,6 +280,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
z = _mm_max_epi8(z, a2);
|
||||
d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
|
||||
z = _mm_max_epi8(z, b2);
|
||||
z = _mm_min_epi8(z, sc_mch_);
|
||||
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
||||
tmp = _mm_cmpgt_epi8(z, a);
|
||||
d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
|
||||
|
|
@ -283,6 +294,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
tmp = _mm_cmpgt_epi8(z, b2);
|
||||
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
|
||||
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
|
||||
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||
#endif
|
||||
__dp_code_block2;
|
||||
tmp = _mm_cmpgt_epi8(zero_, a);
|
||||
|
|
|
|||
|
|
@ -26,19 +26,21 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
|
||||
#define __dp_code_block2 \
|
||||
z = _mm_max_epu8(z, b); /* z = max(z, b); this works because both are non-negative */ \
|
||||
z = _mm_min_epu8(z, max_sc_); \
|
||||
_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1)); /* u[r][t..t+15] <- z - v[r-1][t-1..t+14] */ \
|
||||
_mm_store_si128(&v[t], _mm_sub_epi8(z, ut)); /* v[r][t..t+15] <- z - u[r-1][t..t+15] */ \
|
||||
z = _mm_sub_epi8(z, q_); \
|
||||
a = _mm_sub_epi8(a, z); \
|
||||
b = _mm_sub_epi8(b, z);
|
||||
|
||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc;
|
||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc;
|
||||
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
||||
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
||||
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
||||
__m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_;
|
||||
__m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_, max_sc_;
|
||||
__m128i *u, *v, *x, *y, *s, *p = 0;
|
||||
|
||||
ksw_reset_extz(ez);
|
||||
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
||||
|
||||
zero_ = _mm_set1_epi8(0);
|
||||
|
|
@ -51,15 +53,17 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
sc_mch_ = _mm_set1_epi8(mat[0]);
|
||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||
|
||||
ksw_reset_extz(ez);
|
||||
max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2);
|
||||
|
||||
wl = wr = w;
|
||||
tlen_ = (tlen + 15) / 16;
|
||||
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
||||
qlen_ = (qlen + 15) / 16;
|
||||
for (t = 1, max_sc = mat[0]; t < m * m; ++t)
|
||||
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
|
||||
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
||||
min_sc = min_sc < mat[t]? min_sc : mat[t];
|
||||
}
|
||||
if (-min_sc > 2 * (q + e)) return; // otherwise, we won't see any mismatches
|
||||
|
||||
mem = (uint8_t*)kcalloc(km, tlen_ * 6 + qlen_ + 1, 16);
|
||||
u = (__m128i*)(((size_t)mem + 15) >> 4 << 4); // 16-byte aligned
|
||||
|
|
|
|||
Loading…
Reference in New Issue