fixed a band boundary bug in ksw2
This commit is contained in:
parent
b4280d186f
commit
f42790398d
|
|
@ -19,7 +19,7 @@ Minimap2 is a fast sequence mapping and alignment program that can find
|
||||||
overlaps between long noisy reads, or map long reads or their assemblies to a
|
overlaps between long noisy reads, or map long reads or their assemblies to a
|
||||||
reference genome optionally with detailed alignment (i.e. CIGAR). At present,
|
reference genome optionally with detailed alignment (i.e. CIGAR). At present,
|
||||||
it works efficiently with query sequences from a few kilobases to ~100
|
it works efficiently with query sequences from a few kilobases to ~100
|
||||||
megabases in length at a error rate ~15%. Minimap2 outputs in the [PAF][paf] or
|
megabases in length at an error rate ~15%. Minimap2 outputs in the [PAF][paf] or
|
||||||
the [SAM format][sam]. On limited test data sets, minimap2 is over 20 times
|
the [SAM format][sam]. On limited test data sets, minimap2 is over 20 times
|
||||||
faster than most other long-read aligners.
|
faster than most other long-read aligners.
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -42,14 +42,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
a2= _mm_sub_epi8(a2, tmp); \
|
a2= _mm_sub_epi8(a2, tmp); \
|
||||||
b2= _mm_sub_epi8(b2, tmp);
|
b2= _mm_sub_epi8(b2, tmp);
|
||||||
|
|
||||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, long_thres, long_diff;
|
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc, long_thres, long_diff;
|
||||||
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
||||||
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
||||||
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
||||||
__m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_;
|
__m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_;
|
||||||
__m128i *u, *v, *x, *y, *x2, *y2, *s, *p = 0;
|
__m128i *u, *v, *x, *y, *x2, *y2, *s, *p = 0;
|
||||||
|
|
||||||
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
ksw_reset_extz(ez);
|
||||||
|
if (m <= 1 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
||||||
|
|
||||||
|
if (q2 + e2 < q + e) t = q, q = q2, q2 = t, t = e, e = e2, e2 = t; // make sure q+e no larger than q2+e2
|
||||||
|
|
||||||
zero_ = _mm_set1_epi8(0);
|
zero_ = _mm_set1_epi8(0);
|
||||||
q_ = _mm_set1_epi8(q);
|
q_ = _mm_set1_epi8(q);
|
||||||
|
|
@ -60,16 +63,17 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||||
|
|
||||||
ksw_reset_extz(ez);
|
|
||||||
|
|
||||||
wl = wr = w;
|
wl = wr = w;
|
||||||
tlen_ = (tlen + 15) / 16;
|
tlen_ = (tlen + 15) / 16;
|
||||||
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
||||||
qlen_ = (qlen + 15) / 16;
|
qlen_ = (qlen + 15) / 16;
|
||||||
for (t = 1, max_sc = mat[0]; t < m * m; ++t)
|
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
|
||||||
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
||||||
|
min_sc = min_sc < mat[t]? min_sc : mat[t];
|
||||||
|
}
|
||||||
|
if (-min_sc > 2 * (q + e)) return; // otherwise, we won't see any mismatches
|
||||||
|
|
||||||
long_thres = (q2 - q) / (e - e2) - 1;
|
long_thres = e != e2? (q2 - q) / (e - e2) - 1 : 0;
|
||||||
if (q2 + e2 + long_thres * e2 > q + e + long_thres * e)
|
if (q2 + e2 + long_thres * e2 > q + e + long_thres * e)
|
||||||
++long_thres;
|
++long_thres;
|
||||||
long_diff = long_thres * (e - e2) - (q2 - q) - e2;
|
long_diff = long_thres * (e - e2) - (q2 - q) - e2;
|
||||||
|
|
@ -164,6 +168,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
z = _mm_max_epi8(z, b);
|
z = _mm_max_epi8(z, b);
|
||||||
z = _mm_max_epi8(z, a2);
|
z = _mm_max_epi8(z, a2);
|
||||||
z = _mm_max_epi8(z, b2);
|
z = _mm_max_epi8(z, b2);
|
||||||
|
z = _mm_min_epi8(z, sc_mch_);
|
||||||
__dp_code_block2; // save u[] and v[]; update a, b, a2 and b2
|
__dp_code_block2; // save u[] and v[]; update a, b, a2 and b2
|
||||||
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_));
|
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_));
|
||||||
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_max_epi8(b, zero_), qe_));
|
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_max_epi8(b, zero_), qe_));
|
||||||
|
|
@ -178,6 +183,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
|
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
|
||||||
tmp = _mm_cmpgt_epi8(b2, z);
|
tmp = _mm_cmpgt_epi8(b2, z);
|
||||||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
||||||
|
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||||
|
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||||
__dp_code_block2;
|
__dp_code_block2;
|
||||||
tmp = _mm_cmpgt_epi8(a, zero_);
|
tmp = _mm_cmpgt_epi8(a, zero_);
|
||||||
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_and_si128(tmp, a), qe_));
|
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_and_si128(tmp, a), qe_));
|
||||||
|
|
@ -215,6 +222,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
z = _mm_max_epi8(z, a2);
|
z = _mm_max_epi8(z, a2);
|
||||||
d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
|
d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
|
||||||
z = _mm_max_epi8(z, b2);
|
z = _mm_max_epi8(z, b2);
|
||||||
|
z = _mm_min_epi8(z, sc_mch_);
|
||||||
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
||||||
tmp = _mm_cmpgt_epi8(a, z);
|
tmp = _mm_cmpgt_epi8(a, z);
|
||||||
d = _mm_and_si128(tmp, _mm_set1_epi8(1));
|
d = _mm_and_si128(tmp, _mm_set1_epi8(1));
|
||||||
|
|
@ -228,6 +236,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
tmp = _mm_cmpgt_epi8(b2, z);
|
tmp = _mm_cmpgt_epi8(b2, z);
|
||||||
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
|
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
|
||||||
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
|
||||||
|
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||||
|
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||||
#endif
|
#endif
|
||||||
__dp_code_block2;
|
__dp_code_block2;
|
||||||
tmp = _mm_cmpgt_epi8(a, zero_);
|
tmp = _mm_cmpgt_epi8(a, zero_);
|
||||||
|
|
@ -270,6 +280,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
z = _mm_max_epi8(z, a2);
|
z = _mm_max_epi8(z, a2);
|
||||||
d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
|
d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
|
||||||
z = _mm_max_epi8(z, b2);
|
z = _mm_max_epi8(z, b2);
|
||||||
|
z = _mm_min_epi8(z, sc_mch_);
|
||||||
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
|
||||||
tmp = _mm_cmpgt_epi8(z, a);
|
tmp = _mm_cmpgt_epi8(z, a);
|
||||||
d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
|
d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
|
||||||
|
|
@ -283,6 +294,8 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
tmp = _mm_cmpgt_epi8(z, b2);
|
tmp = _mm_cmpgt_epi8(z, b2);
|
||||||
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
|
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
|
||||||
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
|
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
|
||||||
|
tmp = _mm_cmplt_epi8(sc_mch_, z);
|
||||||
|
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
|
||||||
#endif
|
#endif
|
||||||
__dp_code_block2;
|
__dp_code_block2;
|
||||||
tmp = _mm_cmpgt_epi8(zero_, a);
|
tmp = _mm_cmpgt_epi8(zero_, a);
|
||||||
|
|
|
||||||
|
|
@ -26,19 +26,21 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
|
|
||||||
#define __dp_code_block2 \
|
#define __dp_code_block2 \
|
||||||
z = _mm_max_epu8(z, b); /* z = max(z, b); this works because both are non-negative */ \
|
z = _mm_max_epu8(z, b); /* z = max(z, b); this works because both are non-negative */ \
|
||||||
|
z = _mm_min_epu8(z, max_sc_); \
|
||||||
_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1)); /* u[r][t..t+15] <- z - v[r-1][t-1..t+14] */ \
|
_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1)); /* u[r][t..t+15] <- z - v[r-1][t-1..t+14] */ \
|
||||||
_mm_store_si128(&v[t], _mm_sub_epi8(z, ut)); /* v[r][t..t+15] <- z - u[r-1][t..t+15] */ \
|
_mm_store_si128(&v[t], _mm_sub_epi8(z, ut)); /* v[r][t..t+15] <- z - u[r-1][t..t+15] */ \
|
||||||
z = _mm_sub_epi8(z, q_); \
|
z = _mm_sub_epi8(z, q_); \
|
||||||
a = _mm_sub_epi8(a, z); \
|
a = _mm_sub_epi8(a, z); \
|
||||||
b = _mm_sub_epi8(b, z);
|
b = _mm_sub_epi8(b, z);
|
||||||
|
|
||||||
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc;
|
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, min_sc;
|
||||||
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
|
||||||
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
int32_t *H = 0, H0 = 0, last_H0_t = 0;
|
||||||
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
||||||
__m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_;
|
__m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_, max_sc_;
|
||||||
__m128i *u, *v, *x, *y, *s, *p = 0;
|
__m128i *u, *v, *x, *y, *s, *p = 0;
|
||||||
|
|
||||||
|
ksw_reset_extz(ez);
|
||||||
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
|
||||||
|
|
||||||
zero_ = _mm_set1_epi8(0);
|
zero_ = _mm_set1_epi8(0);
|
||||||
|
|
@ -51,15 +53,17 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
||||||
sc_mch_ = _mm_set1_epi8(mat[0]);
|
sc_mch_ = _mm_set1_epi8(mat[0]);
|
||||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||||
|
max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2);
|
||||||
ksw_reset_extz(ez);
|
|
||||||
|
|
||||||
wl = wr = w;
|
wl = wr = w;
|
||||||
tlen_ = (tlen + 15) / 16;
|
tlen_ = (tlen + 15) / 16;
|
||||||
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
|
||||||
qlen_ = (qlen + 15) / 16;
|
qlen_ = (qlen + 15) / 16;
|
||||||
for (t = 1, max_sc = mat[0]; t < m * m; ++t)
|
for (t = 1, max_sc = mat[0], min_sc = mat[1]; t < m * m; ++t) {
|
||||||
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
max_sc = max_sc > mat[t]? max_sc : mat[t];
|
||||||
|
min_sc = min_sc < mat[t]? min_sc : mat[t];
|
||||||
|
}
|
||||||
|
if (-min_sc > 2 * (q + e)) return; // otherwise, we won't see any mismatches
|
||||||
|
|
||||||
mem = (uint8_t*)kcalloc(km, tlen_ * 6 + qlen_ + 1, 16);
|
mem = (uint8_t*)kcalloc(km, tlen_ * 6 + qlen_ + 1, 16);
|
||||||
u = (__m128i*)(((size_t)mem + 15) >> 4 << 4); // 16-byte aligned
|
u = (__m128i*)(((size_t)mem + 15) >> 4 << 4); // 16-byte aligned
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue