r159: use two-piece gap penalty

This commit is contained in:
Heng Li 2017-07-08 10:26:00 -04:00
parent 9823317e8f
commit cc554aee43
9 changed files with 499 additions and 113 deletions

View File

@ -1,17 +1,13 @@
CC= gcc CC= gcc
CFLAGS= -g -Wall -O2 -Wc++-compat CFLAGS= -g -Wall -O2 -Wc++-compat -march=native
CPPFLAGS= -DHAVE_KALLOC CPPFLAGS= -DHAVE_KALLOC
INCLUDES= -I. INCLUDES= -I.
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o misc.o bseq.o sketch.o sdust.o \ OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o misc.o bseq.o \
index.o chain.o align.o hit.o map.o format.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
PROG= minimap2 PROG= minimap2
PROG_EXTRA= sdust PROG_EXTRA= sdust
LIBS= -lm -lz -lpthread LIBS= -lm -lz -lpthread
ifneq ($(sse4),)
CFLAGS += -msse4
endif
.SUFFIXES:.c .o .SUFFIXES:.c .o
.c.o: .c.o:
@ -34,18 +30,19 @@ clean:
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session* rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
depend: depend:
(LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(DFLAGS) -- *.c) (LC_ALL=C; export LC_ALL; makedepend -Y -- $(CFLAGS) $(CPPFLAGS) -- *.c)
# DO NOT DELETE # DO NOT DELETE
align.o: minimap.h mmpriv.h bseq.h ksw2.h align.o: minimap.h mmpriv.h bseq.h ksw2.h kalloc.h
bseq.o: bseq.h kseq.h bseq.o: bseq.h kseq.h
chain.o: minimap.h mmpriv.h bseq.h kalloc.h chain.o: minimap.h mmpriv.h bseq.h kalloc.h
format.o: mmpriv.h minimap.h bseq.h format.o: mmpriv.h minimap.h bseq.h
hit.o: mmpriv.h minimap.h bseq.h kalloc.h hit.o: mmpriv.h minimap.h bseq.h kalloc.h
index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h
kalloc.o: kalloc.h kalloc.o: kalloc.h
ksw2_extz2_sse.o: ksw2.h ksw2_extd2_sse.o: ksw2.h kalloc.h
ksw2_extz2_sse.o: ksw2.h kalloc.h
main.o: bseq.h minimap.h mmpriv.h main.o: bseq.h minimap.h mmpriv.h
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h
misc.o: minimap.h ksort.h misc.o: minimap.h ksort.h

21
align.c
View File

@ -124,6 +124,14 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
} }
} }
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int flag, ksw_extz_t *ez)
{
if (opt->q == opt->q2 && opt->e == opt->e2)
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, opt->zdrop, flag, ez);
else
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, opt->zdrop, flag, ez);
}
static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x) static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)
{ {
int64_t i, off0 = mi->seq[rid].offset, off = off0 + x; int64_t i, off0 = mi->seq[rid].offset, off = off0 + x;
@ -199,7 +207,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
mm_idx_getseq(mi, rid, rs0, rs, tseq); mm_idx_getseq(mi, rid, rs0, rs, tseq);
mm_seq_rev(qs - qs0, qseq); mm_seq_rev(qs - qs0, qseq);
mm_seq_rev(rs - rs0, tseq); mm_seq_rev(rs - rs0, tseq);
ksw_extz2_sse(km, qs - qs0, qseq, rs - rs0, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez); mm_align_pair(km, opt, qs - qs0, qseq, rs - rs0, tseq, mat, bw, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez);
if (ez->n_cigar > 0) { if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar); mm_append_cigar(r, ez->n_cigar, ez->cigar);
r->p->dp_score += ez->max; r->p->dp_score += ez->max;
@ -220,14 +228,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
bw1 = qe - qs > re - rs? qe - qs : re - rs; bw1 = qe - qs > re - rs? qe - qs : re - rs;
qseq = &qseq0[rev][qs]; qseq = &qseq0[rev][qs];
mm_idx_getseq(mi, rid, rs, re, tseq); mm_idx_getseq(mi, rid, rs, re, tseq);
#if 0 mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, KSW_EZ_APPROX_MAX, ez);
int k, ql = qe - qs, tl = re - rs;
for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr);
for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr);
#endif
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, KSW_EZ_APPROX_MAX, ez);
if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop)) if (mm_check_zdrop(qseq, tseq, ez->n_cigar, ez->cigar, mat, opt->q, opt->e, opt->zdrop))
ksw_extz2_sse(km, qe - qs, qseq, re - rs, tseq, 5, mat, opt->q, opt->e, bw1, opt->zdrop, 0, ez); mm_align_pair(km, opt, qe - qs, qseq, re - rs, tseq, mat, bw1, 0, ez);
if (ez->n_cigar > 0) if (ez->n_cigar > 0)
mm_append_cigar(r, ez->n_cigar, ez->cigar); mm_append_cigar(r, ez->n_cigar, ez->cigar);
if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle. if (ez->zdropped) { // truncated by Z-drop; TODO: sometimes Z-drop kicks in because the next seed placement is wrong. This can be fixed in principle.
@ -250,7 +253,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
if (!dropped && qe < qe0 && re < re0) { // right extension if (!dropped && qe < qe0 && re < re0) { // right extension
qseq = &qseq0[rev][qe]; qseq = &qseq0[rev][qe];
mm_idx_getseq(mi, rid, re, re0, tseq); mm_idx_getseq(mi, rid, re, re0, tseq);
ksw_extz2_sse(km, qe0 - qe, qseq, re0 - re, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, KSW_EZ_EXTZ_ONLY, ez); mm_align_pair(km, opt, qe0 - qe, qseq, re0 - re, tseq, mat, bw, KSW_EZ_EXTZ_ONLY, ez);
if (ez->n_cigar > 0) { if (ez->n_cigar > 0) {
mm_append_cigar(r, ez->n_cigar, ez->cigar); mm_append_cigar(r, ez->n_cigar, ez->cigar);
r->p->dp_score += ez->max; r->p->dp_score += ez->max;

46
ksw2.h
View File

@ -10,7 +10,6 @@
#define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard #define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard
#define KSW_EZ_APPROX_MAX 0x08 // approximate max; this is faster with sse #define KSW_EZ_APPROX_MAX 0x08 // approximate max; this is faster with sse
#define KSW_EZ_APPROX_DROP 0x10 // approximate Z-drop; faster with sse #define KSW_EZ_APPROX_DROP 0x10 // approximate Z-drop; faster with sse
#define KSW_EZ_DYN_BAND 0x20 // once used, ksw_extz_t::{mqe,mte} may be wrong
#define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension #define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension
#define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output #define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output
@ -48,6 +47,12 @@ extern "C" {
void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez); void ksw_extz(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez); void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);
/** /**
* Global alignment * Global alignment
* *
@ -92,18 +97,23 @@ static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uin
return cigar; return cigar;
} }
// In the backtrack matrix, value p[] has the following structure:
// bit 0-2: which type gets the max - 0 for H, 1 for E, 2 for F, 3 for \tilde{E} and 4 for \tilde{F}
// bit 3/0x08: 1 if a continuation on the E state (bit 5/0x20 for a continuation on \tilde{E})
// bit 4/0x10: 1 if a continuation on the F state (bit 6/0x40 for a continuation on \tilde{F})
static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *p, const int *off, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_) static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *p, const int *off, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_)
{ { // p[] - lower 3 bits: which type gets the max; bit
int n_cigar = 0, m_cigar = *m_cigar_, which = 0, i = i0, j = j0, r; int n_cigar = 0, m_cigar = *m_cigar_, i = i0, j = j0, r, state = 0;
uint32_t *cigar = *cigar_, tmp; uint32_t *cigar = *cigar_, tmp;
while (i >= 0 && j >= 0) { while (i >= 0 && j >= 0) { // at the beginning of the loop, _state_ tells us which state to check
if (is_rot) r = i + j, tmp = p[r * n_col + i - off[r]]; if (is_rot) r = i + j, tmp = p[r * n_col + i - off[r]];
else tmp = p[i * n_col + j - off[i]]; else tmp = p[i * n_col + j - off[i]];
which = tmp >> (which << 1) & 3; if (state == 0) state = tmp & 7; // if requesting the H state, find state one maximizes it.
if (which == 0) which = tmp & 3; else if (!(tmp >> (state + 2) & 1)) state = 0; // if requesting other states, _state_ stays the same if it is a continuation; otherwise, set to H
if (which == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match if (state == 0) state = tmp & 7; // TODO: probably this line can be merged into the "else if" line right above; not 100% sure
else if (which == 1) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion if (state == 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 0, 1), --i, --j; // match
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion else if (state == 1 || state == 3) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, 1), --i; // deletion
else cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, 1), --j; // insertion
} }
if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, i + 1); // first deletion if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, i + 1); // first deletion
if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion
@ -120,4 +130,22 @@ static inline void ksw_reset_extz(ksw_extz_t *ez)
ez->n_cigar = 0, ez->zdropped = 0; ez->n_cigar = 0, ez->zdropped = 0;
} }
static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a, int b, int zdrop, int8_t e)
{
int r, t;
if (is_rot) r = a, t = b;
else r = a + b, t = a;
if (H > (int32_t)ez->max) {
ez->max = H, ez->max_t = t, ez->max_q = r - t;
} else if (t >= ez->max_t && r - t >= ez->max_q) {
int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l;
l = tl > ql? tl - ql : ql - tl;
if (zdrop >= 0 && ez->max - H > zdrop + l * e) {
ez->zdropped = 1;
return 1;
}
}
return 0;
}
#endif #endif

379
ksw2_extd2_sse.c 100644
View File

@ -0,0 +1,379 @@
#include <string.h>
#include <stdio.h>
#include "ksw2.h"
#ifdef __SSE2__
#include <emmintrin.h>
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
{
#define __dp_code_block1 \
z = _mm_load_si128(&s[t]); \
xt1 = _mm_load_si128(&x[t]); /* xt1 <- x[r-1][t..t+15] */ \
tmp = _mm_srli_si128(xt1, 15); /* tmp <- x[r-1][t+15] */ \
xt1 = _mm_or_si128(_mm_slli_si128(xt1, 1), x1_); /* xt1 <- x[r-1][t-1..t+14] */ \
x1_ = tmp; \
vt1 = _mm_load_si128(&v[t]); /* vt1 <- v[r-1][t..t+15] */ \
tmp = _mm_srli_si128(vt1, 15); /* tmp <- v[r-1][t+15] */ \
vt1 = _mm_or_si128(_mm_slli_si128(vt1, 1), v1_); /* vt1 <- v[r-1][t-1..t+14] */ \
v1_ = tmp; \
a = _mm_add_epi8(xt1, vt1); /* a <- x[r-1][t-1..t+14] + v[r-1][t-1..t+14] */ \
ut = _mm_load_si128(&u[t]); /* ut <- u[t..t+15] */ \
b = _mm_add_epi8(_mm_load_si128(&y[t]), ut); /* b <- y[r-1][t..t+15] + u[r-1][t..t+15] */ \
x2t1= _mm_load_si128(&x2[t]); \
tmp = _mm_srli_si128(x2t1, 15); \
x2t1= _mm_or_si128(_mm_slli_si128(x2t1, 1), x21_); \
x21_= tmp; \
a2= _mm_add_epi8(x2t1, vt1); \
b2= _mm_add_epi8(_mm_load_si128(&y2[t]), ut);
#define __dp_code_block2 \
_mm_store_si128(&u[t], _mm_sub_epi8(z, vt1)); /* u[r][t..t+15] <- z - v[r-1][t-1..t+14] */ \
_mm_store_si128(&v[t], _mm_sub_epi8(z, ut)); /* v[r][t..t+15] <- z - u[r-1][t..t+15] */ \
tmp = _mm_sub_epi8(z, q_); \
a = _mm_sub_epi8(a, tmp); \
b = _mm_sub_epi8(b, tmp); \
tmp = _mm_sub_epi8(z, q2_); \
a2= _mm_sub_epi8(a2, tmp); \
b2= _mm_sub_epi8(b2, tmp);
int r, t, qe = q + e, n_col_, *off = 0, tlen_, qlen_, last_st, last_en, wl, wr, max_sc, long_thres, long_diff;
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
int32_t *H = 0, H0 = 0, last_H0_t = 0;
uint8_t *qr, *sf, *mem, *mem2 = 0;
__m128i q_, q2_, qe_, qe2_, zero_, sc_mch_, sc_mis_, m1_;
__m128i *u, *v, *x, *y, *x2, *y2, *s, *p = 0;
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
zero_ = _mm_set1_epi8(0);
q_ = _mm_set1_epi8(q);
q2_ = _mm_set1_epi8(q2);
qe_ = _mm_set1_epi8(q + e);
qe2_ = _mm_set1_epi8(q2 + e2);
sc_mch_ = _mm_set1_epi8(mat[0]);
sc_mis_ = _mm_set1_epi8(mat[1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard
ksw_reset_extz(ez);
wl = wr = w;
tlen_ = (tlen + 15) / 16;
n_col_ = ((w + 1 < tlen? (w + 1 < qlen? w + 1 : qlen): tlen) + 15) / 16 + 1;
qlen_ = (qlen + 15) / 16;
for (t = 1, max_sc = mat[0]; t < m * m; ++t)
max_sc = max_sc > mat[t]? max_sc : mat[t];
long_thres = (q2 - q) / (e - e2) - 1;
if (q2 + e2 + long_thres * e2 > q + e + long_thres * e)
++long_thres;
long_diff = long_thres * (e - e2) - (q2 - q) - e2;
mem = (uint8_t*)kcalloc(km, tlen_ * 8 + qlen_ + 1, 16);
u = (__m128i*)(((size_t)mem + 15) >> 4 << 4); // 16-byte aligned
v = u + tlen_, x = v + tlen_, y = x + tlen_, x2 = y + tlen_, y2 = x2 + tlen_;
s = y2 + tlen_, sf = (uint8_t*)(s + tlen_), qr = sf + tlen_ * 16;
memset(u, -q - e, tlen_ * 16);
memset(v, -q - e, tlen_ * 16);
memset(x, -q - e, tlen_ * 16);
memset(y, -q - e, tlen_ * 16);
memset(x2, -q2 - e2, tlen_ * 16);
memset(y2, -q2 - e2, tlen_ * 16);
if (!approx_max) {
H = (int32_t*)kmalloc(km, tlen_ * 16 * 4);
for (t = 0; t < tlen_ * 16; ++t) H[t] = KSW_NEG_INF;
}
if (with_cigar) {
mem2 = (uint8_t*)kmalloc(km, ((qlen + tlen - 1) * n_col_ + 1) * 16);
p = (__m128i*)(((size_t)mem2 + 15) >> 4 << 4);
off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int));
}
for (t = 0; t < qlen; ++t) qr[t] = query[qlen - 1 - t];
memcpy(sf, target, tlen);
for (r = 0, last_st = last_en = -1; r < qlen + tlen - 1; ++r) {
int st = 0, en = tlen - 1, st0, en0, st_, en_;
int8_t x1, x21, v1;
uint8_t *qrr = qr + (qlen - 1 - r), p_en0 = 0;
int8_t *u8 = (int8_t*)u, *v8 = (int8_t*)v, *x8 = (int8_t*)x, *x28 = (int8_t*)x2;
__m128i x1_, x21_, v1_;
// find the boundaries
if (st < r - qlen + 1) st = r - qlen + 1;
if (en > r) en = r;
if (st < (r-wr+1)>>1) st = (r-wr+1)>>1; // take the ceil
if (en > (r+wl)>>1) en = (r+wl)>>1; // take the floor
if (st > en) {
ez->zdropped = 1;
break;
}
st0 = st, en0 = en;
st = st / 16 * 16, en = (en + 16) / 16 * 16 - 1;
// set boundary conditions
if (st > 0) {
if (st - 1 >= last_st && st - 1 <= last_en) {
x1 = x8[st - 1], x21 = x28[st - 1], v1 = v8[st - 1]; // (r-1,s-1) calculated in the last round
} else {
x1 = -q - e, x21 = -q2 - e2;
v1 = -q - e;
}
} else {
x1 = -q - e, x21 = -q2 - e2;
v1 = r == 0? -q - e : r < long_thres? -e : r == long_thres? long_diff : -e2;
}
if (en >= r) {
((int8_t*)y)[r] = -q - e, ((int8_t*)y2)[r] = -q2 - e2;
u8[r] = r == 0? -q - e : r < long_thres? -e : r == long_thres? long_diff : -e2;
}
// loop fission: set scores first
if (!(flag & KSW_EZ_GENERIC_SC)) {
for (t = st0; t <= en0; t += 16) {
__m128i sq, st, tmp, mask;
sq = _mm_loadu_si128((__m128i*)&sf[t]);
st = _mm_loadu_si128((__m128i*)&qrr[t]);
mask = _mm_or_si128(_mm_cmpeq_epi8(sq, m1_), _mm_cmpeq_epi8(st, m1_));
tmp = _mm_cmpeq_epi8(sq, st);
#ifdef __SSE4_1__
tmp = _mm_blendv_epi8(sc_mis_, sc_mch_, tmp);
#else
tmp = _mm_or_si128(_mm_andnot_si128(tmp, sc_mis_), _mm_and_si128(tmp, sc_mch_));
#endif
tmp = _mm_andnot_si128(mask, tmp);
_mm_storeu_si128((__m128i*)((int8_t*)s + t), tmp);
}
} else {
for (t = st0; t <= en0; ++t)
((uint8_t*)s)[t] = mat[sf[t] * m + qrr[t]];
}
// core loop
x1_ = _mm_cvtsi32_si128((uint8_t)x1);
x21_ = _mm_cvtsi32_si128((uint8_t)x21);
v1_ = _mm_cvtsi32_si128((uint8_t)v1);
st_ = st / 16, en_ = en / 16;
if (!with_cigar) { // score only
for (t = st_; t <= en_; ++t) {
__m128i z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
__dp_code_block1;
#ifdef __SSE4_1__
z = _mm_max_epi8(z, a);
z = _mm_max_epi8(z, b);
z = _mm_max_epi8(z, a2);
z = _mm_max_epi8(z, b2);
__dp_code_block2; // save u[] and v[]; update a, b, a2 and b2
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_));
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_max_epi8(b, zero_), qe_));
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_max_epi8(a2, zero_), qe2_));
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_max_epi8(b2, zero_), qe2_));
#else
tmp = _mm_cmpgt_epi8(a, z);
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
tmp = _mm_cmpgt_epi8(b, z);
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
tmp = _mm_cmpgt_epi8(a2, z);
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
tmp = _mm_cmpgt_epi8(b2, z);
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
__dp_code_block2;
tmp = _mm_cmpgt_epi8(a, zero_);
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_and_si128(tmp, a), qe_));
tmp = _mm_cmpgt_epi8(b, zero_);
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_and_si128(tmp, b), qe_));
tmp = _mm_cmpgt_epi8(a2, zero_);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
tmp = _mm_cmpgt_epi8(b2, zero_);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
#endif
}
} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
__m128i *pr = p + r * n_col_ - st_;
off[r] = st;
if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band
int8_t a, a2, z = ((uint8_t*)s)[en0];
a = x8[en0-1] + v8[en0-1];
p_en0 = a > z? 1 : 0;
z = a > z? a : z;
p_en0 |= a - (z - q) > 0? 1<<4 : 0;
a2 = x28[en0-1] + v8[en0-1];
p_en0 = a2 > z? 3 : p_en0;
z = a2 > z? a2 : z;
p_en0 |= a2 - (z - q2) > 0? 1<<6 : 0;
}
for (t = st_; t <= en_; ++t) {
__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
__dp_code_block1;
#ifdef __SSE4_1__
d = _mm_and_si128(_mm_cmpgt_epi8(a, z), _mm_set1_epi8(1)); // d = a > z? 1 : 0
z = _mm_max_epi8(z, a);
d = _mm_blendv_epi8(d, _mm_set1_epi8(2), _mm_cmpgt_epi8(b, z)); // d = b > z? 2 : d
z = _mm_max_epi8(z, b);
d = _mm_blendv_epi8(d, _mm_set1_epi8(3), _mm_cmpgt_epi8(a2, z)); // d = a2 > z? 3 : d
z = _mm_max_epi8(z, a2);
d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
z = _mm_max_epi8(z, b2);
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
tmp = _mm_cmpgt_epi8(a, z);
d = _mm_and_si128(tmp, _mm_set1_epi8(1));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a));
tmp = _mm_cmpgt_epi8(b, z);
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(2)));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b));
tmp = _mm_cmpgt_epi8(a2, z);
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
tmp = _mm_cmpgt_epi8(b2, z);
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
#endif
__dp_code_block2;
tmp = _mm_cmpgt_epi8(a, zero_);
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_and_si128(tmp, a), qe_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x08))); // d = a > 0? 1<<3 : 0
tmp = _mm_cmpgt_epi8(b, zero_);
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_and_si128(tmp, b), qe_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x10))); // d = b > 0? 1<<4 : 0
tmp = _mm_cmpgt_epi8(a2, zero_);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
tmp = _mm_cmpgt_epi8(b2, zero_);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0
_mm_store_si128(&pr[t], d);
}
} else { // gap right-alignment
__m128i *pr = p + r * n_col_ - st_;
off[r] = st;
if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band
int8_t a, a2, z = ((uint8_t*)s)[en0];
a = x8[en0-1] + v8[en0-1];
p_en0 = a >= z? 1 : 0;
z = a >= z? a : z;
p_en0 |= a - (z - q) >= 0? 1<<4 : 0;
a2 = x28[en0-1] + v8[en0-1];
p_en0 = a2 >= z? 3 : p_en0;
z = a2 >= z? a2 : z;
p_en0 |= a2 - (z - q2) >= 0? 1<<6 : 0;
}
for (t = st_; t <= en_; ++t) {
__m128i d, z, a, b, a2, b2, xt1, x2t1, vt1, ut, tmp;
__dp_code_block1;
#ifdef __SSE4_1__
d = _mm_andnot_si128(_mm_cmpgt_epi8(z, a), _mm_set1_epi8(1)); // d = z > a? 0 : 1
z = _mm_max_epi8(z, a);
d = _mm_blendv_epi8(_mm_set1_epi8(2), d, _mm_cmpgt_epi8(z, b)); // d = z > b? d : 2
z = _mm_max_epi8(z, b);
d = _mm_blendv_epi8(_mm_set1_epi8(3), d, _mm_cmpgt_epi8(z, a2)); // d = z > a2? d : 3
z = _mm_max_epi8(z, a2);
d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
z = _mm_max_epi8(z, b2);
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
tmp = _mm_cmpgt_epi8(z, a);
d = _mm_andnot_si128(tmp, _mm_set1_epi8(1));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a));
tmp = _mm_cmpgt_epi8(z, b);
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(2)));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b));
tmp = _mm_cmpgt_epi8(z, a2);
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2));
tmp = _mm_cmpgt_epi8(z, b2);
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
#endif
__dp_code_block2;
tmp = _mm_cmpgt_epi8(zero_, a);
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_andnot_si128(tmp, a), qe_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x08))); // d = a > 0? 1<<3 : 0
tmp = _mm_cmpgt_epi8(zero_, b);
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_andnot_si128(tmp, b), qe_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x10))); // d = b > 0? 1<<4 : 0
tmp = _mm_cmpgt_epi8(zero_, a2);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, a2), qe2_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
tmp = _mm_cmpgt_epi8(zero_, b2);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, b2), qe2_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0
_mm_store_si128(&pr[t], d);
}
}
if (with_cigar && en0 < r && en0 < tlen - 1) ((uint8_t*)(p + r * n_col_))[en0 - st] = p_en0;
if (!approx_max) { // find the exact max with a 32-bit score array
int32_t max_H, max_t;
// compute H[], max_H and max_t
if (r > 0) {
int32_t HH[4], tt[4], en1 = st0 + (en0 - st0) / 4 * 4, i;
__m128i max_H_, max_t_;
max_H = H[en0] = en0 > 0? H[en0-1] + u8[en0] : H[en0] + v8[en0]; // special casing the last element
max_t = en0;
max_H_ = _mm_set1_epi32(max_H);
max_t_ = _mm_set1_epi32(max_t);
for (t = st0; t < en1; t += 4) { // this implements: H[t]+=v8[t]-qe; if(H[t]>max_H) max_H=H[t],max_t=t;
__m128i H1, tmp, t_;
H1 = _mm_loadu_si128((__m128i*)&H[t]);
t_ = _mm_setr_epi32(v8[t], v8[t+1], v8[t+2], v8[t+3]);
H1 = _mm_add_epi32(H1, t_);
_mm_storeu_si128((__m128i*)&H[t], H1);
t_ = _mm_set1_epi32(t);
tmp = _mm_cmpgt_epi32(H1, max_H_);
#ifdef __SSE4_1__
max_H_ = _mm_blendv_epi8(max_H_, H1, tmp);
max_t_ = _mm_blendv_epi8(max_t_, t_, tmp);
#else
max_H_ = _mm_or_si128(_mm_and_si128(tmp, H1), _mm_andnot_si128(tmp, max_H_));
max_t_ = _mm_or_si128(_mm_and_si128(tmp, t_), _mm_andnot_si128(tmp, max_t_));
#endif
}
_mm_storeu_si128((__m128i*)HH, max_H_);
_mm_storeu_si128((__m128i*)tt, max_t_);
for (i = 0; i < 4; ++i)
if (max_H < HH[i]) max_H = HH[i], max_t = tt[i] + i;
for (; t < en0; ++t) { // for the rest of values that haven't been computed with SSE
H[t] += (int32_t)v8[t];
if (H[t] > max_H)
max_H = H[t], max_t = t;
}
} else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
// update ez
if (en0 == tlen - 1 && H[en0] > ez->mte)
ez->mte = H[en0], ez->mte_q = r - en;
if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
ez->mqe = H[st0], ez->mqe_t = st0;
if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e2)) break;
if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H[tlen - 1];
} else { // find approximate max; Z-drop might be inaccurate, too.
if (r > 0) {
if (last_H0_t >= st0 && last_H0_t <= en0 && last_H0_t + 1 >= st0 && last_H0_t + 1 <= en0) {
int32_t d0 = v8[last_H0_t];
int32_t d1 = u8[last_H0_t + 1];
if (d0 > d1) H0 += d0;
else H0 += d1, ++last_H0_t;
} else if (last_H0_t >= st0 && last_H0_t <= en0) {
H0 += v8[last_H0_t];
} else {
++last_H0_t, H0 += u8[last_H0_t];
}
} else H0 = v8[0] - qe, last_H0_t = 0;
if ((flag & KSW_EZ_APPROX_DROP) && ksw_apply_zdrop(ez, 1, H0, r, last_H0_t, zdrop, e2)) break;
if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H0;
}
last_st = st, last_en = en;
//for (t = st0; t <= en0; ++t) printf("(%d,%d)\t(%d,%d,%d,%d)\t%d\n", r, t, ((int8_t*)u)[t], ((int8_t*)v)[t], ((int8_t*)x)[t], ((int8_t*)y)[t], H[t]); // for debugging
}
kfree(km, mem);
if (!approx_max) kfree(km, H);
if (with_cigar) { // backtrack
int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR);
if (!ez->zdropped && !(flag&KSW_EZ_EXTZ_ONLY))
ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
else if (ez->max_t >= 0 && ez->max_q >= 0)
ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar);
kfree(km, mem2); kfree(km, off);
}
}
#endif // __SSE2__

View File

@ -8,21 +8,6 @@
#include <smmintrin.h> #include <smmintrin.h>
#endif #endif
static inline int apply_zdrop(ksw_extz_t *ez, int32_t H, int r, int t, int zdrop, int8_t e)
{
if (H > (int32_t)ez->max) {
ez->max = H, ez->max_t = t, ez->max_q = r - t;
} else if (t >= ez->max_t && r - t >= ez->max_q) {
int tl = t - ez->max_t, ql = (r - t) - ez->max_q, l;
l = tl > ql? tl - ql : ql - tl;
if (ez->max - H > zdrop + l * e) {
ez->zdropped = 1;
return 1;
}
}
return 0;
}
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez) void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
{ {
#define __dp_code_block1 \ #define __dp_code_block1 \
@ -51,18 +36,18 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX); int with_cigar = !(flag&KSW_EZ_SCORE_ONLY), approx_max = !!(flag&KSW_EZ_APPROX_MAX);
int32_t *H = 0, H0 = 0, last_H0_t = 0; int32_t *H = 0, H0 = 0, last_H0_t = 0;
uint8_t *qr, *sf, *mem, *mem2 = 0; uint8_t *qr, *sf, *mem, *mem2 = 0;
__m128i q_, qe2_, zero_, flag1_, flag2_, flag4_, flag32_, sc_mch_, sc_mis_, m1_; __m128i q_, qe2_, zero_, flag1_, flag2_, flag8_, flag16_, sc_mch_, sc_mis_, m1_;
__m128i *u, *v, *x, *y, *s, *p = 0; __m128i *u, *v, *x, *y, *s, *p = 0;
if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0 || zdrop < 0) return; if (m <= 0 || qlen <= 0 || tlen <= 0 || w < 0) return;
zero_ = _mm_set1_epi8(0); zero_ = _mm_set1_epi8(0);
q_ = _mm_set1_epi8(q); q_ = _mm_set1_epi8(q);
qe2_ = _mm_set1_epi8((q + e) * 2); qe2_ = _mm_set1_epi8((q + e) * 2);
flag1_ = _mm_set1_epi8(1<<0); flag1_ = _mm_set1_epi8(1);
flag2_ = _mm_set1_epi8(2<<0); flag2_ = _mm_set1_epi8(2);
flag4_ = _mm_set1_epi8(1<<2); flag8_ = _mm_set1_epi8(0x08);
flag32_ = _mm_set1_epi8(2<<4); flag16_ = _mm_set1_epi8(0x10);
sc_mch_ = _mm_set1_epi8(mat[0]); sc_mch_ = _mm_set1_epi8(mat[0]);
sc_mis_ = _mm_set1_epi8(mat[1]); sc_mis_ = _mm_set1_epi8(mat[1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard m1_ = _mm_set1_epi8(m - 1); // wildcard
@ -163,12 +148,12 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment } else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
__m128i *pr = p + r * n_col_ - st_; __m128i *pr = p + r * n_col_ - st_;
off[r] = st; off[r] = st;
if (en0 < r) { // to avoid backtracking out of the band; this assumes a fixed band if (en0 < r && en0 < tlen - 1) { // to avoid backtracking out of the band; this assumes a fixed band
int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe; int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe;
a = x8[en0-1] + v8[en0-1]; a = x8[en0-1] + v8[en0-1];
p_en0 = a > z? 1 : 0; p_en0 = a > z? 1 : 0;
z = a > z? a : z; z = a > z? a : z;
p_en0 |= a - (z - q) > 0? 1<<2 : 0; p_en0 |= a - (z - q) > 0? 0x08 : 0;
} }
for (t = st_; t <= en_; ++t) { for (t = st_; t <= en_; ++t) {
__m128i d, z, a, b, xt1, vt1, ut, tmp; __m128i d, z, a, b, xt1, vt1, ut, tmp;
@ -187,21 +172,21 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
__dp_code_block2; __dp_code_block2;
tmp = _mm_cmpgt_epi8(a, zero_); tmp = _mm_cmpgt_epi8(a, zero_);
_mm_store_si128(&x[t], _mm_and_si128(tmp, a)); _mm_store_si128(&x[t], _mm_and_si128(tmp, a));
d = _mm_or_si128(d, _mm_and_si128(tmp, flag4_)); // d = a > 0? 1<<2 : 0 d = _mm_or_si128(d, _mm_and_si128(tmp, flag8_)); // d = a > 0? 0x08 : 0
tmp = _mm_cmpgt_epi8(b, zero_); tmp = _mm_cmpgt_epi8(b, zero_);
_mm_store_si128(&y[t], _mm_and_si128(tmp, b)); _mm_store_si128(&y[t], _mm_and_si128(tmp, b));
d = _mm_or_si128(d, _mm_and_si128(tmp, flag32_)); // d = b > 0? 2<<4 : 0 d = _mm_or_si128(d, _mm_and_si128(tmp, flag16_)); // d = b > 0? 0x10 : 0
_mm_store_si128(&pr[t], d); _mm_store_si128(&pr[t], d);
} }
} else { // gap right-alignment } else { // gap right-alignment
__m128i *pr = p + r * n_col_ - st_; __m128i *pr = p + r * n_col_ - st_;
off[r] = st; off[r] = st;
if (en0 < r) { if (en0 < r && en0 < tlen - 1) {
int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe; int8_t a, z = ((uint8_t*)s)[en0] + 2 * qe;
a = x8[en0-1] + v8[en0-1]; a = x8[en0-1] + v8[en0-1];
p_en0 = a >= z? 1 : 0; p_en0 = a >= z? 1 : 0;
z = a >= z? a : z; z = a >= z? a : z;
p_en0 |= a - (z - q) >= 0? 1<<2 : 0; p_en0 |= a - (z - q) >= 0? 0x08 : 0;
} }
for (t = st_; t <= en_; ++t) { for (t = st_; t <= en_; ++t) {
__m128i d, z, a, b, xt1, vt1, ut, tmp; __m128i d, z, a, b, xt1, vt1, ut, tmp;
@ -220,14 +205,14 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
__dp_code_block2; __dp_code_block2;
tmp = _mm_cmpgt_epi8(zero_, a); tmp = _mm_cmpgt_epi8(zero_, a);
_mm_store_si128(&x[t], _mm_andnot_si128(tmp, a)); _mm_store_si128(&x[t], _mm_andnot_si128(tmp, a));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, flag4_)); // d = 0 > a? 0 : 1<<2 d = _mm_or_si128(d, _mm_andnot_si128(tmp, flag8_)); // d = 0 > a? 0 : 0x08
tmp = _mm_cmpgt_epi8(zero_, b); tmp = _mm_cmpgt_epi8(zero_, b);
_mm_store_si128(&y[t], _mm_andnot_si128(tmp, b)); _mm_store_si128(&y[t], _mm_andnot_si128(tmp, b));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, flag32_)); // d = 0 > b? 0 : 2<<4 d = _mm_or_si128(d, _mm_andnot_si128(tmp, flag16_)); // d = 0 > b? 0 : 0x10
_mm_store_si128(&pr[t], d); _mm_store_si128(&pr[t], d);
} }
} }
if (with_cigar && en0 < r) ((uint8_t*)(p + r * n_col_))[en0 - st] = p_en0; if (with_cigar && en0 < r && en0 < tlen - 1) ((uint8_t*)(p + r * n_col_))[en0 - st] = p_en0;
if (!approx_max) { // find the exact max with a 32-bit score array if (!approx_max) { // find the exact max with a 32-bit score array
int32_t max_H, max_t; int32_t max_H, max_t;
// compute H[], max_H and max_t // compute H[], max_H and max_t
@ -271,18 +256,9 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
ez->mte = H[en0], ez->mte_q = r - en; ez->mte = H[en0], ez->mte_q = r - en;
if (r - st0 == qlen - 1 && H[st0] > ez->mqe) if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
ez->mqe = H[st0], ez->mqe_t = st0; ez->mqe = H[st0], ez->mqe_t = st0;
if (apply_zdrop(ez, max_H, r, max_t, zdrop, e)) break; if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e)) break;
if (r == qlen + tlen - 2 && en0 == tlen - 1) if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H[tlen - 1]; ez->score = H[tlen - 1];
if (flag & KSW_EZ_DYN_BAND & 0) { // FIXME: don't use - buggy!
int lq, lt, l;
lt = tlen - st0, lq = qlen - (r - st0);
l = lt < lq? lt : lq;
if (H[st0] + l * max_sc < ez->max - zdrop && wr > 1) --wr;
lt = tlen - en0, lq = qlen - (r - en0);
l = lt < lq? lt : lq;
if (H[en0] + l * max_sc < ez->max - zdrop && wl > 1) --wl;
}
} else { // find approximate max; Z-drop might be inaccurate, too. } else { // find approximate max; Z-drop might be inaccurate, too.
if (r > 0) { if (r > 0) {
if (last_H0_t >= st0 && last_H0_t <= en0 && last_H0_t + 1 >= st0 && last_H0_t + 1 <= en0) { if (last_H0_t >= st0 && last_H0_t <= en0 && last_H0_t + 1 >= st0 && last_H0_t + 1 <= en0) {
@ -295,7 +271,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
} else { } else {
++last_H0_t, H0 += u8[last_H0_t] - qe; ++last_H0_t, H0 += u8[last_H0_t] - qe;
} }
if ((flag & KSW_EZ_APPROX_DROP) && apply_zdrop(ez, H0, r, last_H0_t, zdrop, e)) break; if ((flag & KSW_EZ_APPROX_DROP) && ksw_apply_zdrop(ez, 1, H0, r, last_H0_t, zdrop, e)) break;
} else H0 = v8[0] - qe - qe, last_H0_t = 0; } else H0 = v8[0] - qe - qe, last_H0_t = 0;
if (r == qlen + tlen - 2 && en0 == tlen - 1) if (r == qlen + tlen - 2 && en0 == tlen - 1)
ez->score = H0; ez->score = H0;

64
main.c
View File

@ -10,7 +10,7 @@
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#define MM_VERSION "2.0-r158-pre" #define MM_VERSION "2.0-r159-pre"
void liftrlimit() void liftrlimit()
{ {
@ -134,40 +134,40 @@ int main(int argc, char *argv[])
fprintf(stderr, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n"); fprintf(stderr, "Usage: minimap2 [options] <target.fa>|<target.idx> [query.fa] [...]\n");
fprintf(stderr, "Options:\n"); fprintf(stderr, "Options:\n");
fprintf(stderr, " Indexing:\n"); fprintf(stderr, " Indexing:\n");
fprintf(stderr, " -H use homopolymer-compressed k-mer\n"); fprintf(stderr, " -H use homopolymer-compressed k-mer\n");
fprintf(stderr, " -k INT k-mer size (no larger than 28) [%d]\n", k); fprintf(stderr, " -k INT k-mer size (no larger than 28) [%d]\n", k);
fprintf(stderr, " -w INT minizer window size [{-k}*2/3]\n"); fprintf(stderr, " -w INT minizer window size [{-k}*2/3]\n");
fprintf(stderr, " -I NUM split index for every ~NUM input bases [4G]\n"); fprintf(stderr, " -I NUM split index for every ~NUM input bases [4G]\n");
fprintf(stderr, " -d FILE dump index to FILE []\n"); fprintf(stderr, " -d FILE dump index to FILE []\n");
fprintf(stderr, " Mapping:\n"); fprintf(stderr, " Mapping:\n");
fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac); fprintf(stderr, " -f FLOAT filter out top FLOAT fraction of repetitive minimizers [%g]\n", opt.mid_occ_frac);
fprintf(stderr, " -g INT stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap); fprintf(stderr, " -g INT stop chain enlongation if there are no minimizers in INT-bp [%d]\n", opt.max_gap);
fprintf(stderr, " -r INT bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw); fprintf(stderr, " -r INT bandwidth used in chaining and DP-based alignment [%d]\n", opt.bw);
fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt); fprintf(stderr, " -n INT minimal number of minimizers on a chain [%d]\n", opt.min_cnt);
fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score); fprintf(stderr, " -m INT minimal chaining score (matching bases minus log gap penalty) [%d]\n", opt.min_chain_score);
// fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy // fprintf(stderr, " -T INT SDUST threshold; 0 to disable SDUST [%d]\n", opt.sdust_thres); // TODO: this option is never used; might be buggy
fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n"); fprintf(stderr, " -X skip self and dual mappings (for the all-vs-all mode)\n");
fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio); fprintf(stderr, " -p FLOAT min secondary-to-primary score ratio [%g]\n", opt.pri_ratio);
fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n); fprintf(stderr, " -N INT retain at most INT secondary alignments [%d]\n", opt.best_n);
fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio); fprintf(stderr, " -D FLOAT min fraction of minimizer matches [%g]\n", opt.min_seedcov_ratio);
fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n"); fprintf(stderr, " -x STR preset (recommended to be applied before other options) []\n");
fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n"); fprintf(stderr, " ava10k: -Hk19 -w5 -Xp0 -m100 -D.05 (PacBio/ONT all-vs-all read mapping)\n");
fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n"); fprintf(stderr, " map10k: -Hk19 (PacBio/ONT vs reference mapping)\n");
fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n"); fprintf(stderr, " asm1m: -k19 -w19 (intra-species assembly to ref mapping)\n");
fprintf(stderr, " Alignment:\n"); fprintf(stderr, " Alignment:\n");
fprintf(stderr, " -A INT matching score [%d]\n", opt.a); fprintf(stderr, " -A INT matching score [%d]\n", opt.a);
fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b); fprintf(stderr, " -B INT mismatch penalty [%d]\n", opt.b);
fprintf(stderr, " -O INT gap open penalty [%d]\n", opt.q); fprintf(stderr, " -O INT[,INT] gap open penalty [%d,%d]\n", opt.q, opt.q2);
fprintf(stderr, " -E INT gap extension penalty; a k-long gap costs {-O}+k*{-E} [%d]\n", opt.e); fprintf(stderr, " -E INT[,INT] gap extension penalty; a k-long gap costs min{O1+k*E1,O2+k*E2} [%d,%d]\n", opt.e, opt.e2);
fprintf(stderr, " -z INT Z-drop score [%d]\n", opt.zdrop); fprintf(stderr, " -z INT Z-drop score [%d]\n", opt.zdrop);
fprintf(stderr, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max); fprintf(stderr, " -s INT minimal peak DP alignment score [%d]\n", opt.min_dp_max);
fprintf(stderr, " Input/Output:\n"); fprintf(stderr, " Input/Output:\n");
fprintf(stderr, " -Q ignore base quality in the input\n"); fprintf(stderr, " -Q ignore base quality in the input\n");
fprintf(stderr, " -a output in the SAM format (PAF by default)\n"); fprintf(stderr, " -a output in the SAM format (PAF by default)\n");
fprintf(stderr, " -c output CIGAR in PAF\n"); fprintf(stderr, " -c output CIGAR in PAF\n");
fprintf(stderr, " -t INT number of threads [%d]\n", n_threads); fprintf(stderr, " -t INT number of threads [%d]\n", n_threads);
// fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose); // fprintf(stderr, " -v INT verbose level [%d]\n", mm_verbose);
fprintf(stderr, " -V show version number\n"); fprintf(stderr, " -V show version number\n");
fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n"); fprintf(stderr, "\nSee `man ./minimap2.1' for detailed description of command-line options.\n");
return 1; return 1;
} }

4
map.c
View File

@ -30,8 +30,8 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->max_join_short = 2000; opt->max_join_short = 2000;
opt->min_join_flank_sc = 1000; opt->min_join_flank_sc = 1000;
opt->a = 1, opt->b = 2, opt->q = 2, opt->e = 1; opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
opt->zdrop = 200; opt->zdrop = 400;
opt->min_dp_max = opt->min_chain_score; opt->min_dp_max = opt->min_chain_score;
opt->min_ksw_len = 100; opt->min_ksw_len = 100;
} }

View File

@ -85,7 +85,7 @@ typedef struct {
int max_join_long, max_join_short; int max_join_long, max_join_short;
int min_join_flank_sc; int min_join_flank_sc;
int a, b, q, e; // matching score, mismatch, gap-open and gap-ext penalties int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
int zdrop; int zdrop;
int min_dp_max; int min_dp_max;
int min_ksw_len; int min_ksw_len;

View File

@ -192,23 +192,26 @@ Long assembly to reference mapping (-k19 -w19)
.SS Alignment options .SS Alignment options
.TP 10 .TP 10
.BI -A \ INT .BI -A \ INT
Matching score [1] Matching score [2]
.TP .TP
.BI -B \ INT .BI -B \ INT
Mismatching penalty [2] Mismatching penalty [4]
.TP .TP
.BI -O \ INT .BI -O \ INT1[,INT2]
Gap open penalty [2] Gap open penalty [4,24]. If
.I INT2
is not specified, it is set to
.IR INT1 .
.TP .TP
.BI -E \ INT .BI -E \ INT1[,INT2]
Gap extension penalty [1]. A gap of length Gap extension penalty [2,1]. A gap of length
.I l .I k
costs costs
.RI {-O}+{-E}* l . .RI min{ O1 + k * E1 , O2 + k * E2 }.
.TP .TP
.BI -z \ INT .BI -z \ INT
Break an alignment if the running score drops too quickly along the diagonal of Break an alignment if the running score drops too quickly along the diagonal of
the DP matrix (diagonal X-drop, or Z-drop) [200]. Increasing the value improves the DP matrix (diagonal X-drop, or Z-drop) [400]. Increasing the value improves
the contiguity of the alignment at the cost of poor alignment in the middle the contiguity of the alignment at the cost of poor alignment in the middle
(e.g. caused by a long inversion). (e.g. caused by a long inversion).
.TP .TP