diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index 8a8287b..85ba1eb 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -127,35 +127,18 @@ void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const b seq[i] = nst_nt4_table[(int)mseq[i]]; } #ifndef _NO_SSE2 - { - ksw_query_t *q; - ksw_aux_t aux[2]; - // forward Smith-Waterman - aux[0].T = opt->t; aux[0].gapo = opt->q; aux[0].gape = opt->r; aux[1] = aux[0]; - q = ksw_qinit(l_mseq * g_mat[0] < 250? 1 : 2, l_mseq, seq, 5, g_mat); - ksw_sse2(q, end - beg, ref, &aux[0]); - free(q); - if (aux[0].score < opt->t) { - free(seq); - return; - } - ++aux[0].qe; ++aux[0].te; - // reverse Smith-Waterman - seq_reverse(aux[0].qe, seq, 0); - seq_reverse(aux[0].te, ref, 0); - q = ksw_qinit(aux[0].qe * g_mat[0] < 250? 1 : 2, aux[0].qe, seq, 5, g_mat); - ksw_sse2(q, aux[0].te, ref, &aux[1]); - free(q); - ++aux[1].qe; ++aux[1].te; - // write output - a->G = aux[0].score; - a->G2 = aux[0].score2 > aux[1].score2? aux[0].score2 : aux[1].score2; + { // FIXME!!! The following block has not been tested since the update of the ksw library + int flag = KSW_XSUBO | KSW_XSTOP | KSW_XSTART | (l_mseq * g_mat[0] < 250? KSW_XBYTE : 0); + kswr_t aln; + aln = ksw_align(l_mseq, seq, end - beg, ref, 5, g_mat, opt->q, opt->r, flag, 0); + a->G = aln.score; + a->G2 = aln.score2; if (a->G2 < opt->t) a->G2 = 0; if (a->G2) a->flag |= BSW2_FLAG_TANDEM; - a->k = beg + (aux[0].te - aux[1].te); - a->len = aux[1].te; - a->beg = aux[0].qe - aux[1].qe; - a->end = aux[0].qe; + a->k = beg + aln.tb; + a->len = aln.te - aln.tb; + a->beg = aln.qb; + a->end = aln.qe; } #else { @@ -168,6 +151,7 @@ void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const b a->G = aln_local_core(ref, end - beg, seq, l_mseq, &ap, path, 0, opt->t, &a->G2); if (a->G < opt->t) a->G = 0; if (a->G2 < opt->t) a->G2 = 0; + if (a->G2) a->flag |= BSW2_FLAG_TANDEM; a->k = beg + path[0].i - 1; a->len = path[1].i - path[0].i + 1; a->beg = path[0].j - 1; diff --git a/ksw.c b/ksw.c index 08cdf56..4599c6b 100644 --- a/ksw.c +++ b/ksw.c @@ -25,14 +25,8 @@ #include #include -#include "ksw.h" - -#ifndef kroundup32 -#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) -#endif - -#ifndef _NO_SSE2 #include +#include "ksw.h" #ifdef __GNUC__ #define LIKELY(x) __builtin_expect((x),1) @@ -42,26 +36,35 @@ #define UNLIKELY(x) (x) #endif -/*************** - *** SSE2 SW *** - ***************/ +const kswr_t g_defr = { 0, -1, -1, -1, -1, -1, -1 }; -struct _ksw_query_t { +struct _kswq_t { int qlen, slen; uint8_t shift, mdiff, max, size; __m128i *qp, *H0, *H1, *E, *Hmax; }; -ksw_query_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat) +/** + * Initialize the query data structure + * + * @param size Number of bytes used to store a score; valid valures are 1 or 2 + * @param qlen Length of the query sequence + * @param query Query sequence + * @param m Size of the alphabet + * @param mat Scoring matrix in a one-dimension array + * + * @return Query data structure + */ +kswq_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const int8_t *mat) { - ksw_query_t *q; + kswq_t *q; int slen, a, tmp, p; size = size > 1? 2 : 1; p = 8 * (3 - size); // # values per __m128i slen = (qlen + p - 1) / p; // segmented length - q = malloc(sizeof(ksw_query_t) + 256 + 16 * slen * (m + 4)); // a single block of memory - q->qp = (__m128i*)(((size_t)q + sizeof(ksw_query_t) + 15) >> 4 << 4); // align memory + q = (kswq_t*)malloc(sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory + q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory q->H0 = q->qp + slen * m; q->H1 = q->H0 + slen; q->E = q->H1 + slen; @@ -100,11 +103,12 @@ ksw_query_t *ksw_qinit(int size, int qlen, const uint8_t *query, int m, const in return q; } -int ksw_sse2_16(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // the first gap costs -(_o+_e) +kswr_t ksw_u8(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e) { - int slen, i, m_b, n_b, te = -1, gmax = 0; + int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc; uint64_t *b; __m128i zero, gapoe, gape, shift, *H0, *H1, *E, *Hmax; + kswr_t r; #define __max_16(ret, xx) do { \ (xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \ @@ -115,10 +119,13 @@ int ksw_sse2_16(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) / } while (0) // initialization + r = g_defr; + minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000; + endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000; m_b = n_b = 0; b = 0; zero = _mm_set1_epi32(0); - gapoe = _mm_set1_epi8(a->gapo + a->gape); - gape = _mm_set1_epi8(a->gape); + gapoe = _mm_set1_epi8(_gapo + _gape); + gape = _mm_set1_epi8(_gape); shift = _mm_set1_epi8(q->shift); H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax; slen = q->slen; @@ -174,11 +181,11 @@ int ksw_sse2_16(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) / end_loop16: //int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n"); __max_16(imax, max); // imax is the maximum number in max - if (imax >= a->T) { // write the b array; this condition adds branching unfornately + if (imax >= minsc) { // write the b array; this condition adds branching unfornately if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append if (n_b == m_b) { m_b = m_b? m_b<<1 : 8; - b = realloc(b, 8 * m_b); + b = (uint64_t*)realloc(b, 8 * m_b); } b[n_b++] = (uint64_t)imax<<32 | i; } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last @@ -187,34 +194,38 @@ end_loop16: gmax = imax; te = i; // te is the end position on the target for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j)); - if (gmax + q->shift >= 255) break; + if (gmax + q->shift >= 255 || gmax >= endsc) break; } S = H1; H1 = H0; H0 = S; // swap H0 and H1 } - a->score = gmax; a->te = te; - { // get a->qe, the end of query match; find the 2nd best score + r.score = gmax + q->shift < 255? gmax : 255; + r.te = te; + if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score int max = -1, low, high, qlen = slen * 16; uint8_t *t = (uint8_t*)Hmax; - for (i = 0, a->qe = -1; i < qlen; ++i, ++t) - if ((int)*t > max) max = *t, a->qe = i / 16 + i % 16 * slen; + for (i = 0; i < qlen; ++i, ++t) + if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen; //printf("%d,%d\n", max, gmax); - i = (a->score + q->max - 1) / q->max; - low = te - i; high = te + i; - for (i = 0, a->score2 = 0; i < n_b; ++i) { - int e = (int32_t)b[i]; - if ((e < low || e > high) && b[i]>>32 > (uint32_t)a->score2) - a->score2 = b[i]>>32, a->te2 = e; + if (b) { + i = (r.score + q->max - 1) / q->max; + low = te - i; high = te + i; + for (i = 0; i < n_b; ++i) { + int e = (int32_t)b[i]; + if ((e < low || e > high) && b[i]>>32 > (uint32_t)r.score2) + r.score2 = b[i]>>32, r.te2 = e; + } } } free(b); - return a->score + q->shift >= 255? 255 : a->score; + return r; } -int ksw_sse2_8(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // the first gap costs -(_o+_e) +kswr_t ksw_i16(kswq_t *q, int tlen, const uint8_t *target, int _gapo, int _gape, int xtra) // the first gap costs -(_o+_e) { - int slen, i, m_b, n_b, te = -1, gmax = 0; + int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc; uint64_t *b; __m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax; + kswr_t r; #define __max_8(ret, xx) do { \ (xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \ @@ -224,10 +235,13 @@ int ksw_sse2_8(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // } while (0) // initialization + r = g_defr; + minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000; + endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000; m_b = n_b = 0; b = 0; zero = _mm_set1_epi32(0); - gapoe = _mm_set1_epi16(a->gapo + a->gape); - gape = _mm_set1_epi16(a->gape); + gapoe = _mm_set1_epi16(_gapo + _gape); + gape = _mm_set1_epi16(_gape); H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax; slen = q->slen; for (i = 0; i < slen; ++i) { @@ -269,11 +283,11 @@ int ksw_sse2_8(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) // } end_loop8: __max_8(imax, max); - if (imax >= a->T) { + if (imax >= minsc) { if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { if (n_b == m_b) { m_b = m_b? m_b<<1 : 8; - b = realloc(b, 8 * m_b); + b = (uint64_t*)realloc(b, 8 * m_b); } b[n_b++] = (uint64_t)imax<<32 | i; } else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last @@ -282,34 +296,60 @@ end_loop8: gmax = imax; te = i; for (j = 0; LIKELY(j < slen); ++j) _mm_store_si128(Hmax + j, _mm_load_si128(H1 + j)); + if (gmax >= endsc) break; } S = H1; H1 = H0; H0 = S; } - a->score = gmax; a->te = te; + r.score = gmax; r.te = te; { int max = -1, low, high, qlen = slen * 8; uint16_t *t = (uint16_t*)Hmax; - for (i = 0, a->qe = -1; i < qlen; ++i, ++t) - if ((int)*t > max) max = *t, a->qe = i / 8 + i % 8 * slen; - i = (a->score + q->max - 1) / q->max; - low = te - i; high = te + i; - for (i = 0, a->score2 = 0; i < n_b; ++i) { - int e = (int32_t)b[i]; - if ((e < low || e > high) && b[i]>>32 > (uint32_t)a->score2) - a->score2 = b[i]>>32, a->te2 = e; + for (i = 0, r.qe = -1; i < qlen; ++i, ++t) + if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen; + if (b) { + i = (r.score + q->max - 1) / q->max; + low = te - i; high = te + i; + for (i = 0; i < n_b; ++i) { + int e = (int32_t)b[i]; + if ((e < low || e > high) && b[i]>>32 > (uint32_t)r.score2) + r.score2 = b[i]>>32, r.te2 = e; + } } } free(b); - return a->score; + return r; } -int ksw_sse2(ksw_query_t *q, int tlen, const uint8_t *target, ksw_aux_t *a) +static void revseq(int l, uint8_t *s) { - if (q->size == 1) return ksw_sse2_16(q, tlen, target, a); - else return ksw_sse2_8(q, tlen, target, a); + int i, t; + for (i = 0; i < l>>1; ++i) + t = s[i], s[i] = s[l - 1 - i], s[l - 1 - i] = t; } -#endif // _NO_SSE2 +kswr_t ksw_align(int qlen, uint8_t *query, int tlen, uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int xtra, kswq_t **qry) +{ + int size; + kswq_t *q; + kswr_t r, rr; + kswr_t (*func)(kswq_t*, int, const uint8_t*, int, int, int); + + q = (qry && *qry)? *qry : ksw_qinit((xtra&KSW_XBYTE)? 1 : 2, qlen, query, m, mat); + if (qry && *qry == 0) *qry = q; + func = q->size == 2? ksw_i16 : ksw_u8; + size = q->size; + r = func(q, tlen, target, gapo, gape, xtra); + if (qry == 0) free(q); + if ((xtra&KSW_XSTART) == 0 || ((xtra&KSW_XSUBO) && r.score < (xtra&0xffff))) return r; + revseq(r.qe + 1, query); revseq(r.te + 1, target); // +1 because qe/te points to the exact end, not the position after the end + q = ksw_qinit(size, r.qe + 1, query, m, mat); + rr = func(q, tlen, target, gapo, gape, KSW_XSTOP | r.score); + revseq(r.qe + 1, query); revseq(r.te + 1, target); + free(q); + if (r.score == rr.score) + r.tb = r.te - rr.te, r.qb = r.qe - rr.qe; + return r; +} /******************** *** SW extension *** @@ -494,7 +534,7 @@ int ksw_global(int qlen, const uint8_t *query, int tlen, const uint8_t *target, * Main function (not compiled by default) * *******************************************/ -#if defined(_KSW_MAIN) && !defined(_NO_SSE2) +#ifdef _KSW_MAIN #include #include @@ -523,30 +563,33 @@ unsigned char seq_nt4_table[256] = { int main(int argc, char *argv[]) { - int c, sa = 1, sb = 3, i, j, k, forward_only = 0, size = 2; + int c, sa = 1, sb = 3, i, j, k, forward_only = 0, max_rseq = 0; int8_t mat[25]; - ksw_aux_t a; + int gapo = 5, gape = 2, minsc = 0, xtra = KSW_XSTART; + uint8_t *rseq = 0; gzFile fpt, fpq; kseq_t *kst, *ksq; + // parse command line - a.gapo = 5; a.gape = 2; a.T = 10; - while ((c = getopt(argc, argv, "a:b:q:r:ft:s:")) >= 0) { + while ((c = getopt(argc, argv, "a:b:q:r:ft:1")) >= 0) { switch (c) { case 'a': sa = atoi(optarg); break; case 'b': sb = atoi(optarg); break; - case 'q': a.gapo = atoi(optarg); break; - case 'r': a.gape = atoi(optarg); break; - case 't': a.T = atoi(optarg); break; + case 'q': gapo = atoi(optarg); break; + case 'r': gape = atoi(optarg); break; + case 't': minsc = atoi(optarg); break; case 'f': forward_only = 1; break; - case 's': size = atoi(optarg); break; + case '1': xtra |= KSW_XBYTE; break; } } if (optind + 2 > argc) { - fprintf(stderr, "Usage: ksw [-s%d] [-a%d] [-b%d] [-q%d] [-r%d] \n", size, sa, sb, a.gapo, a.gape); + fprintf(stderr, "Usage: ksw [-1] [-f] [-a%d] [-b%d] [-q%d] [-r%d] [-t%d] \n", sa, sb, gapo, gape, minsc); return 1; } + if (minsc > 0xffff) minsc = 0xffff; + if (minsc > 0) xtra |= KSW_XSUBO | minsc; // initialize scoring matrix - for (i = k = 0; i < 5; ++i) { + for (i = k = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) mat[k++] = i == j? sa : -sb; mat[k++] = 0; // ambiguous base @@ -557,34 +600,34 @@ int main(int argc, char *argv[]) fpq = gzopen(argv[optind+1], "r"); ksq = kseq_init(fpq); // all-pair alignment while (kseq_read(ksq) > 0) { - ksw_query_t *q[2]; - for (i = 0; i < ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]]; - q[0] = ksw_qinit(size, ksq->seq.l, (uint8_t*)ksq->seq.s, 5, mat); + kswq_t *q[2] = {0, 0}; + kswr_t r; + for (i = 0; i < (int)ksq->seq.l; ++i) ksq->seq.s[i] = seq_nt4_table[(int)ksq->seq.s[i]]; if (!forward_only) { // reverse - for (i = 0; i < ksq->seq.l/2; ++i) { - int t = ksq->seq.s[i]; - ksq->seq.s[i] = ksq->seq.s[ksq->seq.l-1-i]; - ksq->seq.s[ksq->seq.l-1-i] = t; + if ((int)ksq->seq.m > max_rseq) { + max_rseq = ksq->seq.m; + rseq = (uint8_t*)realloc(rseq, max_rseq); } - for (i = 0; i < ksq->seq.l; ++i) - ksq->seq.s[i] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i]; - q[1] = ksw_qinit(size, ksq->seq.l, (uint8_t*)ksq->seq.s, 5, mat); - } else q[1] = 0; + for (i = 0, j = ksq->seq.l - 1; i < (int)ksq->seq.l; ++i, --j) + rseq[j] = ksq->seq.s[i] == 4? 4 : 3 - ksq->seq.s[i]; + } gzrewind(fpt); kseq_rewind(kst); while (kseq_read(kst) > 0) { - int s; - for (i = 0; i < kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; - s = ksw_sse2(q[0], kst->seq.l, (uint8_t*)kst->seq.s, &a); - printf("%s\t%s\t+\t%d\t%d\t%d\n", ksq->name.s, kst->name.s, s, a.te+1, a.qe+1); - if (q[1]) { - s = ksw_sse2(q[1], kst->seq.l, (uint8_t*)kst->seq.s, &a); - printf("%s\t%s\t-\t%d\t%d\t%d\n", ksq->name.s, kst->name.s, s, a.te+1, a.qe+1); + for (i = 0; i < (int)kst->seq.l; ++i) kst->seq.s[i] = seq_nt4_table[(int)kst->seq.s[i]]; + r = ksw_align(ksq->seq.l, (uint8_t*)ksq->seq.s, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[0]); + if (r.score >= minsc) + printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, r.qb, r.qe+1, r.score, r.score2, r.te2); + if (rseq) { + r = ksw_align(ksq->seq.l, rseq, kst->seq.l, (uint8_t*)kst->seq.s, 5, mat, gapo, gape, xtra, &q[1]); + if (r.score >= minsc) + printf("%s\t%d\t%d\t%s\t%d\t%d\t%d\t%d\t%d\n", kst->name.s, r.tb, r.te+1, ksq->name.s, (int)ksq->seq.l - r.qb, (int)ksq->seq.l - 1 - r.qe, r.score, r.score2, r.te2); } } free(q[0]); free(q[1]); } + free(rseq); kseq_destroy(kst); gzclose(fpt); kseq_destroy(ksq); gzclose(fpq); return 0; } -#endif // _KSW_MAIN +#endif diff --git a/ksw.h b/ksw.h index c7eaabb..5162dc0 100644 --- a/ksw.h +++ b/ksw.h @@ -3,51 +3,64 @@ #include -struct _ksw_query_t; -typedef struct _ksw_query_t ksw_query_t; +#define KSW_XBYTE 0x10000 +#define KSW_XSTOP 0x20000 +#define KSW_XSUBO 0x40000 +#define KSW_XSTART 0x80000 + +struct _kswq_t; +typedef struct _kswq_t kswq_t; typedef struct { - // input - unsigned gapo, gape; // the first gap costs gapo+gape - unsigned T; // threshold - // output - int score, te, qe, score2, te2; -} ksw_aux_t; + int score; // best score + int te, qe; // target end and query end + int score2, te2; // second best score and ending position on the target + int tb, qb; // target start and query start +} kswr_t; #ifdef __cplusplus extern "C" { #endif /** - * Initialize the query data structure + * Aligning two sequences * - * @param size Number of bytes used to store a score; valid valures are 1 or 2 - * @param qlen Length of the query sequence - * @param query Query sequence - * @param m Size of the alphabet - * @param mat Scoring matrix in a one-dimension array + * @param qlen length of the query sequence (typically