r227: use local alignment for INV alignment
This commit is contained in:
parent
da3db3c095
commit
d08b7a0c51
2
Makefile
2
Makefile
|
|
@ -2,7 +2,7 @@ CC= gcc
|
|||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES= -I.
|
||||
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o misc.o bseq.o \
|
||||
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_ll_sse.o misc.o bseq.o \
|
||||
sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust minimap2-lite
|
||||
|
|
|
|||
46
align.c
46
align.c
|
|
@ -357,9 +357,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
|
||||
static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], const mm_reg1_t *r1, const mm_reg1_t *r2, mm_reg1_t *r_inv, ksw_extz_t *ez)
|
||||
{
|
||||
int tl, ql, ret = 0;
|
||||
int tl, ql, score, ret = 0, q_off, t_off;
|
||||
uint8_t *tseq, *qseq;
|
||||
int8_t mat[25];
|
||||
void *qp;
|
||||
|
||||
memset(r_inv, 0, sizeof(mm_reg1_t));
|
||||
if (!(r1->split&1) || !(r2->split&2)) return 0;
|
||||
if (r1->id != r1->parent && r1->parent != MM_PARENT_TMP_PRI) return 0;
|
||||
|
|
@ -369,28 +371,34 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
tl = r2->rs - r1->re;
|
||||
if (ql < 0 || ql > opt->max_gap) return 0;
|
||||
if (tl < 0 || tl > opt->max_gap) return 0;
|
||||
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
|
||||
tseq = (uint8_t*)kmalloc(km, tl);
|
||||
mm_idx_getseq(mi, r1->rid, r1->re, r2->rs, tseq);
|
||||
qseq = &qseq0[!r1->rev][qlen - r2->qs];
|
||||
mm_align_pair(km, opt, ql, qseq, tl, tseq, mat, (int)(opt->bw * 1.5), KSW_EZ_APPROX_MAX, ez);
|
||||
if (ez->n_cigar > 0 && ez->score > 0) {
|
||||
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
|
||||
r_inv->p->dp_score = ez->score;
|
||||
mm_update_extra(r_inv->p, qseq, tseq, mat, opt->q, opt->e);
|
||||
if (r_inv->p->dp_max < opt->min_dp_max) { // discard this hit
|
||||
free(r_inv->p);
|
||||
r_inv->p = 0;
|
||||
} else {
|
||||
r_inv->id = -1;
|
||||
r_inv->parent = MM_PARENT_UNSET;
|
||||
r_inv->inv = 1;
|
||||
r_inv->rev = !r1->rev;
|
||||
r_inv->qs = r1->qe, r_inv->qe = r2->qs;
|
||||
r_inv->rs = r1->re, r_inv->re = r2->rs;
|
||||
ret = 1;
|
||||
}
|
||||
}
|
||||
|
||||
mm_seq_rev(ql, qseq);
|
||||
mm_seq_rev(tl, tseq);
|
||||
qp = ksw_ll_qinit(km, 2, ql, qseq, 5, mat);
|
||||
score = ksw_ll_i16(qp, tl, tseq, opt->q, opt->e, &q_off, &t_off);
|
||||
kfree(km, qp);
|
||||
mm_seq_rev(ql, qseq);
|
||||
mm_seq_rev(tl, tseq);
|
||||
if (score < opt->min_dp_max) goto end_align1_inv;
|
||||
q_off = ql - (q_off + 1), t_off = tl - (t_off + 1);
|
||||
mm_align_pair(km, opt, ql - q_off, qseq + q_off, tl - t_off, tseq + t_off, mat, (int)(opt->bw * 1.5), KSW_EZ_EXTZ_ONLY, ez);
|
||||
if (ez->n_cigar == 0) goto end_align1_inv; // should never be here
|
||||
mm_append_cigar(r_inv, ez->n_cigar, ez->cigar);
|
||||
r_inv->p->dp_score = ez->max;
|
||||
mm_update_extra(r_inv->p, qseq + q_off, tseq + t_off, mat, opt->q, opt->e);
|
||||
r_inv->id = -1;
|
||||
r_inv->parent = MM_PARENT_UNSET;
|
||||
r_inv->inv = 1;
|
||||
r_inv->rev = !r1->rev;
|
||||
r_inv->qs = r1->qe + q_off, r_inv->qe = r_inv->qs + ez->max_q + 1;
|
||||
r_inv->rs = r1->re + t_off, r_inv->re = r_inv->rs + ez->max_t + 1;
|
||||
ret = 1;
|
||||
end_align1_inv:
|
||||
kfree(km, tseq);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
3
ksw2.h
3
ksw2.h
|
|
@ -67,6 +67,9 @@ int ksw_gg(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *ta
|
|||
int ksw_gg2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_);
|
||||
int ksw_gg2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t gapo, int8_t gape, int w, int *m_cigar_, int *n_cigar_, uint32_t **cigar_);
|
||||
|
||||
void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat);
|
||||
int ksw_ll_i16(void *q, int tlen, const uint8_t *target, int gapo, int gape, int *qe, int *te);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,147 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include <emmintrin.h>
|
||||
#include "ksw2.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x),1)
|
||||
#define UNLIKELY(x) __builtin_expect((x),0)
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
int qlen, slen;
|
||||
uint8_t shift, mdiff, max, size;
|
||||
__m128i *qp, *H0, *H1, *E, *Hmax;
|
||||
} kswq_t;
|
||||
|
||||
/**
|
||||
* Initialize the query data structure
|
||||
*
|
||||
* @param size Number of bytes used to store a score; valid valures are 1 or 2
|
||||
* @param qlen Length of the query sequence
|
||||
* @param query Query sequence
|
||||
* @param m Size of the alphabet
|
||||
* @param mat Scoring matrix in a one-dimension array
|
||||
*
|
||||
* @return Query data structure
|
||||
*/
|
||||
void *ksw_ll_qinit(void *km, int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
|
||||
{
|
||||
kswq_t *q;
|
||||
int slen, a, tmp, p;
|
||||
|
||||
size = size > 1? 2 : 1;
|
||||
p = 8 * (3 - size); // # values per __m128i
|
||||
slen = (qlen + p - 1) / p; // segmented length
|
||||
q = (kswq_t*)kmalloc(km, sizeof(kswq_t) + 256 + 16 * slen * (m + 4)); // a single block of memory
|
||||
q->qp = (__m128i*)(((size_t)q + sizeof(kswq_t) + 15) >> 4 << 4); // align memory
|
||||
q->H0 = q->qp + slen * m;
|
||||
q->H1 = q->H0 + slen;
|
||||
q->E = q->H1 + slen;
|
||||
q->Hmax = q->E + slen;
|
||||
q->slen = slen; q->qlen = qlen; q->size = size;
|
||||
// compute shift
|
||||
tmp = m * m;
|
||||
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
|
||||
if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
|
||||
if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
|
||||
}
|
||||
q->max = q->mdiff;
|
||||
q->shift = 256 - q->shift; // NB: q->shift is uint8_t
|
||||
q->mdiff += q->shift; // this is the difference between the min and max scores
|
||||
// An example: p=8, qlen=19, slen=3 and segmentation:
|
||||
// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
|
||||
if (size == 1) {
|
||||
int8_t *t = (int8_t*)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
|
||||
}
|
||||
} else {
|
||||
int16_t *t = (int16_t*)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen? 0 : ma[query[k]]);
|
||||
}
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
int ksw_ll_i16(void *q_, int tlen, const uint8_t *target, int _gapo, int _gape, int *qe, int *te)
|
||||
{
|
||||
kswq_t *q = (kswq_t*)q_;
|
||||
int slen, i, gmax = 0, qlen8;
|
||||
__m128i zero, gapoe, gape, *H0, *H1, *E, *Hmax;
|
||||
uint16_t *H8;
|
||||
|
||||
#define __max_8(ret, xx) do { \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
|
||||
(ret) = _mm_extract_epi16((xx), 0); \
|
||||
} while (0)
|
||||
|
||||
// initialization
|
||||
*qe = *te = -1;
|
||||
zero = _mm_set1_epi32(0);
|
||||
gapoe = _mm_set1_epi16(_gapo + _gape);
|
||||
gape = _mm_set1_epi16(_gape);
|
||||
H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
|
||||
slen = q->slen, qlen8 = slen * 8;
|
||||
memset(E, 0, slen * sizeof(__m128i));
|
||||
memset(H0, 0, slen * sizeof(__m128i));
|
||||
memset(Hmax, 0, slen * sizeof(__m128i));
|
||||
// the core loop
|
||||
for (i = 0; i < tlen; ++i) {
|
||||
int j, k, imax;
|
||||
__m128i e, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
|
||||
h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
|
||||
h = _mm_slli_si128(h, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm_adds_epi16(h, *S++);
|
||||
e = _mm_load_si128(E + j);
|
||||
h = _mm_max_epi16(h, e);
|
||||
h = _mm_max_epi16(h, f);
|
||||
max = _mm_max_epi16(max, h);
|
||||
_mm_store_si128(H1 + j, h);
|
||||
h = _mm_subs_epu16(h, gapoe);
|
||||
e = _mm_subs_epu16(e, gape);
|
||||
e = _mm_max_epi16(e, h);
|
||||
_mm_store_si128(E + j, e);
|
||||
f = _mm_subs_epu16(f, gape);
|
||||
f = _mm_max_epi16(f, h);
|
||||
h = _mm_load_si128(H0 + j);
|
||||
}
|
||||
for (k = 0; LIKELY(k < 16); ++k) {
|
||||
f = _mm_slli_si128(f, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm_load_si128(H1 + j);
|
||||
h = _mm_max_epi16(h, f);
|
||||
_mm_store_si128(H1 + j, h);
|
||||
h = _mm_subs_epu16(h, gapoe);
|
||||
f = _mm_subs_epu16(f, gape);
|
||||
if(UNLIKELY(!_mm_movemask_epi8(_mm_cmpgt_epi16(f, h)))) goto end_loop_i16;
|
||||
}
|
||||
}
|
||||
end_loop_i16:
|
||||
__max_8(imax, max);
|
||||
if (imax >= gmax) {
|
||||
gmax = imax; *te = i;
|
||||
memcpy(Hmax, H1, slen * sizeof(__m128i));
|
||||
}
|
||||
S = H1; H1 = H0; H0 = S;
|
||||
}
|
||||
for (i = 0, H8 = (uint16_t*)Hmax; i < qlen8; ++i)
|
||||
if ((int)H8[i] == gmax) *qe = i / 8 + i % 8 * slen;
|
||||
return gmax;
|
||||
}
|
||||
Loading…
Reference in New Issue