diff --git a/Makefile b/Makefile index 184ab6b..b0d1f2a 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,8 @@ CC= gcc CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function CPPFLAGS= INCLUDES= -I. -OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o index.o map.o +OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \ + index.o map.o ksw2_extz2_sse.o PROG= minimap2 PROG_EXTRA= sdust LIBS= -lm -lz -lpthread diff --git a/align.c b/align.c index 496829a..14b4f3d 100644 --- a/align.c +++ b/align.c @@ -1,7 +1,26 @@ #include +#include #include "minimap.h" #include "ksw2.h" +#ifndef kroundup32 +#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) +#endif + +static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b) +{ + int i, j; + a = a < 0? -a : a; + b = b > 0? -b : b; + for (i = 0; i < m - 1; ++i) { + for (j = 0; j < m - 1; ++j) + mat[i * m + j] = i == j? a : b; + mat[i * m + m - 1] = 0; + } + for (j = 0; j < m; ++j) + mat[(m - 1) * m + j] = 0; +} + static inline void mm_seq_rev(uint32_t len, uint8_t *seq) { uint32_t i; @@ -10,13 +29,31 @@ static inline void mm_seq_rev(uint32_t len, uint8_t *seq) t = seq[i], seq[i] = seq[len - 1 - i], seq[len - 1 - i] = t; } -static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a) +static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc() +{ + if (r->cigar == 0) { + uint32_t m_cigar = n_cigar + 2; + kroundup32(m_cigar); + r->cigar = (mm_cigar_t*)malloc(m_cigar * 4); + r->cigar->n = 0, r->cigar->m = m_cigar; + } else if (r->cigar->n + n_cigar > r->cigar->m - 2) { + kroundup32(r->cigar->m); + r->cigar->m = r->cigar->n + n_cigar; + r->cigar = (mm_cigar_t*)realloc(r->cigar, r->cigar->m * 4); + } + memcpy(r->cigar->cigar + r->cigar->n, cigar, n_cigar * 4); + r->cigar->n += n_cigar; +} + +static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez) { int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63; uint8_t *tseq, *qseq; int32_t i, k, l, rs0, re0, qs0, qe0; int32_t rs, re, qs, qe, ret; - mm_reg1_t r1; + int8_t mat[25]; + + ksw_gen_simple_mat(5, mat, opt->a, opt->b); rs = (int32_t)a[r->as].x + 1; // NB: this is the same as r->{rs,re} re = (int32_t)a[r->as + r->cnt - 1].x + 1; @@ -53,8 +90,11 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int fprintf(stderr, "===> [-1] %d-%d %c (%s:%d-%d) <===\n", qs0, qs, "+-"[rev], mi->seq[rid].name, rs0, rs); for (k = 0; k < tl; ++k) fputc("ACGTN"[tseq[k]], stderr); fputc('\n', stderr); for (k = 0; k < ql; ++k) fputc("ACGTN"[qseq[k]], stderr); fputc('\n', stderr); + ksw_extz2_sse(km, ql, qseq, tl, tseq, 5, mat, opt->q, opt->e, (int)(opt->bw * 1.5 + .499), opt->zdrop, KSW_EZ_EXTZ_ONLY|KSW_EZ_RIGHT|KSW_EZ_REV_CIGAR, ez); mm_seq_rev(ql, qseq); + mm_append_cigar(r, ez->n_cigar, ez->cigar); } + for (i = 0; i < r->cigar->n; ++i) fprintf(stderr, "%d%c", r->cigar->cigar[i]>>4, "MID"[r->cigar->cigar[i]&0xf]); fputc('\n', stderr); /* for (i = 1; i < r->cnt; ++i) { re = (int32_t)a[r->as + i].x + 1; @@ -79,7 +119,9 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int extern unsigned char seq_nt4_table[256]; int i, reg; uint8_t *qseq0[2]; + ksw_extz_t ez; + memset(&ez, 0, sizeof(ksw_extz_t)); qseq0[0] = (uint8_t*)kmalloc(km, qlen); qseq0[1] = (uint8_t*)kmalloc(km, qlen); for (i = 0; i < qlen; ++i) { @@ -89,8 +131,8 @@ void mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int for (reg = 0; reg < n_regs; ++reg) { mm_reg1_t r2; - mm_align1(km, opt, mi, qlen, qseq0, ®s[reg], &r2, a); + mm_align1(km, opt, mi, qlen, qseq0, ®s[reg], &r2, a, &ez); } - kfree(km, qseq0[0]); kfree(km, qseq0[1]); + kfree(km, qseq0[0]); kfree(km, qseq0[1]); kfree(km, ez.cigar); } diff --git a/ksw2.h b/ksw2.h index b758b7f..a4edc2a 100644 --- a/ksw2.h +++ b/ksw2.h @@ -10,6 +10,8 @@ #define KSW_EZ_GENERIC_SC 0x04 // without this flag: match/mismatch only; last symbol is a wildcard #define KSW_EZ_GLOBAL_ONLY 0x08 // don't record best score and ignore z-drop; significantly faster #define KSW_EZ_DYN_BAND 0x10 // once used, ksw_extz_t::{mqe,mte} may be wrong +#define KSW_EZ_EXTZ_ONLY 0x20 // only perform extension +#define KSW_EZ_REV_CIGAR 0x40 // reverse CIGAR in the output typedef struct { int max, max_q, max_t; // max extension score and coordinate @@ -88,7 +90,7 @@ static inline uint32_t *ksw_push_cigar(void *km, int *n_cigar, int *m_cigar, uin return cigar; } -static inline void ksw_backtrack(void *km, int is_rot, const uint8_t *p, const int *off, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_) +static inline void ksw_backtrack(void *km, int is_rot, int is_rev, const uint8_t *p, const int *off, int n_col, int i0, int j0, int *m_cigar_, int *n_cigar_, uint32_t **cigar_) { int n_cigar = 0, m_cigar = *m_cigar_, which = 0, i = i0, j = j0, r; uint32_t *cigar = *cigar_, tmp; @@ -104,8 +106,9 @@ static inline void ksw_backtrack(void *km, int is_rot, const uint8_t *p, const i } if (i >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 2, i + 1); // first deletion if (j >= 0) cigar = ksw_push_cigar(km, &n_cigar, &m_cigar, cigar, 1, j + 1); // first insertion - for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR - tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp; + if (!is_rev) + for (i = 0; i < n_cigar>>1; ++i) // reverse CIGAR + tmp = cigar[i], cigar[i] = cigar[n_cigar-1-i], cigar[n_cigar-1-i] = tmp; *m_cigar_ = m_cigar, *n_cigar_ = n_cigar, *cigar_ = cigar; } diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index 35666b5..f291331 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -272,8 +272,11 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin kfree(km, mem); if (with_max) kfree(km, H); if (with_cigar) { // backtrack - if (ez->score > KSW_NEG_INF) ksw_backtrack(km, 1, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); - else ksw_backtrack(km, 1, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + int rev_cigar = !!(flag & KSW_EZ_REV_CIGAR); + if (ez->score > KSW_NEG_INF && !(flag&KSW_EZ_EXTZ_ONLY)) + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, tlen-1, qlen-1, &ez->m_cigar, &ez->n_cigar, &ez->cigar); + else if (ez->max_t >= 0 && ez->max_q >= 0) + ksw_backtrack(km, 1, rev_cigar, (uint8_t*)p, off, n_col_*16, ez->max_t, ez->max_q, &ez->m_cigar, &ez->n_cigar, &ez->cigar); kfree(km, mem2); kfree(km, off); } } diff --git a/minimap.h b/minimap.h index 807ce9e..f28d80b 100644 --- a/minimap.h +++ b/minimap.h @@ -42,7 +42,7 @@ typedef struct { } mm_idx_t; typedef struct { - uint32_t n_cigar, m_cigar; + uint32_t n, m; uint32_t cigar[]; } mm_cigar_t;