r281: don't open long gaps on query

This commit is contained in:
Heng Li 2017-08-10 15:04:59 -04:00
parent c59b0781bc
commit 163fa36ee6
5 changed files with 41 additions and 13 deletions

9
Makefile
View File

@ -2,8 +2,8 @@ CC= gcc
CFLAGS= -g -Wall -O2 -Wc++-compat CFLAGS= -g -Wall -O2 -Wc++-compat
CPPFLAGS= -DHAVE_KALLOC CPPFLAGS= -DHAVE_KALLOC
INCLUDES= -I. INCLUDES= -I.
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_ll_sse.o misc.o bseq.o \ OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_extd2_noins_sse.o ksw2_ll_sse.o \
sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
PROG= minimap2 PROG= minimap2
PROG_EXTRA= sdust minimap2-lite PROG_EXTRA= sdust minimap2-lite
LIBS= -lm -lz -lpthread LIBS= -lm -lz -lpthread
@ -33,6 +33,9 @@ libminimap2.a:$(OBJS)
sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h sdust:sdust.c kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
$(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz $(CC) -D_SDUST_MAIN $(CFLAGS) $< kalloc.o -o $@ -lz
ksw2_extd2_noins_sse.o:ksw2_extd2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_NO_LONG_INS $(INCLUDES) $< -o $@
clean: clean:
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session* rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
@ -45,7 +48,7 @@ align.o: minimap.h mmpriv.h bseq.h ksw2.h kalloc.h
bseq.o: bseq.h kseq.h bseq.o: bseq.h kseq.h
chain.o: minimap.h mmpriv.h bseq.h kalloc.h chain.o: minimap.h mmpriv.h bseq.h kalloc.h
example.o: minimap.h kseq.h example.o: minimap.h kseq.h
format.o: mmpriv.h minimap.h bseq.h format.o: kalloc.h mmpriv.h minimap.h bseq.h
hit.o: mmpriv.h minimap.h bseq.h kalloc.h hit.o: mmpriv.h minimap.h bseq.h kalloc.h
index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h
kalloc.o: kalloc.h kalloc.o: kalloc.h

17
align.c
View File

@ -126,17 +126,18 @@ static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) //
static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int flag, ksw_extz_t *ez) static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint8_t *qseq, int tlen, const uint8_t *tseq, const int8_t *mat, int w, int flag, ksw_extz_t *ez)
{ {
int bw = (opt->flag & MM_F_CDNA)? -1 : w;
if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) { if (mm_dbg_flag & MM_DBG_PRINT_ALN_SEQ) {
int i; int i;
fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, flag=%d, zdrop=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, bw, flag, opt->zdrop); fprintf(stderr, "===> q=(%d,%d), e=(%d,%d), bw=%d, flag=%d, zdrop=%d <===\n", opt->q, opt->q2, opt->e, opt->e2, w, flag, opt->zdrop);
for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); fputc('\n', stderr); for (i = 0; i < tlen; ++i) fputc("ACGTN"[tseq[i]], stderr); fputc('\n', stderr);
for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr); for (i = 0; i < qlen; ++i) fputc("ACGTN"[qseq[i]], stderr); fputc('\n', stderr);
} }
if (opt->q == opt->q2 && opt->e == opt->e2) if (opt->flag & MM_F_CDNA)
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, bw, opt->zdrop, flag, ez); ksw_extd2_noins_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, -1, opt->zdrop, flag, ez);
else if (opt->q == opt->q2 && opt->e == opt->e2)
ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, opt->zdrop, flag, ez);
else else
ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, bw, opt->zdrop, flag, ez); ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, opt->zdrop, flag, ez);
} }
static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x) static inline int mm_get_hplen_back(const mm_idx_t *mi, uint32_t rid, uint32_t x)
@ -255,10 +256,10 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
bw = (int)(opt->bw * 1.5 + 1.); bw = (int)(opt->bw * 1.5 + 1.);
r2->cnt = 0; r2->cnt = 0;
if (!(opt->flag & MM_F_CDNA)) { if (!(opt->flag & MM_F_CDNA))
mm_fix_bad_ends(r, a, opt->bw, &as1, &cnt1); mm_fix_bad_ends(r, a, opt->bw, &as1, &cnt1);
mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10); else as1 = r->as, cnt1 = r->cnt;
} else as1 = r->as, cnt1 = r->cnt; mm_filter_bad_seeds(km, as1, cnt1, a, 10, 40, opt->max_gap>>1, 10);
mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs); mm_adjust_minier(mi, qseq0, &a[as1], &rs, &qs);
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe); mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);

3
ksw2.h
View File

@ -53,6 +53,9 @@ void ksw_extd(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez); int8_t gapo, int8_t gape, int8_t gapo2, int8_t gape2, int w, int zdrop, int flag, ksw_extz_t *ez);
void ksw_extd2_noins_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez); void ksw_extf2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t mch, int8_t mis, int8_t e, int w, int xdrop, ksw_extz_t *ez);
/** /**

21
ksw2_extd2_sse.c
View File

@ -10,8 +10,13 @@
#include <smmintrin.h> #include <smmintrin.h>
#endif #endif
#if !defined(KSW_NO_LONG_INS)
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez) int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
#else
void ksw_extd2_noins_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
#endif
{ {
#define __dp_code_block1 \ #define __dp_code_block1 \
z = _mm_load_si128(&s[t]); \ z = _mm_load_si128(&s[t]); \
@ -172,7 +177,9 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
z = _mm_max_epi8(z, a); z = _mm_max_epi8(z, a);
z = _mm_max_epi8(z, b); z = _mm_max_epi8(z, b);
z = _mm_max_epi8(z, a2); z = _mm_max_epi8(z, a2);
#ifndef KSW_NO_LONG_INS
z = _mm_max_epi8(z, b2); z = _mm_max_epi8(z, b2);
#endif
z = _mm_min_epi8(z, sc_mch_); z = _mm_min_epi8(z, sc_mch_);
__dp_code_block2; // save u[] and v[]; update a, b, a2 and b2 __dp_code_block2; // save u[] and v[]; update a, b, a2 and b2
_mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_)); _mm_store_si128(&x[t], _mm_sub_epi8(_mm_max_epi8(a, zero_), qe_));
@ -197,8 +204,10 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
_mm_store_si128(&y[t], _mm_sub_epi8(_mm_and_si128(tmp, b), qe_)); _mm_store_si128(&y[t], _mm_sub_epi8(_mm_and_si128(tmp, b), qe_));
tmp = _mm_cmpgt_epi8(a2, zero_); tmp = _mm_cmpgt_epi8(a2, zero_);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_)); _mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
#ifndef KSW_NO_LONG_INS
tmp = _mm_cmpgt_epi8(b2, zero_); tmp = _mm_cmpgt_epi8(b2, zero_);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_)); _mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
#endif
#endif #endif
} }
} else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment } else if (!(flag&KSW_EZ_RIGHT)) { // gap left-alignment
@ -214,8 +223,10 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
z = _mm_max_epi8(z, b); z = _mm_max_epi8(z, b);
d = _mm_blendv_epi8(d, _mm_set1_epi8(3), _mm_cmpgt_epi8(a2, z)); // d = a2 > z? 3 : d d = _mm_blendv_epi8(d, _mm_set1_epi8(3), _mm_cmpgt_epi8(a2, z)); // d = a2 > z? 3 : d
z = _mm_max_epi8(z, a2); z = _mm_max_epi8(z, a2);
#ifndef KSW_NO_LONG_INS
d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d d = _mm_blendv_epi8(d, _mm_set1_epi8(4), _mm_cmpgt_epi8(b2, z)); // d = a2 > z? 3 : d
z = _mm_max_epi8(z, b2); z = _mm_max_epi8(z, b2);
#endif
z = _mm_min_epi8(z, sc_mch_); z = _mm_min_epi8(z, sc_mch_);
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8() #else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
tmp = _mm_cmpgt_epi8(a, z); tmp = _mm_cmpgt_epi8(a, z);
@ -227,9 +238,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
tmp = _mm_cmpgt_epi8(a2, z); tmp = _mm_cmpgt_epi8(a2, z);
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3))); d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(3)));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2)); z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, a2));
#ifndef KSW_NO_LONG_INS
tmp = _mm_cmpgt_epi8(b2, z); tmp = _mm_cmpgt_epi8(b2, z);
d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4))); d = _mm_or_si128(_mm_andnot_si128(tmp, d), _mm_and_si128(tmp, _mm_set1_epi8(4)));
z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2)); z = _mm_or_si128(_mm_andnot_si128(tmp, z), _mm_and_si128(tmp, b2));
#endif
tmp = _mm_cmplt_epi8(sc_mch_, z); tmp = _mm_cmplt_epi8(sc_mch_, z);
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z)); z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
#endif #endif
@ -243,9 +256,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
tmp = _mm_cmpgt_epi8(a2, zero_); tmp = _mm_cmpgt_epi8(a2, zero_);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_)); _mm_store_si128(&x2[t], _mm_sub_epi8(_mm_and_si128(tmp, a2), qe2_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0 d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
#ifndef KSW_NO_LONG_INS
tmp = _mm_cmpgt_epi8(b2, zero_); tmp = _mm_cmpgt_epi8(b2, zero_);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_)); _mm_store_si128(&y2[t], _mm_sub_epi8(_mm_and_si128(tmp, b2), qe2_));
d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0 d = _mm_or_si128(d, _mm_and_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0
#endif
_mm_store_si128(&pr[t], d); _mm_store_si128(&pr[t], d);
} }
} else { // gap right-alignment } else { // gap right-alignment
@ -261,8 +276,10 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
z = _mm_max_epi8(z, b); z = _mm_max_epi8(z, b);
d = _mm_blendv_epi8(_mm_set1_epi8(3), d, _mm_cmpgt_epi8(z, a2)); // d = z > a2? d : 3 d = _mm_blendv_epi8(_mm_set1_epi8(3), d, _mm_cmpgt_epi8(z, a2)); // d = z > a2? d : 3
z = _mm_max_epi8(z, a2); z = _mm_max_epi8(z, a2);
#ifndef KSW_NO_LONG_INS
d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4 d = _mm_blendv_epi8(_mm_set1_epi8(4), d, _mm_cmpgt_epi8(z, b2)); // d = z > b2? d : 4
z = _mm_max_epi8(z, b2); z = _mm_max_epi8(z, b2);
#endif
z = _mm_min_epi8(z, sc_mch_); z = _mm_min_epi8(z, sc_mch_);
#else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8() #else // we need to emulate SSE4.1 intrinsics _mm_max_epi8() and _mm_blendv_epi8()
tmp = _mm_cmpgt_epi8(z, a); tmp = _mm_cmpgt_epi8(z, a);
@ -274,9 +291,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
tmp = _mm_cmpgt_epi8(z, a2); tmp = _mm_cmpgt_epi8(z, a2);
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3))); d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(3)));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2)); z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, a2));
#ifndef KSW_NO_LONG_INS
tmp = _mm_cmpgt_epi8(z, b2); tmp = _mm_cmpgt_epi8(z, b2);
d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4))); d = _mm_or_si128(_mm_and_si128(tmp, d), _mm_andnot_si128(tmp, _mm_set1_epi8(4)));
z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2)); z = _mm_or_si128(_mm_and_si128(tmp, z), _mm_andnot_si128(tmp, b2));
#endif
tmp = _mm_cmplt_epi8(sc_mch_, z); tmp = _mm_cmplt_epi8(sc_mch_, z);
z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z)); z = _mm_or_si128(_mm_and_si128(tmp, sc_mch_), _mm_andnot_si128(tmp, z));
#endif #endif
@ -290,9 +309,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
tmp = _mm_cmpgt_epi8(zero_, a2); tmp = _mm_cmpgt_epi8(zero_, a2);
_mm_store_si128(&x2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, a2), qe2_)); _mm_store_si128(&x2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, a2), qe2_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0 d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x20))); // d = a > 0? 1<<5 : 0
#ifndef KSW_NO_LONG_INS
tmp = _mm_cmpgt_epi8(zero_, b2); tmp = _mm_cmpgt_epi8(zero_, b2);
_mm_store_si128(&y2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, b2), qe2_)); _mm_store_si128(&y2[t], _mm_sub_epi8(_mm_andnot_si128(tmp, b2), qe2_));
d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0 d = _mm_or_si128(d, _mm_andnot_si128(tmp, _mm_set1_epi8(0x40))); // d = b > 0? 1<<6 : 0
#endif
_mm_store_si128(&pr[t], d); _mm_store_si128(&pr[t], d);
} }
} }

4
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h" #include "minimap.h"
#include "mmpriv.h" #include "mmpriv.h"
#define MM_VERSION "2.0-r280-dirty" #define MM_VERSION "2.0-r281-dirty"
void liftrlimit() void liftrlimit()
{ {
@ -134,7 +134,7 @@ int main(int argc, char *argv[])
k = 15, w = 5; k = 15, w = 5;
opt.flag |= MM_F_CDNA; opt.flag |= MM_F_CDNA;
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 100000; opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 100000;
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 70, opt.e2 = 0; opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 42, opt.e2 = 0;
opt.zdrop = 200; opt.zdrop = 200;
} else { } else {
fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg); fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg);