diff --git a/Makefile b/Makefile
index 6ed7bae..85d5ea5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,15 +1,16 @@
 CC= gcc
 CFLAGS= -g -Wall -O2 -Wc++-compat
 CPPFLAGS= -DHAVE_KALLOC
-INCLUDES= -I.
-OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o ksw2_ll_sse.o \
-	misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
+INCLUDES=
+OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o ksw2_ll_sse.o
 PROG= minimap2
 PROG_EXTRA= sdust minimap2-lite
 LIBS= -lm -lz -lpthread
 
 ifeq ($(sse2only),)
-	CFLAGS+=-msse4
+	OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
+else
+	OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
 endif
 
 .SUFFIXES:.c .o
@@ -33,6 +34,27 @@ libminimap2.a:$(OBJS)
 sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
 	$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
 
+ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+	$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
+
+ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
+	$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
+
+ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+	$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
+
+ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
+	$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
+
+ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+	$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
+
+ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
+	$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
+
+ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
+	$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
+
 clean:
 	rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
 
diff --git a/NEWS.md b/NEWS.md
index 5bc0d76..7b04ab6 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,26 @@
+Release 2.1.1-r341 (6 September 2017)
+-------------------------------------
+
+This is a maintenance release that is expected to output identical alignment to
+v2.1. Detailed changes include:
+
+ * Support CPU dispatch. By default, minimap2 is compiled with both SSE2 and
+   SSE4 based implementation of alignment and automatically chooses the right
+   one at runtime. This avoids unexpected errors on older CPUs (#21).
+
+ * Improved Windows support as is requested by Oxford Nanopore (#19). Minimap2
+   now avoids variable-length stacked arrays, eliminates alloca(), ships with
+   getopt_long() and provides timing functions implemented with Windows APIs.
+
+ * Fixed a potential segmentation fault when specifying -k/-w/-H with
+   multi-part index (#23).
+
+ * Fixed two memory leaks in example.c
+
+(2.1.1: 6 September 2017, r341)
+
+
+
 Release 2.1-r311 (25 August 2017)
 ---------------------------------
 
diff --git a/ksw2.h b/ksw2.h
index 5e43970..fa22fe6 100644
--- a/ksw2.h
+++ b/ksw2.h
@@ -169,5 +169,4 @@ static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a,
 	}
 	return 0;
 }
-
 #endif
diff --git a/ksw2_dispatch.c b/ksw2_dispatch.c
new file mode 100644
index 0000000..b125392
--- /dev/null
+++ b/ksw2_dispatch.c
@@ -0,0 +1,97 @@
+#ifdef KSW_CPU_DISPATCH
+#include <stdlib.h>
+#include "ksw2.h"
+
+#define SIMD_SSE 0x1
+#define SIMD_SSE2 0x2
+#define SIMD_SSE3 0x4
+#define SIMD_SSSE3 0x8
+#define SIMD_SSE4_1 0x10
+#define SIMD_SSE4_2 0x20
+#define SIMD_AVX 0x40
+#define SIMD_AVX2 0x80
+#define SIMD_AVX512F 0x100
+
+#ifndef _MSC_VER
+// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h
+void __cpuidex(int cpuid[4], int func_id, int subfunc_id)
+{
+#if defined(__x86_64__)
+	asm volatile ("cpuid"
+			: "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
+			: "0" (func_id), "2" (subfunc_id));
+#else // on 32bit, ebx can NOT be used as PIC code
+	asm volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
+			: "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
+			: "0" (func_id), "2" (subfunc_id));
+#endif
+}
+#endif
+
+int x86_simd(void)
+{
+	int flag = 0, cpuid[4], max_id;
+	__cpuidex(cpuid, 0, 0);
+	max_id = cpuid[0];
+	if (max_id == 0) return 0;
+	__cpuidex(cpuid, 1, 0);
+	if (cpuid[3]>>25&1) flag |= SIMD_SSE;
+	if (cpuid[3]>>26&1) flag |= SIMD_SSE2;
+	if (cpuid[2]>>0 &1) flag |= SIMD_SSE3;
+	if (cpuid[2]>>9 &1) flag |= SIMD_SSSE3;
+	if (cpuid[2]>>19&1) flag |= SIMD_SSE4_1;
+	if (cpuid[2]>>20&1) flag |= SIMD_SSE4_2;
+	if (cpuid[2]>>28&1) flag |= SIMD_AVX;
+	if (max_id >= 7) {
+		__cpuidex(cpuid, 7, 0);
+		if (cpuid[1]>>5 &1) flag |= SIMD_AVX2;
+		if (cpuid[1]>>16&1) flag |= SIMD_AVX512F;
+	}
+	return flag;
+}
+
+void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
+	unsigned simd;
+	simd = x86_simd();
+	if (simd & SIMD_SSE4_1)
+		ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
+	else if (simd & SIMD_SSE2)
+		ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
+	else abort();
+}
+
+void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+			int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+			int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
+	unsigned simd;
+	simd = x86_simd();
+	if (simd & SIMD_SSE4_1)
+		ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
+	else if (simd & SIMD_SSE2)
+		ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
+	else abort();
+}
+
+void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+{
+	extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+			int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
+	extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+			int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
+	unsigned simd;
+	simd = x86_simd();
+	if (simd & SIMD_SSE4_1)
+		ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
+	else if (simd & SIMD_SSE2)
+		ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
+	else abort();
+}
+#endif
diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c
index 56cd8cd..ca4af4d 100644
--- a/ksw2_extd2_sse.c
+++ b/ksw2_extd2_sse.c
@@ -6,12 +6,26 @@
 #ifdef __SSE2__
 #include <emmintrin.h>
 
+#ifdef KSW_SSE2_ONLY
+#undef __SSE4_1__
+#endif
+
 #ifdef __SSE4_1__
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
 		int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_load_si128(&s[t]); \
diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c
index 6decd2d..66149d2 100644
--- a/ksw2_exts2_sse.c
+++ b/ksw2_exts2_sse.c
@@ -6,12 +6,26 @@
 #ifdef __SSE2__
 #include <emmintrin.h>
 
+#ifdef KSW_SSE2_ONLY
+#undef __SSE4_1__
+#endif
+
 #ifdef __SSE4_1__
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
+		int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
 		int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_load_si128(&s[t]); \
diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c
index f21f184..3889955 100644
--- a/ksw2_extz2_sse.c
+++ b/ksw2_extz2_sse.c
@@ -5,11 +5,23 @@
 #ifdef __SSE2__
 #include <emmintrin.h>
 
+#ifdef KSW_SSE2_ONLY
+#undef __SSE4_1__
+#endif
+
 #ifdef __SSE4_1__
 #include <smmintrin.h>
 #endif
 
+#ifdef KSW_CPU_DISPATCH
+#ifdef __SSE4_1__
+void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#else
+void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif
+#else
 void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
+#endif // ~KSW_CPU_DISPATCH
 {
 #define __dp_code_block1 \
 	z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \
diff --git a/main.c b/main.c
index b99fa5d..6db80fe 100644
--- a/main.c
+++ b/main.c
@@ -6,7 +6,7 @@
 #include "mmpriv.h"
 #include "getopt.h"
 
-#define MM_VERSION "2.1-r335-dirty"
+#define MM_VERSION "2.1.1-r341"
 
 #ifdef __linux__
 #include <sys/resource.h>
diff --git a/minimap2.1 b/minimap2.1
index 61a0bdb..6cef237 100644
--- a/minimap2.1
+++ b/minimap2.1
@@ -1,4 +1,4 @@
-.TH minimap2 1 "25 August 2017" "minimap2-2.1-r311" "Bioinformatics tools"
+.TH minimap2 1 "6 September 2017" "minimap2-2.1.1-r341" "Bioinformatics tools"
 .SH NAME
 .PP
 minimap2 - mapping and alignment between collections of DNA sequences