From 46e8b6a4f95d6776e4c1c20832d9bccd1802b632 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Sun, 3 Sep 2017 20:29:24 -0400 Subject: [PATCH] r338: portable CPU dispatch, which is the default working with gcc, icc, clang and msvc. --- Makefile | 17 +++++++---------- ksw2_dispatch.c | 48 ++++++++++++++++++++++++++++++++++++++++++------ ksw2_extd2_sse.c | 4 ++++ ksw2_exts2_sse.c | 4 ++++ ksw2_extz2_sse.c | 4 ++++ main.c | 2 +- 6 files changed, 62 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 66b37ce..85d5ea5 100644 --- a/Makefile +++ b/Makefile @@ -7,12 +7,9 @@ PROG= minimap2 PROG_EXTRA= sdust minimap2-lite LIBS= -lm -lz -lpthread -ifneq ($(cpu_dispatch),) +ifeq ($(sse2only),) OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o else -ifeq ($(sse2only),) - CFLAGS+=-msse4 -endif OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o endif @@ -38,22 +35,22 @@ sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h $(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@ + $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@ + $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@ + $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@ + $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@ + $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@ + $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ diff --git a/ksw2_dispatch.c b/ksw2_dispatch.c index 681460e..99de9a0 100644 --- a/ksw2_dispatch.c +++ b/ksw2_dispatch.c @@ -2,13 +2,45 @@ #include #include "ksw2.h" +#define SIMD_SSE 0x1 +#define SIMD_SSE2 0x2 +#define SIMD_SSE3 0x4 +#define SIMD_SSE4_1 0x8 +#define SIMD_SSE4_2 0x10 +#define SIMD_AVX 0x20 +#define SIMD_AVX2 0x40 +#define SIMD_AVX512F 0x80 + +unsigned x86_simd(void) +{ + unsigned eax, ebx, ecx, edx, flag = 0; +#ifdef _MSC_VER + int cpuid[4]; + __cpuid(cpuid, 1); + eax = cpuid[0], ebx = cpuid[1], ecx = cpuid[2], edx = cpuid[3]; +#else + asm volatile("cpuid" : "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx) : "a" (1)); +#endif + if (edx>>25&1) flag |= SIMD_SSE; + if (edx>>26&1) flag |= SIMD_SSE2; + if (ecx>>0 &1) flag |= SIMD_SSE3; + if (ecx>>19&1) flag |= SIMD_SSE4_1; + if (ecx>>20&1) flag |= SIMD_SSE4_2; + if (ecx>>28&1) flag |= SIMD_AVX; + if (ebx>>5 &1) flag |= SIMD_AVX2; + if (ebx>>16&1) flag |= SIMD_AVX512F; + return flag; +} + void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez) { extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez); extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez); - if (__builtin_cpu_supports("sse4.1")) + unsigned simd; + simd = x86_simd(); + if (simd & SIMD_SSE4_1) ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez); - else if (__builtin_cpu_supports("sse2")) + else if (simd & SIMD_SSE2) ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez); else abort(); } @@ -20,9 +52,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez); extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez); - if (__builtin_cpu_supports("sse4.1")) + unsigned simd; + simd = x86_simd(); + if (simd & SIMD_SSE4_1) ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez); - else if (__builtin_cpu_supports("sse2")) + else if (simd & SIMD_SSE2) ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez); else abort(); } @@ -34,9 +68,11 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez); extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez); - if (__builtin_cpu_supports("sse4.1")) + unsigned simd; + simd = x86_simd(); + if (simd & SIMD_SSE4_1) ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez); - else if (__builtin_cpu_supports("sse2")) + else if (simd & SIMD_SSE2) ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez); else abort(); } diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c index 9a64e49..ca4af4d 100644 --- a/ksw2_extd2_sse.c +++ b/ksw2_extd2_sse.c @@ -6,6 +6,10 @@ #ifdef __SSE2__ #include +#ifdef KSW_SSE2_ONLY +#undef __SSE4_1__ +#endif + #ifdef __SSE4_1__ #include #endif diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c index 7a64905..66149d2 100644 --- a/ksw2_exts2_sse.c +++ b/ksw2_exts2_sse.c @@ -6,6 +6,10 @@ #ifdef __SSE2__ #include +#ifdef KSW_SSE2_ONLY +#undef __SSE4_1__ +#endif + #ifdef __SSE4_1__ #include #endif diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index 18a3d2b..3889955 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -5,6 +5,10 @@ #ifdef __SSE2__ #include +#ifdef KSW_SSE2_ONLY +#undef __SSE4_1__ +#endif + #ifdef __SSE4_1__ #include #endif diff --git a/main.c b/main.c index e838560..03493ab 100644 --- a/main.c +++ b/main.c @@ -6,7 +6,7 @@ #include "mmpriv.h" #include "getopt.h" -#define MM_VERSION "2.1-r337-dirty" +#define MM_VERSION "2.1-r338-dirty" #ifdef __linux__ #include