r338: portable CPU dispatch, which is now the default;
it works with gcc, icc, clang and msvc.
This commit is contained in:
parent
3c997ca016
commit
46e8b6a4f9
17
Makefile
17
Makefile
|
|
@ -7,12 +7,9 @@ PROG= minimap2
|
|||
PROG_EXTRA= sdust minimap2-lite
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
||||
ifneq ($(cpu_dispatch),)
|
||||
ifeq ($(sse2only),)
|
||||
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
||||
else
|
||||
ifeq ($(sse2only),)
|
||||
CFLAGS+=-msse4
|
||||
endif
|
||||
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
|
||||
endif
|
||||
|
||||
|
|
@ -38,22 +35,22 @@ sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
|
|||
$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
|
||||
|
||||
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
|
||||
|
||||
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
||||
|
|
|
|||
|
|
@ -2,13 +2,45 @@
|
|||
#include <stdlib.h>
|
||||
#include "ksw2.h"
|
||||
|
||||
/* Bit flags returned by x86_simd(); one bit per detected instruction set. */
#define SIMD_SSE     0x1
#define SIMD_SSE2    0x2
#define SIMD_SSE3    0x4
#define SIMD_SSE4_1  0x8
#define SIMD_SSE4_2  0x10
#define SIMD_AVX     0x20
#define SIMD_AVX2    0x40
#define SIMD_AVX512F 0x80

#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h> /* declares __cpuidex() */
#endif

/*
 * Run CPUID for the given leaf/sub-leaf and store EAX, EBX, ECX and EDX in
 * regs[0..3]. On targets that are neither MSVC/x86 nor GCC-style x86, all four
 * outputs are set to zero so callers see "nothing supported".
 */
static void x86_cpuid(unsigned leaf, unsigned subleaf, unsigned regs[4])
{
#if defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))
	int r[4];
	__cpuidex(r, (int)leaf, (int)subleaf);
	regs[0] = (unsigned)r[0];
	regs[1] = (unsigned)r[1];
	regs[2] = (unsigned)r[2];
	regs[3] = (unsigned)r[3];
#elif defined(__i386__) || defined(__x86_64__)
	/* __asm__ rather than asm: the plain keyword is unavailable with -std=c99/-std=c11 */
	__asm__ __volatile__("cpuid"
		: "=a" (regs[0]), "=b" (regs[1]), "=c" (regs[2]), "=d" (regs[3])
		: "a" (leaf), "c" (subleaf));
#else
	(void)leaf;
	(void)subleaf;
	regs[0] = regs[1] = regs[2] = regs[3] = 0;
#endif
}

/*
 * Detect SIMD instruction sets with CPUID.
 *
 * Returns a bitwise OR of the SIMD_* flags above, or 0 when CPUID reports no
 * feature leaf. SSE..AVX come from leaf 1 (EDX/ECX); AVX2 and AVX512F come
 * from leaf 7, sub-leaf 0 (EBX) and are only queried when the CPU reports that
 * leaf. The result reflects CPUID feature bits only; it does not check whether
 * the OS has enabled the wider register state needed by AVX/AVX-512.
 */
unsigned x86_simd(void)
{
	unsigned regs[4], max_leaf, flag = 0;

	x86_cpuid(0, 0, regs);          /* leaf 0: EAX = highest basic leaf */
	max_leaf = regs[0];
	if (max_leaf < 1) return 0;

	x86_cpuid(1, 0, regs);          /* leaf 1: feature bits in ECX/EDX */
	if (regs[3]>>25&1) flag |= SIMD_SSE;
	if (regs[3]>>26&1) flag |= SIMD_SSE2;
	if (regs[2]>>0 &1) flag |= SIMD_SSE3;
	if (regs[2]>>19&1) flag |= SIMD_SSE4_1;
	if (regs[2]>>20&1) flag |= SIMD_SSE4_2;
	if (regs[2]>>28&1) flag |= SIMD_AVX;

	if (max_leaf >= 7) {
		x86_cpuid(7, 0, regs);      /* leaf 7, sub-leaf 0: extended features in EBX */
		if (regs[1]>>5 &1) flag |= SIMD_AVX2;
		if (regs[1]>>16&1) flag |= SIMD_AVX512F;
	}
	return flag;
}
|
||||
|
||||
/*
 * Runtime dispatcher for the extz2 kernel.
 *
 * Calls x86_simd() and forwards every argument unchanged to ksw_extz2_sse41()
 * when SIMD_SSE4_1 is set, otherwise to ksw_extz2_sse2() when SIMD_SSE2 is
 * set; if neither flag is present the process is terminated with abort().
 *
 * NOTE(review): this span interleaves both sides of a diff. The two
 * __builtin_cpu_supports() tests appear to be the removed (pre-change)
 * conditions and the `simd & SIMD_*` tests their replacements -- confirm
 * against the actual ksw2_dispatch.c before relying on this text.
 */
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
{
|
||||
extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
if (__builtin_cpu_supports("sse4.1"))
|
||||
unsigned simd;
|
||||
simd = x86_simd();
|
||||
if (simd & SIMD_SSE4_1)
|
||||
ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
|
||||
else if (__builtin_cpu_supports("sse2"))
|
||||
else if (simd & SIMD_SSE2)
|
||||
ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
|
||||
else abort();
|
||||
}
|
||||
|
|
@ -20,9 +52,11 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
|
||||
if (__builtin_cpu_supports("sse4.1"))
|
||||
unsigned simd;
|
||||
simd = x86_simd();
|
||||
if (simd & SIMD_SSE4_1)
|
||||
ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
|
||||
else if (__builtin_cpu_supports("sse2"))
|
||||
else if (simd & SIMD_SSE2)
|
||||
ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
|
||||
else abort();
|
||||
}
|
||||
|
|
@ -34,9 +68,11 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
|
||||
extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
|
||||
if (__builtin_cpu_supports("sse4.1"))
|
||||
unsigned simd;
|
||||
simd = x86_simd();
|
||||
if (simd & SIMD_SSE4_1)
|
||||
ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
|
||||
else if (__builtin_cpu_supports("sse2"))
|
||||
else if (simd & SIMD_SSE2)
|
||||
ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
|
||||
else abort();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef KSW_SSE2_ONLY
|
||||
#undef __SSE4_1__
|
||||
#endif
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,6 +6,10 @@
|
|||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef KSW_SSE2_ONLY
|
||||
#undef __SSE4_1__
|
||||
#endif
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@
|
|||
#ifdef __SSE2__
|
||||
#include <emmintrin.h>
|
||||
|
||||
#ifdef KSW_SSE2_ONLY
|
||||
#undef __SSE4_1__
|
||||
#endif
|
||||
|
||||
#ifdef __SSE4_1__
|
||||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue