r337: support CPU dispatch for gcc-4.8+
using __builtin_cpu_supports()
This commit is contained in:
parent
101b8bb97d
commit
3c997ca016
31
Makefile
31
Makefile
|
|
@ -1,16 +1,20 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -O2 -Wc++-compat
|
||||
CPPFLAGS= -DHAVE_KALLOC
|
||||
INCLUDES= -I.
|
||||
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o ksw2_ll_sse.o \
|
||||
misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
|
||||
INCLUDES=
|
||||
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o ksw2_ll_sse.o
|
||||
PROG= minimap2
|
||||
PROG_EXTRA= sdust minimap2-lite
|
||||
LIBS= -lm -lz -lpthread
|
||||
|
||||
# Select how the ksw2 extension kernels are built.
#   cpu_dispatch non-empty: link both the *_sse41.o and *_sse2.o objects of each
#     kernel together with ksw2_dispatch.o; CFLAGS is left untouched here.
#   cpu_dispatch empty: link a single ksw2_*_sse.o object per kernel, and add
#     -msse4 to CFLAGS unless sse2only is non-empty.
ifneq ($(cpu_dispatch),)
|
||||
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
|
||||
else
|
||||
# No runtime dispatch: -msse4 goes into the global CFLAGS, so it applies to
# every object compiled with $(CFLAGS), not only the ksw2 kernels.
ifeq ($(sse2only),)
|
||||
CFLAGS+=-msse4
|
||||
endif
|
||||
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
|
||||
endif
|
||||
|
||||
.SUFFIXES:.c .o
|
||||
|
||||
|
|
@ -33,6 +37,27 @@ libminimap2.a:$(OBJS)
|
|||
# Build the sdust program directly from sdust.c with _SDUST_MAIN defined,
# linking getopt.o and kalloc.o and -lz. Only $(CFLAGS) is passed; $(CPPFLAGS)
# and $(INCLUDES) are not used by this rule.
# NOTE(review): _SDUST_MAIN presumably enables a main() in sdust.c -- confirm.
sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
|
||||
$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
|
||||
|
||||
# Explicit rules for the objects added to OBJS when cpu_dispatch is set.
# Each ksw2_ext{z,d,s}2_sse.c source is compiled twice with -DKSW_CPU_DISPATCH:
#   *_sse41.o : with -msse4
#   *_sse2.o  : with -mno-sse4 -msse2
# Under KSW_CPU_DISPATCH the sources name their function *_sse41 when
# __SSE4_1__ is defined and *_sse2 otherwise, so the two objects of a kernel
# export different symbols. The ISA flags come after $(CFLAGS) on the command line.
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
|
||||
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
|
||||
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
|
||||
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
|
||||
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -msse4 $< -o $@
|
||||
|
||||
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) -mno-sse4 -msse2 $< -o $@
|
||||
|
||||
# The dispatcher is compiled with -DKSW_CPU_DISPATCH and no extra ISA flags;
# its contents are wrapped in #ifdef KSW_CPU_DISPATCH.
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
|
||||
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
|
||||
|
||||
# Remove build products: object files, static archives, the programs named in
# $(PROG) and $(PROG_EXTRA), plus gmon.out, a.out, *~ backups, *.dSYM and
# session* entries.
clean:
|
||||
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*
|
||||
|
||||
|
|
|
|||
1
ksw2.h
1
ksw2.h
|
|
@ -169,5 +169,4 @@ static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a,
|
|||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
#ifdef KSW_CPU_DISPATCH
|
||||
#include <stdlib.h>
|
||||
#include "ksw2.h"
|
||||
|
||||
/*
 * Runtime dispatcher behind the public ksw_extz2_sse() name (this file is only
 * compiled in when KSW_CPU_DISPATCH is defined).
 *
 * Every argument is forwarded unchanged to ksw_extz2_sse41() if the running CPU
 * reports SSE4.1, otherwise to ksw_extz2_sse2() if it reports SSE2. When
 * neither feature is reported the process is terminated with abort().
 */
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
{
	typedef void (*extz2_kernel_t)(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                               int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                           int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                            int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
	extz2_kernel_t kernel = NULL;

	/* Prefer the SSE4.1 build; SSE2 is only probed when SSE4.1 is missing. */
	if (__builtin_cpu_supports("sse4.1")) {
		kernel = ksw_extz2_sse41;
	} else if (__builtin_cpu_supports("sse2")) {
		kernel = ksw_extz2_sse2;
	}

	if (kernel == NULL) {
		abort(); /* no usable implementation for this CPU */
	}
	kernel(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
}
|
||||
|
||||
/*
 * Runtime dispatcher behind the public ksw_extd2_sse() name (this file is only
 * compiled in when KSW_CPU_DISPATCH is defined).
 *
 * Every argument is forwarded unchanged to ksw_extd2_sse41() if the running CPU
 * reports SSE4.1, otherwise to ksw_extd2_sse2() if it reports SSE2. When
 * neither feature is reported the process is terminated with abort().
 */
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
{
	typedef void (*extd2_kernel_t)(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                               int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t e2,
	                               int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                           int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t e2,
	                           int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                            int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t e2,
	                            int w, int zdrop, int flag, ksw_extz_t *ez);
	extd2_kernel_t kernel = NULL;

	/* Prefer the SSE4.1 build; SSE2 is only probed when SSE4.1 is missing. */
	if (__builtin_cpu_supports("sse4.1")) {
		kernel = ksw_extd2_sse41;
	} else if (__builtin_cpu_supports("sse2")) {
		kernel = ksw_extd2_sse2;
	}

	if (kernel == NULL) {
		abort(); /* no usable implementation for this CPU */
	}
	kernel(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
}
|
||||
|
||||
/*
 * Runtime dispatcher behind the public ksw_exts2_sse() name (this file is only
 * compiled in when KSW_CPU_DISPATCH is defined).
 *
 * Every argument is forwarded unchanged to ksw_exts2_sse41() if the running CPU
 * reports SSE4.1, otherwise to ksw_exts2_sse2() if it reports SSE2. When
 * neither feature is reported the process is terminated with abort().
 */
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
{
	typedef void (*exts2_kernel_t)(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                               int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t noncan,
	                               int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                           int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t noncan,
	                           int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target,
	                            int8_t m, const int8_t *mat, int8_t q, int8_t e, int8_t q2, int8_t noncan,
	                            int zdrop, int flag, ksw_extz_t *ez);
	exts2_kernel_t kernel = NULL;

	/* Prefer the SSE4.1 build; SSE2 is only probed when SSE4.1 is missing. */
	if (__builtin_cpu_supports("sse4.1")) {
		kernel = ksw_exts2_sse41;
	} else if (__builtin_cpu_supports("sse2")) {
		kernel = ksw_exts2_sse2;
	}

	if (kernel == NULL) {
		abort(); /* no usable implementation for this CPU */
	}
	kernel(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
}
|
||||
#endif
|
||||
|
|
@ -10,8 +10,18 @@
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef KSW_CPU_DISPATCH
|
||||
#ifdef __SSE4_1__
|
||||
void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#else
|
||||
void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif
|
||||
#else
|
||||
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif // ~KSW_CPU_DISPATCH
|
||||
{
|
||||
#define __dp_code_block1 \
|
||||
z = _mm_load_si128(&s[t]); \
|
||||
|
|
|
|||
|
|
@ -10,8 +10,18 @@
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef KSW_CPU_DISPATCH
|
||||
#ifdef __SSE4_1__
|
||||
void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#else
|
||||
void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif
|
||||
#else
|
||||
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
|
||||
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif // ~KSW_CPU_DISPATCH
|
||||
{
|
||||
#define __dp_code_block1 \
|
||||
z = _mm_load_si128(&s[t]); \
|
||||
|
|
|
|||
|
|
@ -9,7 +9,15 @@
|
|||
#include <smmintrin.h>
|
||||
#endif
|
||||
|
||||
#ifdef KSW_CPU_DISPATCH
|
||||
#ifdef __SSE4_1__
|
||||
void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#else
|
||||
void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif
|
||||
#else
|
||||
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
|
||||
#endif // ~KSW_CPU_DISPATCH
|
||||
{
|
||||
#define __dp_code_block1 \
|
||||
z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \
|
||||
|
|
|
|||
Loading…
Reference in New Issue