From a22ebb98367c5c13b9d93129426f0231217155c2 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Mon, 26 Feb 2018 09:51:01 -0500 Subject: [PATCH] use SSE compiler flags more precisely (#127) --- Makefile | 29 +++++++++++++++++------------ setup.py | 2 +- 2 files changed, 18 insertions(+), 13 deletions(-) diff --git a/Makefile b/Makefile index c53b52d..38533e5 100644 --- a/Makefile +++ b/Makefile @@ -6,16 +6,16 @@ PROG= minimap2 PROG_EXTRA= sdust minimap2-lite LIBS= -lm -lz -lpthread -ifeq ($(arm_neon),) -ifeq ($(sse2only),) +ifeq ($(arm_neon),) # if arm_neon is not defined +ifeq ($(sse2only),) # if sse2only is not defined OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o -else +else # if sse2only is defined OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o endif -else +else # if arm_neon is defined OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char - INCLUDES+=-I sse2neon + INCLUDES+=-Isse2neon endif .PHONY:all extra clean depend @@ -42,26 +42,31 @@ sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h # SSE-specific targets on x86/x86_64 +ifeq ($(arm_neon),) # if arm_neon is not defined, build this target with -msse2; otherwise it is compiled with the default setting (i.e. 
no -msse2) +ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h + $(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@ +endif + ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h - $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h - $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h - $(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@ ksw2_dispatch.o:ksw2_dispatch.c ksw2.h - $(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ + $(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@ # NEON-specific targets on ARM diff --git a/setup.py b/setup.py index cf2ea8e..0f70fe8 100644 --- a/setup.py +++ b/setup.py @@ -39,7 +39,7 @@ setup( depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h', 'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h', 
'python/cmappy.h', 'python/cmappy.pxd'], - extra_compile_args = ['-DHAVE_KALLOC', '-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4 + extra_compile_args = ['-DHAVE_KALLOC', '-msse4.1'], # WARNING: ancient x86_64 CPUs don't have SSE4.1 include_dirs = ['.'], libraries = ['z', 'm', 'pthread'])], classifiers = [