use SSE compiler flags more precisely (#127)

This commit is contained in:
Heng Li 2018-02-26 09:51:01 -05:00
parent eeb314edd6
commit a22ebb9836
2 changed files with 18 additions and 13 deletions

View File

@ -6,16 +6,16 @@ PROG= minimap2
PROG_EXTRA= sdust minimap2-lite
LIBS= -lm -lz -lpthread
ifeq ($(arm_neon),)
ifeq ($(sse2only),)
ifeq ($(arm_neon),) # if arm_neon is not defined
ifeq ($(sse2only),) # if sse2only is not defined
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
else
else # if sse2only is defined
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
endif
else
else # if arm_neon is defined
OBJS+=ksw2_extz2_neon.o ksw2_extd2_neon.o ksw2_exts2_neon.o
CFLAGS+=-D_FILE_OFFSET_BITS=64 -mfpu=neon -fsigned-char
INCLUDES+=-I sse2neon
INCLUDES+=-Isse2neon
endif
.PHONY:all extra clean depend
@ -42,26 +42,31 @@ sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
# SSE-specific targets on x86/x86_64
ifeq ($(arm_neon),) # if arm_neon is defined, compile this target with the default setting (i.e. no -msse2)
ksw2_ll_sse.o:ksw2_ll_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) -msse2 $(CPPFLAGS) $(INCLUDES) $< -o $@
endif
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse2 -mno-sse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
$(CC) -c $(CFLAGS) -msse4.1 $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
# NEON-specific targets on ARM

View File

@ -39,7 +39,7 @@ setup(
depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h',
'python/cmappy.h', 'python/cmappy.pxd'],
extra_compile_args = ['-DHAVE_KALLOC', '-msse4'], # WARNING: ancient x86_64 CPUs don't have SSE4
extra_compile_args = ['-DHAVE_KALLOC', '-msse4.1'], # WARNING: ancient x86_64 CPUs don't have SSE4
include_dirs = ['.'],
libraries = ['z', 'm', 'pthread'])],
classifiers = [