Merge branch 'master' into short

This commit is contained in:
Heng Li 2017-09-07 11:41:32 -04:00
commit 0031158936
9 changed files with 188 additions and 7 deletions

View File

@ -1,15 +1,16 @@
# Toolchain and default flags used by the compile rules in this file.
CC= gcc
CFLAGS= -g -Wall -O2 -Wc++-compat
CPPFLAGS= -DHAVE_KALLOC
# NOTE(review): INCLUDES and OBJS are each assigned twice below. This looks like
# a diff view that interleaves the pre-merge and post-merge lines; read
# literally by make, the later assignment replaces the earlier one -- confirm
# which pair is current.
INCLUDES= -I.
OBJS= kthread.o kalloc.o ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o ksw2_ll_sse.o \
misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o
INCLUDES=
OBJS= kthread.o kalloc.o misc.o bseq.o sketch.o sdust.o index.o chain.o align.o hit.o map.o format.o ksw2_ll_sse.o
PROG= minimap2
PROG_EXTRA= sdust minimap2-lite
LIBS= -lm -lz -lpthread
# Object selection depends on the `sse2only` make variable:
#   empty/unset -> link the SSE2 and SSE4.1 builds of each ksw2_ext* kernel
#                  plus ksw2_dispatch.o;
#   non-empty   -> link the single ksw2_ext*_sse.o builds only.
ifeq ($(sse2only),)
# NOTE(review): if this CFLAGS line is live (and not a removed diff line),
# -msse4 also reaches every object compiled with $(CFLAGS), including the
# *_sse2.o builds -- confirm.
CFLAGS+=-msse4
OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
else
OBJS+=ksw2_extz2_sse.o ksw2_extd2_sse.o ksw2_exts2_sse.o
endif
# Restrict suffix rules to .c -> .o.
.SUFFIXES:.c .o
@ -33,6 +34,27 @@ libminimap2.a:$(OBJS)
# Standalone sdust program: compiles sdust.c (the first prerequisite, $<) with
# -D_SDUST_MAIN and links it against getopt.o, kalloc.o and zlib.
sdust:sdust.c getopt.o kalloc.o kalloc.h kdq.h kvec.h kseq.h sdust.h
$(CC) -D_SDUST_MAIN $(CFLAGS) $< getopt.o kalloc.o -o $@ -lz
# CPU-dispatch builds. Each ksw2_ext*2_sse.c source is compiled twice with
# -DKSW_CPU_DISPATCH: once with -msse4 (producing *_sse41.o) and once with
# -DKSW_SSE2_ONLY (producing *_sse2.o).
# NOTE(review): recipe lines appear without a leading TAB in this view; make
# requires one -- confirm against the real file.
ksw2_extz2_sse41.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_extz2_sse2.o:ksw2_extz2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
ksw2_extd2_sse41.o:ksw2_extd2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_extd2_sse2.o:ksw2_extd2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
ksw2_exts2_sse41.o:ksw2_exts2_sse.c ksw2.h kalloc.h
$(CC) -c -msse4 $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
ksw2_exts2_sse2.o:ksw2_exts2_sse.c ksw2.h kalloc.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH -DKSW_SSE2_ONLY $(INCLUDES) $< -o $@
# The dispatcher object is built with the same -DKSW_CPU_DISPATCH define as the
# kernel objects above, but without -msse4 or -DKSW_SSE2_ONLY.
ksw2_dispatch.o:ksw2_dispatch.c ksw2.h
$(CC) -c $(CFLAGS) $(CPPFLAGS) -DKSW_CPU_DISPATCH $(INCLUDES) $< -o $@
# Remove generated files: gmon.out, object files, a.out, the programs named in
# $(PROG) and $(PROG_EXTRA), *~ backups, static libraries, *.dSYM directories
# and anything matching session*.
clean:
rm -fr gmon.out *.o a.out $(PROG) $(PROG_EXTRA) *~ *.a *.dSYM session*

23
NEWS.md
View File

@ -1,3 +1,26 @@
Release 2.1.1-r341 (6 September 2017)
-------------------------------------
This is a maintenance release that is expected to produce alignments identical
to those of v2.1. Detailed changes include:
* Added CPU dispatch. By default, minimap2 is compiled with both SSE2- and
SSE4-based implementations of the alignment routines and automatically chooses
the right one at runtime. This avoids unexpected errors on older CPUs (#21).
* Improved Windows support, as requested by Oxford Nanopore (#19). Minimap2
now avoids variable-length stack arrays, eliminates alloca(), ships with
getopt_long() and provides timing functions implemented with Windows APIs.
* Fixed a potential segmentation fault when specifying -k/-w/-H with
multi-part index (#23).
* Fixed two memory leaks in example.c
(2.1.1: 6 September 2017, r341)
Release 2.1-r311 (25 August 2017)
---------------------------------

1
ksw2.h
View File

@ -169,5 +169,4 @@ static inline int ksw_apply_zdrop(ksw_extz_t *ez, int is_rot, int32_t H, int a,
}
return 0;
}
#endif

97
ksw2_dispatch.c 100644
View File

@ -0,0 +1,97 @@
#ifdef KSW_CPU_DISPATCH
#include <stdlib.h>
#include "ksw2.h"
#define SIMD_SSE 0x1
#define SIMD_SSE2 0x2
#define SIMD_SSE3 0x4
#define SIMD_SSSE3 0x8
#define SIMD_SSE4_1 0x10
#define SIMD_SSE4_2 0x20
#define SIMD_AVX 0x40
#define SIMD_AVX2 0x80
#define SIMD_AVX512F 0x100
#ifndef _MSC_VER
// adapted from https://github.com/01org/linux-sgx/blob/master/common/inc/internal/linux/cpuid_gnu.h
// Execute the x86 CPUID instruction and return its four result registers.
//
//   cpuid      - output; receives EAX, EBX, ECX and EDX in cpuid[0..3]
//   func_id    - CPUID leaf, loaded into EAX before the instruction
//   subfunc_id - CPUID sub-leaf, loaded into ECX before the instruction
//
// The surrounding #ifndef _MSC_VER guard suggests this stands in for the MSVC
// intrinsic of the same name on other compilers.
void __cpuidex(int cpuid[4], int func_id, int subfunc_id)
{
	// Use __asm__ instead of asm: the plain keyword is a GNU extension that is
	// not recognized in strict ISO modes such as -std=c99, whereas __asm__ is
	// accepted in every mode.
#if defined(__x86_64__)
	__asm__ volatile ("cpuid"
			: "=a" (cpuid[0]), "=b" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
			: "0" (func_id), "2" (subfunc_id));
#else // on 32bit, ebx can NOT be used as PIC code
	// Swap EBX with a scratch register around CPUID so EBX itself is restored
	// afterwards; the scratch register carries the EBX result out.
	__asm__ volatile ("xchgl %%ebx, %1; cpuid; xchgl %%ebx, %1"
			: "=a" (cpuid[0]), "=r" (cpuid[1]), "=c" (cpuid[2]), "=d" (cpuid[3])
			: "0" (func_id), "2" (subfunc_id));
#endif
}
#endif
// Query CPUID and return the bitwise OR of the SIMD_* flags for every
// instruction-set extension the processor reports. Returns 0 if CPUID leaf 0
// reports 0 as the highest available leaf.
int x86_simd(void)
{
	int regs[4], features = 0, max_leaf;

	// Leaf 0: regs[0] (EAX) is the highest leaf that may be queried.
	__cpuidex(regs, 0, 0);
	max_leaf = regs[0];
	if (max_leaf == 0) return 0;

	// Leaf 1: feature bits live in regs[3] (EDX) and regs[2] (ECX).
	__cpuidex(regs, 1, 0);
	features |= ((regs[3] >> 25) & 1) ? SIMD_SSE    : 0;
	features |= ((regs[3] >> 26) & 1) ? SIMD_SSE2   : 0;
	features |= ((regs[2] >>  0) & 1) ? SIMD_SSE3   : 0;
	features |= ((regs[2] >>  9) & 1) ? SIMD_SSSE3  : 0;
	features |= ((regs[2] >> 19) & 1) ? SIMD_SSE4_1 : 0;
	features |= ((regs[2] >> 20) & 1) ? SIMD_SSE4_2 : 0;
	features |= ((regs[2] >> 28) & 1) ? SIMD_AVX    : 0;

	// Leaf 7 (only when available): feature bits live in regs[1] (EBX).
	if (max_leaf >= 7) {
		__cpuidex(regs, 7, 0);
		features |= ((regs[1] >>  5) & 1) ? SIMD_AVX2    : 0;
		features |= ((regs[1] >> 16) & 1) ? SIMD_AVX512F : 0;
	}
	return features;
}
// Runtime dispatcher for ksw_extz2: forwards all arguments unchanged to the
// SSE4.1 build when x86_simd() reports SSE4.1, otherwise to the SSE2 build.
// Calls abort() if the CPU reports neither.
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
{
	extern void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez);
	const unsigned features = x86_simd();

	// Neither implementation can run on this CPU.
	if (!(features & (SIMD_SSE4_1 | SIMD_SSE2))) abort();

	// Prefer the SSE4.1 build whenever it is usable.
	if (features & SIMD_SSE4_1) {
		ksw_extz2_sse41(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
		return;
	}
	ksw_extz2_sse2(km, qlen, query, tlen, target, m, mat, q, e, w, zdrop, flag, ez);
}
// Runtime dispatcher for ksw_extd2: forwards all arguments unchanged to the
// SSE4.1 build when x86_simd() reports SSE4.1, otherwise to the SSE2 build.
// Calls abort() if the CPU reports neither.
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
{
	extern void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
							   int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
								int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez);
	const unsigned features = x86_simd();

	// Neither implementation can run on this CPU.
	if (!(features & (SIMD_SSE4_1 | SIMD_SSE2))) abort();

	// Prefer the SSE4.1 build whenever it is usable.
	if (features & SIMD_SSE4_1) {
		ksw_extd2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
		return;
	}
	ksw_extd2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, e2, w, zdrop, flag, ez);
}
// Runtime dispatcher for ksw_exts2: forwards all arguments unchanged to the
// SSE4.1 build when x86_simd() reports SSE4.1, otherwise to the SSE2 build.
// Calls abort() if the CPU reports neither.
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
				   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
{
	extern void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
							   int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
	extern void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
								int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez);
	const unsigned features = x86_simd();

	// Neither implementation can run on this CPU.
	if (!(features & (SIMD_SSE4_1 | SIMD_SSE2))) abort();

	// Prefer the SSE4.1 build whenever it is usable.
	if (features & SIMD_SSE4_1) {
		ksw_exts2_sse41(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
		return;
	}
	ksw_exts2_sse2(km, qlen, query, tlen, target, m, mat, q, e, q2, noncan, zdrop, flag, ez);
}
#endif

View File

@ -6,12 +6,26 @@
#ifdef __SSE2__
#include <emmintrin.h>
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
void ksw_extd2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
#else
void ksw_extd2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
#endif
#else
void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t e2, int w, int zdrop, int flag, ksw_extz_t *ez)
#endif // ~KSW_CPU_DISPATCH
{
#define __dp_code_block1 \
z = _mm_load_si128(&s[t]); \

View File

@ -6,12 +6,26 @@
#ifdef __SSE2__
#include <emmintrin.h>
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
void ksw_exts2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
#else
void ksw_exts2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
#endif
#else
void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat,
int8_t q, int8_t e, int8_t q2, int8_t noncan, int zdrop, int flag, ksw_extz_t *ez)
#endif // ~KSW_CPU_DISPATCH
{
#define __dp_code_block1 \
z = _mm_load_si128(&s[t]); \

View File

@ -5,11 +5,23 @@
#ifdef __SSE2__
#include <emmintrin.h>
#ifdef KSW_SSE2_ONLY
#undef __SSE4_1__
#endif
#ifdef __SSE4_1__
#include <smmintrin.h>
#endif
#ifdef KSW_CPU_DISPATCH
#ifdef __SSE4_1__
void ksw_extz2_sse41(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
#else
void ksw_extz2_sse2(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
#endif
#else
void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uint8_t *target, int8_t m, const int8_t *mat, int8_t q, int8_t e, int w, int zdrop, int flag, ksw_extz_t *ez)
#endif // ~KSW_CPU_DISPATCH
{
#define __dp_code_block1 \
z = _mm_add_epi8(_mm_load_si128(&s[t]), qe2_); \

2
main.c
View File

@ -6,7 +6,7 @@
#include "mmpriv.h"
#include "getopt.h"
#define MM_VERSION "2.1-r335-dirty"
#define MM_VERSION "2.1.1-r341"
#ifdef __linux__
#include <sys/resource.h>

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "25 August 2017" "minimap2-2.1-r311" "Bioinformatics tools"
.TH minimap2 1 "6 September 2017" "minimap2-2.1.1-r341" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences