diff --git a/public/c++/VectorPairHMM/LoadTimeInitializer.cc b/public/c++/VectorPairHMM/LoadTimeInitializer.cc index fb640ef88..9b56fa9ad 100644 --- a/public/c++/VectorPairHMM/LoadTimeInitializer.cc +++ b/public/c++/VectorPairHMM/LoadTimeInitializer.cc @@ -7,6 +7,10 @@ LoadTimeInitializer g_load_time_initializer; LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded { ConvertChar::init(); +#ifndef DISABLE_FTZ + //Very important to get good performance - enable FTZ, converts denormals to 0 + _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); +#endif m_sumNumReads = 0; m_sumSquareNumReads = 0; m_sumNumHaplotypes = 0; diff --git a/public/c++/VectorPairHMM/Makefile b/public/c++/VectorPairHMM/Makefile index 35b38e291..0b564d033 100644 --- a/public/c++/VectorPairHMM/Makefile +++ b/public/c++/VectorPairHMM/Makefile @@ -13,15 +13,18 @@ CXX=icc LDFLAGS=-lm -lrt $(OMPLDFLAGS) -#USE_PAPI=1 PAPI_DIR=/home/karthikg/softwares/papi-5.3.0 ifdef USE_PAPI ifeq ($(USE_PAPI),1) - COMMON_COMPILATION_FLAGS+=-I$(PAPI_DIR)/include + COMMON_COMPILATION_FLAGS+=-I$(PAPI_DIR)/include -DUSE_PAPI LDFLAGS+=-L$(PAPI_DIR)/lib -lpapi endif endif +ifdef DISABLE_FTZ + COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz +endif + BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker #BIN=checker diff --git a/public/c++/VectorPairHMM/baseline.cc b/public/c++/VectorPairHMM/baseline.cc index d92a21ecf..eb233d5c3 100644 --- a/public/c++/VectorPairHMM/baseline.cc +++ b/public/c++/VectorPairHMM/baseline.cc @@ -1,6 +1,7 @@ #include "headers.h" #include "template.h" -extern uint64_t exceptions_array[128]; +#include "utils.h" + template NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL) { @@ -66,18 +67,22 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL) distm = distm/3; - //feclearexcept(FE_ALL_EXCEPT); + //feclearexcept(FE_ALL_EXCEPT); M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]); - //M[r][c] = (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]); - //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); - //feclearexcept(FE_ALL_EXCEPT); + //feclearexcept(FE_ALL_EXCEPT); X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX]; - //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); - //feclearexcept(FE_ALL_EXCEPT); + //feclearexcept(FE_ALL_EXCEPT); Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY]; - //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + //STORE_FP_EXCEPTIONS(flagp, exceptions_array); + + //CONVERT_AND_PRINT(M[r][c]); + //CONVERT_AND_PRINT(X[r][c]); + //CONVERT_AND_PRINT(Y[r][c]); + } for (c = 0; c < COLS; c++) diff --git a/public/c++/VectorPairHMM/headers.h b/public/c++/VectorPairHMM/headers.h index feb687660..9e4600136 100644 --- a/public/c++/VectorPairHMM/headers.h +++ b/public/c++/VectorPairHMM/headers.h @@ -27,14 +27,19 @@ #include #include -#define STORE_FP_EXCEPTIONS(flagp, exceptions_array) \ - fegetexceptflag(&flagp, FE_OVERFLOW | FE_UNDERFLOW | FE_DIVBYZERO | FE_INVALID | __FE_DENORM); \ +extern uint64_t exceptions_array[128]; +extern FILE* g_debug_fptr; +#define STORE_FP_EXCEPTIONS(flagp, exceptions_array) \ + fegetexceptflag(&flagp, FE_ALL_EXCEPT | __FE_DENORM); \ exceptions_array[FE_INVALID] += ((flagp & FE_INVALID)); \ exceptions_array[__FE_DENORM] += ((flagp & __FE_DENORM) >> 1); \ exceptions_array[FE_DIVBYZERO] += ((flagp & FE_DIVBYZERO) >> 2); \ exceptions_array[FE_OVERFLOW] += ((flagp & FE_OVERFLOW) >> 3); \ exceptions_array[FE_UNDERFLOW] += ((flagp & FE_UNDERFLOW) >> 4); \ - feclearexcept(FE_ALL_EXCEPT); + feclearexcept(FE_ALL_EXCEPT | __FE_DENORM); +#define CONVERT_AND_PRINT(X) \ + g_converter.f = (X); \ + fwrite(&(g_converter.i),4,1,g_debug_fptr); \ #endif diff --git a/public/c++/VectorPairHMM/utils.cc b/public/c++/VectorPairHMM/utils.cc index 4625537bd..419b2d4ce 100644 --- a/public/c++/VectorPairHMM/utils.cc +++ b/public/c++/VectorPairHMM/utils.cc @@ -45,7 +45,7 @@ uint64_t get_machine_capabilities() void initialize_function_pointers(uint64_t mask) { - //mask = 0; + //mask = 0ull; if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX))) { cout << "Using AVX accelerated implementation of PairHMM\n"; @@ -287,16 +287,23 @@ uint64_t diff_time(struct timespec& prev_time) } //#define USE_PAPI +//#define COUNT_EXCEPTIONS +//#define CHECK_RESULTS +#define CHECK_UNDERFLOW 1 #ifdef USE_PAPI #include "papi.h" #define NUM_PAPI_COUNTERS 4 #endif +IF_32 g_converter; +FILE* g_debug_fptr = 0; uint64_t exceptions_array[128]; void do_compute(char* filename) { - memset(exceptions_array, 0, 128*sizeof(uint64_t)); - _MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON); + //g_debug_fptr = fopen("/mnt/app_hdd/scratch/karthikg/dump.log","w"); + //assert(g_debug_fptr); + for(unsigned i=0;i<128;++i) + exceptions_array[i] = 0ull; //assert(feenableexcept(FE_DIVBYZERO | FE_INVALID) >= 0); #ifdef USE_PAPI PAPI_num_counters(); @@ -310,11 +317,11 @@ void do_compute(char* filename) int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; //assert(PAPI_event_name_to_code("ICACHE:IFETCH_STALL",&(events[2])) == PAPI_OK); //assert(PAPI_event_name_to_code("MACHINE_CLEARS:e",&(events[3])) == PAPI_OK); - char* eventnames[NUM_PAPI_COUNTERS]= { "instructions", "cycles", "ifetch_stall", "store_misses" }; + char* eventnames[NUM_PAPI_COUNTERS]= { "instructions", "cycles", "fp_assists", "idq_ms_cycles" }; assert(PAPI_event_name_to_code("ix86arch::INSTRUCTION_RETIRED",&(events[0])) == PAPI_OK); assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES",&(events[1])) == PAPI_OK); - assert(PAPI_event_name_to_code("ICACHE:IFETCH_STALL", &(events[2])) == PAPI_OK); - assert(PAPI_event_name_to_code("perf::L1-DCACHE-STORE-MISSES", &(events[3])) == PAPI_OK); + assert(PAPI_event_name_to_code("FP_ASSIST:ANY", &(events[2])) == PAPI_OK); + assert(PAPI_event_name_to_code("IDQ:MS_UOPS_CYCLES", &(events[3])) == PAPI_OK); long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 }; @@ -353,6 +360,9 @@ void do_compute(char* filename) testcase tc_in; int break_value = 0; + uint64_t fp_single_exceptions_reexecute = 0; + uint64_t fp_single_exceptions_continue = 0; + uint64_t num_double_executions = 0; while(1) { break_value = use_old_read_testcase ? read_testcase(&tc_in, fptr) : @@ -373,13 +383,38 @@ void do_compute(char* filename) for(unsigned i=0;i