diff --git a/PairHMM_JNI/avx_function_instantiations.cc b/PairHMM_JNI/avx_function_instantiations.cc index b236ddc8f..4118fc5cf 100644 --- a/PairHMM_JNI/avx_function_instantiations.cc +++ b/PairHMM_JNI/avx_function_instantiations.cc @@ -16,3 +16,4 @@ template double compute_full_prob_avxd(testcase* tc, double* nextlog); template float compute_full_prob_avxs(testcase* tc, float* nextlog); + diff --git a/PairHMM_JNI/define-double.h b/PairHMM_JNI/define-double.h index 79c6b323f..502b919fe 100644 --- a/PairHMM_JNI/define-double.h +++ b/PairHMM_JNI/define-double.h @@ -47,6 +47,7 @@ #undef MASK_ALL_ONES #undef COMPARE_VECS(__v1, __v2) #undef _256_INT_TYPE + #undef BITMASK_VEC #endif #define PRECISION d @@ -156,3 +157,32 @@ } \ } \ } + +class BitMaskVec_double { + + MASK_VEC low_, high_ ; + _256_TYPE combined_ ; + +public: + + inline MASK_TYPE& getLowEntry(int index) { + return low_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return high_.masks[index] ; + } + + inline const _256_TYPE& getCombinedMask() { + VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ; + + return combined_ ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(low_.vec) ; + VEC_SHIFT_LEFT_1BIT(high_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_double diff --git a/PairHMM_JNI/define-float.h b/PairHMM_JNI/define-float.h index 25ebd1489..3cc57ec38 100644 --- a/PairHMM_JNI/define-float.h +++ b/PairHMM_JNI/define-float.h @@ -47,6 +47,7 @@ #undef MASK_ALL_ONES #undef COMPARE_VECS(__v1, __v2) #undef _256_INT_TYPE + #undef BITMASK_VEC #endif #define PRECISION s @@ -157,3 +158,31 @@ } \ } +class BitMaskVec_float { + + MASK_VEC low_, high_ ; + _256_TYPE combined_ ; + +public: + + inline MASK_TYPE& getLowEntry(int index) { + return low_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return high_.masks[index] ; + } + + inline const _256_TYPE& getCombinedMask() { + VEC_SSE_TO_AVX(low_.vecf, high_.vecf, combined_) ; + + return combined_ ; + } + + inline 
void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(low_.vec) ; + VEC_SHIFT_LEFT_1BIT(high_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_float diff --git a/PairHMM_JNI/define-sse-double.h b/PairHMM_JNI/define-sse-double.h index e48325ba9..a30b2e5f5 100644 --- a/PairHMM_JNI/define-sse-double.h +++ b/PairHMM_JNI/define-sse-double.h @@ -47,7 +47,7 @@ #undef MASK_ALL_ONES #undef COMPARE_VECS(__v1, __v2) #undef _256_INT_TYPE - + #undef BITMASK_VEC #endif #define SSE @@ -69,7 +69,7 @@ #define HAP_TYPE __m128i #define MASK_TYPE uint64_t #define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFL -#define MASK_VEC MaskVec_D128 +#define MASK_VEC MaskVec_D #define VEC_EXTRACT_UNIT(__v1, __im) \ _mm_extract_epi64(__v1, __im) @@ -123,6 +123,31 @@ __vdst = _mm_castsi128_pd(_mm_set_epi64(__vsHigh, __vsLow)) #define VEC_SHIFT_LEFT_1BIT(__vs) \ - __vs = _mm_slli_si64(__vs, 1) + __vs = _mm_slli_epi64(__vs, 1) +class BitMaskVec_sse_double { + + MASK_VEC combined_ ; + +public: + + inline MASK_TYPE& getLowEntry(int index) { + return combined_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return combined_.masks[AVX_LENGTH/2+index] ; + } + + inline const _256_TYPE& getCombinedMask() { + return combined_.vecf ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(combined_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_sse_double + diff --git a/PairHMM_JNI/define-sse-float.h b/PairHMM_JNI/define-sse-float.h index f5758c74a..6612b28e6 100644 --- a/PairHMM_JNI/define-sse-float.h +++ b/PairHMM_JNI/define-sse-float.h @@ -47,7 +47,7 @@ #undef MASK_ALL_ONES #undef COMPARE_VECS(__v1, __v2) #undef _256_INT_TYPE - + #undef BITMASK_VEC #endif #define SSE @@ -69,7 +69,7 @@ #define HAP_TYPE UNION_TYPE #define MASK_TYPE uint32_t #define MASK_ALL_ONES 0xFFFFFFFF -#define MASK_VEC MaskVec_F128 +#define MASK_VEC MaskVec_F #define VEC_EXTRACT_UNIT(__v1, __im) \ _mm_extract_epi32(__v1, __im) @@ -123,5 +123,29 @@ __vdst = _mm_cvtpi32x2_ps(__vsLow, __vsHigh) #define 
VEC_SHIFT_LEFT_1BIT(__vs) \ - __vs = _mm_slli_pi32(__vs, 1) + __vs = _mm_slli_epi32(__vs, 1) +class BitMaskVec_sse_float { + + MASK_VEC combined_ ; + +public: + + inline MASK_TYPE& getLowEntry(int index) { + return combined_.masks[index] ; + } + inline MASK_TYPE& getHighEntry(int index) { + return combined_.masks[AVX_LENGTH/2+index] ; + } + + inline const _256_TYPE& getCombinedMask() { + return combined_.vecf ; + } + + inline void shift_left_1bit() { + VEC_SHIFT_LEFT_1BIT(combined_.vec) ; + } + +} ; + +#define BITMASK_VEC BitMaskVec_sse_float diff --git a/PairHMM_JNI/pairhmm-1-base.cc b/PairHMM_JNI/pairhmm-1-base.cc index 3030b640e..3941c92de 100644 --- a/PairHMM_JNI/pairhmm-1-base.cc +++ b/PairHMM_JNI/pairhmm-1-base.cc @@ -19,6 +19,12 @@ LoadTimeInitializer g_load_time_initializer; #define BATCH_SIZE 10000 #define RUN_HYBRID +double getCurrClk() { + struct timeval tv ; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; +} + int main(int argc, char** argv) { if(argc < 2) @@ -29,6 +35,10 @@ int main(int argc, char** argv) bool use_old_read_testcase = false; if(argc >= 3 && string(argv[2]) == "1") use_old_read_testcase = true; + unsigned chunk_size = 100; + if(argc >= 4) + chunk_size = strtol(argv[3],0,10); + initialize_function_pointers(); @@ -45,12 +55,24 @@ int main(int argc, char** argv) assert(ifptr.is_open()); } + vector<testcase> tc_vector; + tc_vector.clear(); testcase tc; while(1) { int break_value = use_old_read_testcase ?
read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true); if(break_value < 0) break; + tc_vector.push_back(tc); + } + vector results_vec; + results_vec.clear(); + results_vec.resize(tc_vector.size()); + double start_time = getCurrClk(); +#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12) + for(unsigned i=0;i(&tc); baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0)); - cout << std::scientific << baseline_result << " "< 1e-5 && rel_error > 1e-5) + cout << std::scientific << baseline_result << " "< NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIM GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, AVX_LENGTH) ; #endif // Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors - MASK_VEC currMaskVecLow ; // corresponding to lower half - MASK_VEC currMaskVecHigh ; // corresponding to upper half + + BITMASK_VEC bitMaskVec ; for (int d=1;d NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIM sumX = VEC_SET1_VAL(zero); // Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors - MASK_VEC currMaskVecLow ; // corresponding to lower half - MASK_VEC currMaskVecHigh ; // corresponding to upper half + BITMASK_VEC bitMaskVec ; for (int d=1;d; diff --git a/PairHMM_JNI/vector_defs.h b/PairHMM_JNI/vector_defs.h index c89f7f932..6574f4a76 100644 --- a/PairHMM_JNI/vector_defs.h +++ b/PairHMM_JNI/vector_defs.h @@ -17,6 +17,7 @@ #define SIMD_TYPE sse #define SIMD_TYPE_SSE + #include "define-sse-float.h" #include "vector_function_prototypes.h"