From 7180c392af944a2bc47d842e4c9029eb9b7612fb Mon Sep 17 00:00:00 2001 From: Karthik Gururaj Date: Mon, 20 Jan 2014 08:03:42 -0800 Subject: [PATCH] 1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now. --- PairHMM_JNI/.gitignore | 3 + PairHMM_JNI/Makefile | 6 +- PairHMM_JNI/avx_function_instantiations.cc | 6 + PairHMM_JNI/avx_function_prototypes.h | 19 - PairHMM_JNI/define-double.h | 54 +-- PairHMM_JNI/define-float.h | 4 +- PairHMM_JNI/define-sse-double.h | 128 +++++ PairHMM_JNI/define-sse-float.h | 127 +++++ PairHMM_JNI/headers.h | 1 + PairHMM_JNI/hmm_mask.cc | 451 ------------------ PairHMM_JNI/libJNILoglessPairHMM.so | Bin 96945 -> 0 bytes ...e_sting_utils_pairhmm_JNILoglessPairHMM.cc | 104 ++-- ...te_sting_utils_pairhmm_JNILoglessPairHMM.h | 8 + PairHMM_JNI/pairhmm-1-base.cc | 82 +--- PairHMM_JNI/pairhmm-template-kernel.cc | 161 ++++--- PairHMM_JNI/pairhmm-template-main.cc | 74 ++- PairHMM_JNI/run.sh | 7 +- PairHMM_JNI/shift_template.c | 38 +- PairHMM_JNI/sse_function_instantiations.cc | 18 + PairHMM_JNI/template.h | 47 +- PairHMM_JNI/utils.cc | 35 +- PairHMM_JNI/utils.h | 19 +- PairHMM_JNI/vector_defs.h | 26 + PairHMM_JNI/vector_function_prototypes.h | 19 + .../haplotypecaller/HaplotypeCaller.java | 2 +- .../LikelihoodCalculationEngine.java | 2 + .../PairHMMLikelihoodCalculationEngine.java | 2 + .../utils/pairhmm/JNILoglessPairHMM.java | 38 +- .../sting/utils/pairhmm/PairHMM.java | 3 + 29 files changed, 708 insertions(+), 776 deletions(-) delete mode 100644 PairHMM_JNI/avx_function_prototypes.h create mode 100644 PairHMM_JNI/define-sse-double.h create mode 100644 PairHMM_JNI/define-sse-float.h delete mode 100644 PairHMM_JNI/hmm_mask.cc delete mode 100755 PairHMM_JNI/libJNILoglessPairHMM.so create mode 100644 PairHMM_JNI/sse_function_instantiations.cc create mode 100644 PairHMM_JNI/vector_defs.h create mode 100644 PairHMM_JNI/vector_function_prototypes.h diff --git a/PairHMM_JNI/.gitignore b/PairHMM_JNI/.gitignore index 9722ad970..a68330124 100644 --- a/PairHMM_JNI/.gitignore +++ b/PairHMM_JNI/.gitignore @@ -8,3 +8,6 @@ pairhmm-template-main *.swp checker reformat +subdir_checkout.sh +avx/ +sse/ diff --git a/PairHMM_JNI/Makefile b/PairHMM_JNI/Makefile index 62dc3b0f6..b86958a6c 100644 --- a/PairHMM_JNI/Makefile +++ b/PairHMM_JNI/Makefile @@ -1,4 +1,4 @@ -OMPCFLAGS=-fopenmp +#OMPCFLAGS=-fopenmp #OMPLDFLAGS=-lgomp #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas @@ -20,7 +20,7 @@ DEPDIR=.deps DF=$(DEPDIR)/$(*).d #Common across libJNI and sandbox -COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc +COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc #Part of libJNI LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES) SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc @@ -33,7 +33,7 @@ NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pa #Use -xAVX for these files AVX_SOURCES=avx_function_instantiations.cc #Use -xSSE4.2 for these files -SSE_SOURCES= +SSE_SOURCES=sse_function_instantiations.cc NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o) AVX_OBJECTS=$(AVX_SOURCES:.cc=.o) diff --git a/PairHMM_JNI/avx_function_instantiations.cc b/PairHMM_JNI/avx_function_instantiations.cc index c29370561..b236ddc8f 100644 --- a/PairHMM_JNI/avx_function_instantiations.cc +++ b/PairHMM_JNI/avx_function_instantiations.cc @@ -1,5 +1,11 @@ #include "template.h" +#undef SIMD_TYPE +#undef SIMD_TYPE_SSE + +#define SIMD_TYPE avx +#define SIMD_TYPE_AVX + #include "define-float.h" #include "shift_template.c" #include "pairhmm-template-kernel.cc" diff --git a/PairHMM_JNI/avx_function_prototypes.h b/PairHMM_JNI/avx_function_prototypes.h deleted file mode 100644 index 1836a1c37..000000000 --- a/PairHMM_JNI/avx_function_prototypes.h +++ /dev/null @@ -1,19 +0,0 @@ -void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut); -void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn); -void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]); -void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess); -void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt); -void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen); -template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D); -template void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)( - int stripIdx, Context ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY, - _256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM , - _256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, - UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM); -_256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256, - _256_TYPE distm, _256_TYPE _1_distm); -void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, - UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, - _256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel); -template NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL); - diff --git a/PairHMM_JNI/define-double.h b/PairHMM_JNI/define-double.h index 5d31741d6..79c6b323f 100644 --- a/PairHMM_JNI/define-double.h +++ b/PairHMM_JNI/define-double.h @@ -18,34 +18,34 @@ #undef MASK_TYPE #undef MASK_ALL_ONES - #undef SET_VEC_ZERO - #undef VEC_OR - #undef VEC_ADD - #undef VEC_SUB - #undef VEC_MUL - #undef VEC_DIV - #undef VEC_BLEND - #undef VEC_BLENDV - #undef VEC_CAST_256_128 - #undef VEC_EXTRACT_128 - #undef VEC_EXTRACT_UNIT - #undef VEC_SET1_VAL128 - #undef VEC_MOVE - #undef VEC_CAST_128_256 - #undef VEC_INSERT_VAL - #undef VEC_CVT_128_256 - #undef VEC_SET1_VAL - #undef VEC_POPCVT_CHAR - #undef VEC_LDPOPCVT_CHAR - #undef VEC_CMP_EQ - #undef VEC_SET_LSE - #undef SHIFT_HAP - #undef print256b + #undef SET_VEC_ZERO(__vec) + #undef VEC_OR(__v1, __v2) + #undef VEC_ADD(__v1, __v2) + #undef VEC_SUB(__v1, __v2) + #undef VEC_MUL(__v1, __v2) + #undef VEC_DIV(__v1, __v2) + #undef VEC_BLEND(__v1, __v2, __mask) + #undef VEC_BLENDV(__v1, __v2, __maskV) + #undef VEC_CAST_256_128(__v1) + #undef VEC_EXTRACT_128(__v1, __im) + #undef VEC_EXTRACT_UNIT(__v1, __im) + #undef VEC_SET1_VAL128(__val) + #undef VEC_MOVE(__v1, __val) + #undef VEC_CAST_128_256(__v1) + #undef VEC_INSERT_VAL(__v1, __val, __pos) + #undef VEC_CVT_128_256(__v1) + #undef VEC_SET1_VAL(__val) + #undef VEC_POPCVT_CHAR(__ch) + #undef VEC_LDPOPCVT_CHAR(__addr) + #undef VEC_CMP_EQ(__v1, __v2) + #undef VEC_SET_LSE(__val) + #undef SHIFT_HAP(__v1, __val) + #undef print256b(__v1) #undef MASK_VEC - #undef VEC_SSE_TO_AVX - #undef VEC_SHIFT_LEFT_1BIT + #undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) + #undef VEC_SHIFT_LEFT_1BIT(__vs) #undef MASK_ALL_ONES - #undef COMPARE_VECS + #undef COMPARE_VECS(__v1, __v2) #undef _256_INT_TYPE #endif @@ -83,7 +83,7 @@ #define VEC_MUL(__v1, __v2) \ _mm256_mul_pd(__v1, __v2) -#define VEC_DIV(__v1, __v2) \ +#define VEC_DIV(__v1, __v2) \ _mm256_div_pd(__v1, __v2) #define VEC_BLEND(__v1, __v2, __mask) \ diff --git a/PairHMM_JNI/define-float.h b/PairHMM_JNI/define-float.h index 0e2822b2d..25ebd1489 100644 --- a/PairHMM_JNI/define-float.h +++ b/PairHMM_JNI/define-float.h @@ -84,7 +84,7 @@ #define VEC_MUL(__v1, __v2) \ _mm256_mul_ps(__v1, __v2) -#define VEC_DIV(__v1, __v2) \ +#define VEC_DIV(__v1, __v2) \ _mm256_div_ps(__v1, __v2) #define VEC_BLEND(__v1, __v2, __mask) \ @@ -133,7 +133,7 @@ _mm256_set_ps(zero, zero, zero, zero, zero, zero, zero, __val); #define SHIFT_HAP(__v1, __val) \ - _vector_shift_lasts(__v1, __val.f); + _vector_shift_lastavxs(__v1, __val.f); #define print256b(__v1) \ print256bFP(__v1) diff --git a/PairHMM_JNI/define-sse-double.h b/PairHMM_JNI/define-sse-double.h new file mode 100644 index 000000000..e48325ba9 --- /dev/null +++ b/PairHMM_JNI/define-sse-double.h @@ -0,0 +1,128 @@ +#ifdef PRECISION + #undef PRECISION + #undef MAIN_TYPE + #undef MAIN_TYPE_SIZE + #undef UNION_TYPE + #undef IF_128 + #undef IF_MAIN_TYPE + #undef SHIFT_CONST1 + #undef SHIFT_CONST2 + #undef SHIFT_CONST3 + #undef _128_TYPE + #undef _256_TYPE + #undef AVX_LENGTH + #undef MAVX_COUNT + #undef HAP_TYPE + #undef MASK_TYPE + #undef MASK_ALL_ONES + + #undef VEC_EXTRACT_UNIT(__v1, __im) + #undef VEC_INSERT_UNIT(__v1,__ins,__im) + #undef SET_VEC_ZERO(__vec) + #undef VEC_OR(__v1, __v2) + #undef VEC_ADD(__v1, __v2) + #undef VEC_SUB(__v1, __v2) + #undef VEC_MUL(__v1, __v2) + #undef VEC_DIV(__v1, __v2) + #undef VEC_BLEND(__v1, __v2, __mask) + #undef VEC_BLENDV(__v1, __v2, __maskV) + #undef VEC_CAST_256_128(__v1) + #undef VEC_EXTRACT_128(__v1, __im) + #undef VEC_EXTRACT_UNIT(__v1, __im) + #undef VEC_SET1_VAL128(__val) + #undef VEC_MOVE(__v1, __val) + #undef VEC_CAST_128_256(__v1) + #undef VEC_INSERT_VAL(__v1, __val, __pos) + #undef VEC_CVT_128_256(__v1) + #undef VEC_SET1_VAL(__val) + #undef VEC_POPCVT_CHAR(__ch) + #undef VEC_LDPOPCVT_CHAR(__addr) + #undef VEC_CMP_EQ(__v1, __v2) + #undef VEC_SET_LSE(__val) + #undef SHIFT_HAP(__v1, __val) + #undef print256b(__v1) + #undef MASK_VEC + #undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) + #undef VEC_SHIFT_LEFT_1BIT(__vs) + #undef MASK_ALL_ONES + #undef COMPARE_VECS(__v1, __v2) + #undef _256_INT_TYPE + +#endif + +#define SSE +#define PRECISION d + +#define MAIN_TYPE double +#define MAIN_TYPE_SIZE 64 +#define UNION_TYPE mix_D128 +#define IF_128 IF_128d +#define IF_MAIN_TYPE IF_64 +#define SHIFT_CONST1 1 +#define SHIFT_CONST2 8 +#define SHIFT_CONST3 0 +#define _128_TYPE __m128d +#define _256_TYPE __m128d +#define _256_INT_TYPE __m128i +#define AVX_LENGTH 2 +#define MAVX_COUNT (MROWS+3)/AVX_LENGTH +#define HAP_TYPE __m128i +#define MASK_TYPE uint64_t +#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFL +#define MASK_VEC MaskVec_D128 + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi64(__v1, __im) + +#define VEC_INSERT_UNIT(__v1,__ins,__im) \ + _mm_insert_epi64(__v1,__ins,__im) + +#define VEC_OR(__v1, __v2) \ + _mm_or_pd(__v1, __v2) + +#define VEC_ADD(__v1, __v2) \ + _mm_add_pd(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm_sub_pd(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm_mul_pd(__v1, __v2) + +#define VEC_DIV(__v1, __v2) \ + _mm_div_pd(__v1, __v2) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm_cmpeq_pd(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm_blend_pd(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm_blendv_pd(__v1, __v2, __maskV) + +#define SHIFT_HAP(__v1, __val) \ + __v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0) + +#define VEC_CVT_128_256(__v1) \ + _mm_cvtepi32_pd(__v1) + +#define VEC_SET1_VAL(__val) \ + _mm_set1_pd(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm_cvtepi32_pd(_mm_set1_epi32(__ch)) + +#define VEC_SET_LSE(__val) \ + _mm_set_pd(zero, __val); + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm_cvtepi32_pd(_mm_loadu_si128((__m128i const *)__addr)) + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm_castsi128_pd(_mm_set_epi64(__vsHigh, __vsLow)) + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_si64(__vs, 1) + + diff --git a/PairHMM_JNI/define-sse-float.h b/PairHMM_JNI/define-sse-float.h new file mode 100644 index 000000000..f5758c74a --- /dev/null +++ b/PairHMM_JNI/define-sse-float.h @@ -0,0 +1,127 @@ +#ifdef PRECISION + #undef PRECISION + #undef MAIN_TYPE + #undef MAIN_TYPE_SIZE + #undef UNION_TYPE + #undef IF_128 + #undef IF_MAIN_TYPE + #undef SHIFT_CONST1 + #undef SHIFT_CONST2 + #undef SHIFT_CONST3 + #undef _128_TYPE + #undef _256_TYPE + #undef AVX_LENGTH + #undef MAVX_COUNT + #undef HAP_TYPE + #undef MASK_TYPE + #undef MASK_ALL_ONES + + #undef VEC_EXTRACT_UNIT(__v1, __im) + #undef VEC_INSERT_UNIT(__v1,__ins,__im) + #undef SET_VEC_ZERO(__vec) + #undef VEC_OR(__v1, __v2) + #undef VEC_ADD(__v1, __v2) + #undef VEC_SUB(__v1, __v2) + #undef VEC_MUL(__v1, __v2) + #undef VEC_DIV(__v1, __v2) + #undef VEC_BLEND(__v1, __v2, __mask) + #undef VEC_BLENDV(__v1, __v2, __maskV) + #undef VEC_CAST_256_128(__v1) + #undef VEC_EXTRACT_128(__v1, __im) + #undef VEC_EXTRACT_UNIT(__v1, __im) + #undef VEC_SET1_VAL128(__val) + #undef VEC_MOVE(__v1, __val) + #undef VEC_CAST_128_256(__v1) + #undef VEC_INSERT_VAL(__v1, __val, __pos) + #undef VEC_CVT_128_256(__v1) + #undef VEC_SET1_VAL(__val) + #undef VEC_POPCVT_CHAR(__ch) + #undef VEC_LDPOPCVT_CHAR(__addr) + #undef VEC_CMP_EQ(__v1, __v2) + #undef VEC_SET_LSE(__val) + #undef SHIFT_HAP(__v1, __val) + #undef print256b(__v1) + #undef MASK_VEC + #undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) + #undef VEC_SHIFT_LEFT_1BIT(__vs) + #undef MASK_ALL_ONES + #undef COMPARE_VECS(__v1, __v2) + #undef _256_INT_TYPE + +#endif + +#define SSE +#define PRECISION s + +#define MAIN_TYPE float +#define MAIN_TYPE_SIZE 32 +#define UNION_TYPE mix_F128 +#define IF_128 IF_128f +#define IF_MAIN_TYPE IF_32 +#define SHIFT_CONST1 3 +#define SHIFT_CONST2 4 +#define SHIFT_CONST3 0 +#define _128_TYPE __m128 +#define _256_TYPE __m128 +#define _256_INT_TYPE __m128i +#define AVX_LENGTH 4 +#define MAVX_COUNT (MROWS+3)/AVX_LENGTH +#define HAP_TYPE UNION_TYPE +#define MASK_TYPE uint32_t +#define MASK_ALL_ONES 0xFFFFFFFF +#define MASK_VEC MaskVec_F128 + +#define VEC_EXTRACT_UNIT(__v1, __im) \ + _mm_extract_epi32(__v1, __im) + +#define VEC_INSERT_UNIT(__v1,__ins,__im) \ + _mm_insert_epi32(__v1,__ins,__im) + +#define VEC_OR(__v1, __v2) \ + _mm_or_ps(__v1, __v2) + +#define VEC_ADD(__v1, __v2) \ + _mm_add_ps(__v1, __v2) + +#define VEC_SUB(__v1, __v2) \ + _mm_sub_ps(__v1, __v2) + +#define VEC_MUL(__v1, __v2) \ + _mm_mul_ps(__v1, __v2) + +#define VEC_DIV(__v1, __v2) \ + _mm_div_ps(__v1, __v2) + +#define VEC_CMP_EQ(__v1, __v2) \ + _mm_cmpeq_ps(__v1, __v2) + +#define VEC_BLEND(__v1, __v2, __mask) \ + _mm_blend_ps(__v1, __v2, __mask) + +#define VEC_BLENDV(__v1, __v2, __maskV) \ + _mm_blendv_ps(__v1, __v2, __maskV) + +#define SHIFT_HAP(__v1, __val) \ + _vector_shift_lastsses(__v1, __val.f) + +#define VEC_CVT_128_256(__v1) \ + _mm_cvtepi32_ps(__v1.i) + +#define VEC_SET1_VAL(__val) \ + _mm_set1_ps(__val) + +#define VEC_POPCVT_CHAR(__ch) \ + _mm_cvtepi32_ps(_mm_set1_epi32(__ch)) + +#define VEC_SET_LSE(__val) \ + _mm_set_ps(zero, zero, zero, __val); + +#define VEC_LDPOPCVT_CHAR(__addr) \ + _mm_cvtepi32_ps(_mm_loadu_si128((__m128i const *)__addr)) + +#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ + __vdst = _mm_cvtpi32x2_ps(__vsLow, __vsHigh) + +#define VEC_SHIFT_LEFT_1BIT(__vs) \ + __vs = _mm_slli_pi32(__vs, 1) + diff --git a/PairHMM_JNI/headers.h b/PairHMM_JNI/headers.h index 42485fc51..f502ff5ce 100644 --- a/PairHMM_JNI/headers.h +++ b/PairHMM_JNI/headers.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/PairHMM_JNI/hmm_mask.cc b/PairHMM_JNI/hmm_mask.cc deleted file mode 100644 index c335a0160..000000000 --- a/PairHMM_JNI/hmm_mask.cc +++ /dev/null @@ -1,451 +0,0 @@ -#include -#include -#include -#include - - -// /usr/intel/pkgs/icc/13.0.0e/bin/icc -o ed -O3 ed.cpp -xAVX -openmp -openmp-link static - - -#include - -#include -#include -#include -#include -#include -#include - - -#include -#include -#include -#include -#include - - -#include -#include -#include -#include -#include -#include -#include "template.h" - -using namespace std ; - -#define CHECK_MASK_CORRECTNESS - -template -string getBinaryStr (T val, int numBitsToWrite) { - - ostringstream oss ; - uint64_t mask = ((T) 0x1) << (numBitsToWrite-1) ; - for (int i=numBitsToWrite-1; i >= 0; --i) { - oss << ((val & mask) >> i) ; - mask >>= 1 ; - } - return oss.str() ; -} - -int normalize(char c) -{ - return ((int) (c - 33)); -} - - -uint8_t ConvertChar::conversionTable[255] ; - -int read_testcase(testcase *tc, FILE* ifp) -{ - char *q, *i, *d, *c, *line = NULL; - int _q, _i, _d, _c; - int x, size = 0; - ssize_t read; - - read = getline(&line, (size_t *) &size, ifp == 0 ? stdin : ifp); - if (read == -1) - return -1; - - - tc->hap = (char *) malloc(size); - tc->rs = (char *) malloc(size); - q = (char *) malloc(size); - i = (char *) malloc(size); - d = (char *) malloc(size); - c = (char *) malloc(size); - - if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6) - return -1; - - tc->haplen = strlen(tc->hap); - tc->rslen = strlen(tc->rs); - assert(tc->rslen < MROWS); - tc->ihap = (int *) malloc(tc->haplen*sizeof(int)); - tc->irs = (int *) malloc(tc->rslen*sizeof(int)); - - //tc->q = (int *) malloc(sizeof(int) * tc->rslen); - //tc->i = (int *) malloc(sizeof(int) * tc->rslen); - //tc->d = (int *) malloc(sizeof(int) * tc->rslen); - //tc->c = (int *) malloc(sizeof(int) * tc->rslen); - - for (x = 0; x < tc->rslen; x++) - { - _q = normalize(q[x]); - _i = normalize(i[x]); - _d = normalize(d[x]); - _c = normalize(c[x]); - tc->q[x] = (_q < 6) ? 6 : _q; - tc->i[x] = _i; - tc->d[x] = _d; - tc->c[x] = _c; - tc->irs[x] = tc->rs[x]; - } - for (x = 0; x < tc->haplen; x++) - tc->ihap[x] = tc->hap[x]; - - - free(q); - free(i); - free(d); - free(c); - free(line); - - return 0; -} - - - - -#define ALIGN __attribute__ ((aligned(32))) - - -typedef union ALIGN { - //__m256i vi; - __m128 vf ; - __m128i vi ; - __m128d vd; - - uint8_t b[16]; - //uint16_t hw[8] ; - uint32_t w[4] ; - uint64_t dw[2] ; - - float f[4] ; - //double d[2] ; - -} v128 ; - -typedef union ALIGN { - __m256i vi; - __m256 vf ; - __m256d vd; - //__m128i vi128[2] ; - - uint8_t b[32]; - //uint16_t hw[16] ; - uint32_t w[8] ; - uint64_t dw[4] ; - - float f[8] ; - double d[4] ; - -} v256 ; - - -#define NUM_DISTINCT_CHARS 5 -#define AMBIG_CHAR 4 - -#define VEC_ENTRY_CNT 8 -#define VEC_LEN 256 -#define VTYPE vf -#define VTYPEI vi -#define VENTRY f -#define VENTRYI w -#define MTYPE uint32_t -#define MASK_ALL_ONES 0xFFFFFFFF - - -#define VECTOR v256 -#define VECTOR_SSE v128 - -#define SET_VEC_ZERO(__vec) \ - __vec= _mm256_setzero_ps() - -#define SET_VEC_ONES(__vec) \ - __vec = _mm256_set1_epi32(0xFFFFFFFF) - -#define VEC_OR(__v1, __v2) \ - _mm256_or_ps(__v1, __v2) - -#define VEC_ADD(__v1, __v2) \ - _mm256_add_ps(__v1, __v2) - -#define VEC_MUL(__v1, __v2) \ - _mm256_mul_ps(__v1, __v2) - -#define VEC_BLENDV(__v1, __v2, __maskV) \ - _mm256_blendv_ps(__v1, __v2, __maskV) - -#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \ - __vdst = _mm256_castps128_ps256(__vsLow) ; \ - __vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ; - -#define VECS_SHIFT_LEFT_1BIT(__vs) \ - __vs = _mm_slli_epi32(__vs, 1) - - -#define VECS_SHIFT_RIGHT_WHOLE(__vs, __cnt) { \ - uint64_t __mask = ; \ - uint64_t __shiftWord = ((((uint64_t) 0x1) << __cnt) - 1 ) & __vs.dw[1] ; \ - __shiftWord <<= (64-cnt) ; \ - __vs = _mm_slri_epi64(__vs, __cnt) ; \ - __vs.dw[0] |= __shiftWord ; \ - } - - -#define GET_MASK_WORD_OLD(__mask, __lastShiftOut, __shiftBy, __maskBitCnt){ \ - MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \ - MTYPE __nextShiftOut = (__mask & __bitMask) << (__maskBitCnt - __shiftBy) ; \ - __mask >>= __shiftBy ; \ - __mask |= __lastShiftOut ; \ - __lastShiftOut = __nextShiftOut ; \ - } - - -#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \ - MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \ - MTYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \ - __dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \ - __lastShiftOut = __nextShiftOut ; \ -} - - -void precompute_masks_avx(const testcase& tc, int COLS, int numMaskVecs, - MTYPE (*maskArr)[NUM_DISTINCT_CHARS]) { - - const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ; - - for (int vi=0; vi < numMaskVecs; ++vi) { - for (int rs=0; rs < NUM_DISTINCT_CHARS; ++rs) { - maskArr[vi][rs] = 0 ; - } - maskArr[vi][AMBIG_CHAR] = MASK_ALL_ONES ; - } - - for (int col=1; col < COLS; ++col) { - int mIndex = (col-1) / maskBitCnt ; - int mOffset = (col-1) % maskBitCnt ; - MTYPE bitMask = ((MTYPE)0x1) << (maskBitCnt-1-mOffset) ; - - char hapChar = tc.hap[col-1] ; - - if (hapChar == AMBIG_CHAR) { - maskArr[mIndex][0] |= bitMask ; - maskArr[mIndex][1] |= bitMask ; - maskArr[mIndex][2] |= bitMask ; - maskArr[mIndex][3] |= bitMask ; - } - - //cout << hapChar << " " << mIndex << " " << getBinaryStr(bitMask, 32) - // << endl ; - //cout << getBinaryStr(maskArr[0][hapChar],32) << endl ; - //exit(0) ; - - maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ; - - - // bit corresponding to col 1 will be the MSB of the mask 0 - // bit corresponding to col 2 will be the MSB-1 of the mask 0 - // ... - // bit corresponding to col 32 will be the LSB of the mask 0 - // bit corresponding to col 33 will be the MSB of the mask 1 - // ... - } - -} - - -void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) { - - int ROWS = tc.rslen + 1 ; - int COLS = tc.haplen + 1 ; - - // only for testing - VECTOR mismatchData, matchData ; - //SET_VEC_ZERO(mismatchData.VTYPE) ; - //SET_VEC_ONES(matchData.VTYPEI) ; - for (int ei=0; ei < VEC_ENTRY_CNT; ++ei) { - matchData.VENTRY[ei] = 1.0 ; - mismatchData.VENTRY[ei] = 0.0 ; - } - - const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ; - const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function - - MTYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ; - precompute_masks_avx(tc, COLS, numMaskVecs, maskArr) ; - -#ifdef DEBUG - if (printDebug) { - cout << "The first 32 hap chars are: " ; - for (int i=0; i < 32; ++i) { - cout << tc.hap[i] ; - } - cout << endl ; - - cout << "Masks computed for A, C, T, G, N are: " << endl ; - cout << getBinaryStr(maskArr[0][0], 32) << endl ; - cout << getBinaryStr(maskArr[0][1], 32) << endl ; - cout << getBinaryStr(maskArr[0][2], 32) << endl ; - cout << getBinaryStr(maskArr[0][3], 32) << endl ; - cout << getBinaryStr(maskArr[0][4], 32) << endl ; - } -#endif // #ifdef DEBUG - - int beginRowIndex = 1 ; - while (beginRowIndex < ROWS) { - - int numRowsToProcess = min(VEC_ENTRY_CNT, ROWS - beginRowIndex) ; - - char rsArr[VEC_ENTRY_CNT] ; - for (int ri=0; ri < numRowsToProcess; ++ri) { - rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ; - } - - // Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors - VECTOR_SSE currMaskVecLow ; // corresponding to entries 0-3 - VECTOR_SSE currMaskVecHigh ; // corresponding to entries 4-7 - - MTYPE lastMaskShiftOut[VEC_ENTRY_CNT] ; - for (int ei=0; ei < VEC_ENTRY_CNT; ++ei) - lastMaskShiftOut[ei] = 0 ; - - int col = 1 ; - int diag = 1 ; - for (int maskIndex=0; maskIndex < numMaskVecs; ++maskIndex) { - // set up the mask vectors for the next maskBitCnt columns - - // For AVX, maskBitCnt = 32 (so, the operation below is amortized over 32 cols) - for (int ei=0; ei < VEC_ENTRY_CNT/2; ++ei) { - SET_MASK_WORD(currMaskVecLow.VENTRYI[ei], maskArr[maskIndex][rsArr[ei]], - lastMaskShiftOut[ei], ei, maskBitCnt) ; - - int ei2 = ei + VEC_ENTRY_CNT/2 ; // the second entry index - SET_MASK_WORD(currMaskVecHigh.VENTRYI[ei], maskArr[maskIndex][rsArr[ei2]], - lastMaskShiftOut[ei2], ei2, maskBitCnt) ; - } - -#ifdef DEBUG - if (printDebug && maskIndex == 0) { - cout << "The masks for entry 1: " << endl - << getBinaryStr(maskArr[0][rsArr[1]], 32) << endl - << getBinaryStr(currMaskVecLow.VENTRYI[1], 32) << endl ; - - } -#endif // #ifdef DEBUG - - // iterate over mask bit indices and columns - for (int mbi=0; mbi < maskBitCnt && diag < COLS + ROWS -2; ++mbi, ++diag) { - - VECTOR maskV ; - VEC_SSE_TO_AVX(currMaskVecLow.VTYPE, currMaskVecHigh.VTYPE, maskV.VTYPE) ; - - VECTOR testData ; - testData.VTYPE = VEC_BLENDV(mismatchData.VTYPE, matchData.VTYPE, - maskV.VTYPE) ; - - VECS_SHIFT_LEFT_1BIT(currMaskVecLow.VTYPEI) ; - VECS_SHIFT_LEFT_1BIT(currMaskVecHigh.VTYPEI) ; - -#ifdef DEBUG - if (printDebug && maskIndex == 0) { - cout << "The mask for entry 1, mbi=" << mbi << ": " - << getBinaryStr(maskV.VENTRYI[1], 32) << endl ; - - } -#endif // #ifdef DEBUG - -#ifdef CHECK_MASK_CORRECTNESS - - int firstRowIndex = (diag < COLS) ? 0 : (diag - COLS) ; - int lastRowIndex = min(col-1, numRowsToProcess-1) ; - - for (int ri=firstRowIndex; ri <= lastRowIndex; ++ri) { - int currRow = beginRowIndex + ri ; - int currCol = col - ri + firstRowIndex ; - - char hapChar = tc.hap[currCol-1] ; - char rsChar = tc.rs[currRow-1] ; - - bool match = (hapChar == rsChar || hapChar == 'N' || rsChar == 'N') ; - - if ((bool) testData.VENTRYI[ri] != match) { - cout << "Error: Incorrect mask for tc " << tcID << ", diag = " << diag - << " (" << currRow << ", " << currCol << ")" << endl ; - - cout << "The chars are: " << hapChar << " and " << rsChar << endl ; - cout << "The selected value is: " << testData.VENTRYI[ri] << endl ; - - exit(0) ; - } - } -#endif // #ifdef CHECK_MASK_CORRECTNESS - - if (diag < COLS) - ++col ; - - } // mbi - } // maskIndex - - beginRowIndex += VEC_ENTRY_CNT ; - } // end of stripe - - //cout << "Finished validating entry " << endl ; -} - -#ifdef HMM_MASK_MAIN -int main () { - - #define BATCH_SIZE 10000 - - ConvertChar::init() ; - - testcase* tcBatch = new testcase[BATCH_SIZE] ; - - int numBatches = 0 ; - int numRead = 0 ; - - const int DEBUG_TC = -1 ; - - FILE* ifp = stdin ; - - do { - numRead = 0 ; - - while (numRead < BATCH_SIZE) { - if (read_testcase(tcBatch+numRead, ifp) < 0) - break ; - - ++numRead ; - } - - if (numRead == 0) - break ; - - for (int ti=0; ti < numRead; ++ti) { - - int tcID = numBatches * BATCH_SIZE + ti ; - test_mask_computations(tcBatch[ti], tcID, tcID == DEBUG_TC) ; - } - - ++numBatches ; - } while (numRead == BATCH_SIZE) ; - - fclose(ifp) ; - - delete[] tcBatch ; - - return 0 ; -} -#endif diff --git a/PairHMM_JNI/libJNILoglessPairHMM.so b/PairHMM_JNI/libJNILoglessPairHMM.so deleted file mode 100755 index 9b6b8e6930e9764b187f019979a525dd50840252..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 96945 zcmb@v3t$x0^*_F014acmSfEj{tnF$eQA`Mt$ATu1$V`}p6oOKXE+GjL$jfBILlJ=_ z$aGmtEn2nIqF?&0_3>@0*e_CQ6G(We5I_{cN5I!CQHfX}fRO*^bMMS%5|g(6zrWGR z%sJ=YbI(2Z+;h);%<@o%c3ZEgD2H`99k)0L&73GPsV2cl>`0U980F~WNOk%IauNOe(TWIE6Xy&t?ZYIEG zJ!2&3h{lumYAMclo*pxu^<-LJS)T^-#d|L^W0z-W+a-)SXpV#3z)O^3b3+;9r zJO=O6ao>sCjhilteb$HDx-K_gCRqqPC)nQk`g7br!<~-XbR&*|c<=5i1yaSm5O)^t zOx*dni*cvoz6*CU?ygrE5KD3Y*hUzQ_qjIMdcDPb`30Uq+|?%hD?B~8vvEI!dopfb zMYwOmJrnn8+{1C(uV(?Qu_3&+_wHVu=<#K@cBWi_L;A7fQb#J8;i9g2{SXaHeCz|J(jA|87-V<&!oKwJS2&j5bd6xpF4+kG z&-$h+5)n&hge%X|!&V8$M$6}Amy!Raj&*ZPynHyeK`y~RQSwLi#Wlhy+ojnwyjH-x zTEL&}wbYPst$a92(O#n6%Ot{T$3PEydjK7W>DR6AoPIsZp*#+=zNWYisM+C`Njv?e zpXm-?hI)6W&ol_u9sa6Ir94g5lD%0CM~T^vWk)1F)^v*Zh;~G#MkbcGVsK0@xwjrL0T4 z427d7zAhjOtTW5)eX+#1R!D@^?)y<+rq41JY2km_gZ=-ehyM6{5BS@Kotr|~9#_M! zbXTuOd(gv&u*2@s*P89yx>_=?GyB)(9`trC@T~9Zn$CLo%IrsVg%aOn%KDV4XLlDp zPlEsKPM*8W{<73e?>6~d=s^#E0h8|dtnWeo8%+MOUF7+whx9wl{*v0IzFW{==x4HK zO98XYe(_MW)K9JHPb_;FZQ`5Ej-G1P>x`)%M^|~^2)e7+f*$(I3q9CdiRlk(&2lYy zc7wUF+h?U9wJ48Qs%a0+Aql(X!%^FVT`|V`v6;sH*kqPl*QMOd9{l7Pv)ouSpv*!) z=k-_7?(m;mB%Er>IRoioLf^T{%>LWdMgOm(zWovLzBud}kq6Bot`t-MiKhNjP5k3M z=<@~WpMHB~mvQ7bkc;|s2PFfy$>*0n)b|wV-Lc0?KlhiXTMwhj7(?MHwpGB^ke$%b&~8@)tw=a z)Q8tX2-%&S=X;R=AouOSDWQpcJgr#e10SRn3cQZdC=7V>Mr_k2gB~@uk+}zp00MEfaQPCbB(>!Mr>E8D{*G7?z2f(B8a$--F-V z)kD4R>Y-n3HQVbs({D)bSSt~BXh9v%znlGiW|_pTHsjIz4b!5-Qi2kc%ZdJ^0nmIDaKj!yDsC)B-FP% zK8t!NcW4joYnXa#GVQQT7Vj8u>djgdvE=y}{(=3pX}wIi)J$L7L;s?AbSLK}NKZbF z6Ouub$%oTM;u95#u*yy7!Tw`=@H4*?=`rtH-I>oLJ?P=I>EGNtWcoV!a0H=G%9B+i z5veBrsvi8(xE|VNLl1WT&mQ{Ao*vR~Htl3x7e7NiUEa;SJG}>a`t^W!`2F`5mzMY| z0=eY@zu)06EGY~){Il@@DQ{seUvrBJ?=NupGw1uK7TjA{5hy61P?TFyQBdLV-NXDNkyPAP#Guy-@=l6{gr{jq6&XmZejVH;$na1WK}D@ zx2T|^A`2MB=kw1kDV$b0YeH#p8S+dm$j#RZ=M@wc&M7U;pIA7*phU?nD=G~vDl0&~ zq(qcDe0ZKr?4Mm(ROBx!FP-Jjoxd<&&Ckg8XN?LJR0Q&JD+;ovCHfsj`2`EhkZ1C= zTs1FaLQ=+j0aV~EYf|3SNdhfUfuvEhaw`h+Aayy!#@~Y6Vl{7CASo$tPHwqBP@Y>D zr~ng@g{(kI_N%28{wZYzCB>!r&=SfaYgtHOv}9OVifrHIlaZ2FRFGRvHp2xQunOb` z$OWjxioC*tl0f0?!n~=|0;9mg4{oxE(xQrttlR8pR5imtzufm ztXWb^CRw(N1g4iPC@jf`nN?I4qoRIV8SHX)Zefw*UjKL7JioMZR#8E>q}fHKxq<(? zU7AMOVURR>XyWGi1$lwea&=l@1npYQN=ow2owX=XkXBxvyC^G{HUjNsWZYa{P=W4~ zQ4EM#N5cFwO6L2ACrvB}Oqn$oY($ofqJmQcsp-fuI#6m_w>mdtT17TRnieQ4>drc)iNFek zNze$gCQVEBqsQF*{j!N1A}?j0G_4{=ylF_YAsS~1ts?e=5(YJwHjTVJFh@^oo-skY&O%{g_*e z*>rS((NhcH{otklU8B$ESR|UJbeojX z@I*gVTe`rHqR_|u1?AftC9J*&8Mz8p|F0^1A!PLA&*`FAsaN7BR| zB>fLPMGCqmyXZ(f(^;4Q-yJ+@1e7?t5K$DOHVn2IlSd;aEU85Bw=ij>-(ON%UJQR$ zh^UWkrDnk`OdEw_i>-mc-XZ?oXqY>VhD}Exg3DQmCri*x;caCN&*esY`-YOPh4eBY4d=`ARuqUC> zNw(s?M^{HBeEz&S1$pz>X)=m??ky32`mgk6Z|m=4g*M^;WA@*%(~&*e>A#74OshmQ z<&{=q__}j)QqqLdlKC*P2?(Z>Qt|{)QCM1%or@sOj4&f%?g$k#k&ZyRGhTX1VL}L( z7jnvSXVT4u6`Tb4D=N#%O3MQU`OqEk#DIL=w~DDP4Uat>S2aaq0av9 z3c;=fJRVD}sl^D*Bo zcEHnOOmtLKgeEB4gO{F z5v7QP7zm+u-Augyvoc4b!33X(f9Pgvj{4NnkSp^d&f6<(=gLzbR!xEA%G=i4{JEd z1*NmiRw*2bo?KK|f>1OMvkQdJH&@InEL%{4t`ix+gp^c6H|sZ03p6JNliYG}kuprH zK-grxN1D>CUqmYrLZk)1ch!;8th+V$knx(Fh*0e|qxg4`8oUDouw--c*^`-luZYI7 zO?3V)k$;5<{QrGxjo?~Rq!V?H`7Mk29&;2cOILaFx}S$wZ5D~xyJ*E?7m=&5xU5LF zu)kt{F;+TD7mR=bC5E7= zaxgp*!x|PBL|#mm{hZ?2d1U2f6&JFj4o|KWL%1z%9wv)eCYd_ghwPX&`O~rDAc|xz zmTL(+dsO$*R(wZV&ON&o%AOSL=l)BfDr`dCW(qZzxyXL*mAT3B9kulZVx=dhbaN~}(&SZM?Kc?k&-(!F_D{34Vj z5|#vt@(866DwC^5gcAu1ieQ+};)X%W0z^}}Q1Gk@L>*kT2dX6BF>%5Ke=;&n)YS14 z{KJP02gBPje5q3=J0{GSkv3lSCk;!M#mNk*N<#}ADE){!Y4u=~x$+~V4 zHof{dc9{Ezr2p1Nue`DlJ8#jBCL2B5MlV_NN9;&B9q&izy&N-5I@mk*|9KvEc6vKL zF>!(GAH@z?FUN5+eWH2J9!PN|7RcwQF8G#@mA2R}@YuSK zT+i9?)?O=#wKhDqIwRLc8=mLrt*g$4x6b~Pw!?u1+_D#>I2*pgN{px5 zhF@yKuQS^%3b?0i_*64JwujMsJ1)hBM@NrbSvEY|-nwSk@Z@D(Gi`XwhDe)Z!^06p zt}+`Q4lHslwBd17A#yFX;c>Jra;>!Cu|*%bR@?CQ^8!!U@Yte{TFv58Kt$u_)V!8Nu>umVjZ20GF_=z@rtqrf(@EdLT zOdGz=hSzNP9X7n(4>#HHQ*7xE+VEL6e6tPzGaKHp;iuW~tv3908@|nkzuks+T-#y) z{65FJVr+PR_hVhLHhhkS#52x@zuSg)+wk|;@bNah--b`L;b+?LqipzG8$Q*BpJl@< zHhi89pJl`6+we1Ncr(-!)R{K?Y+L#{HvGLde3=bD$A({M!x!4{OKo`bsFI+rwBhI3 z(yzARi){EOZ1`duew__(AAg^-;Y)4lYi;;48-Ak=f1eFsXTt|<_#HNUr48R?!_T+j z58ChxZ1`pyp5KC5mtn&%vXFST+VBt9@NG8yVjJFZU5EWYXv4?Y@Jnp?SR4K!8$Qm4 zUuMI*ZTRIje7p@`Wy2@h@HIC4C>wr-4WDYmue9M68(z2JvuyaF4L`$%UuDD3wBbWG z{2Uwp5gWeDhX17vztDzXZNo3M;eTbrue9MGwc%IW@V~auh+#hJVI}Z?)l{ zwc*=r`2Vuu9p*fe@y~i2KE{UsgAE^R!~fBSkF(*Qv*DGhFJg?(!QEIMgFg?xf={oj z8e+D#r#$jeA@!cM}q0$xry zn(#US-%q$V;ne~zC(MvPyi~w*3A5VaG6Byb3>h8anF79(Fhl!rmVl=c#+rg7oGRc< z!VK}li2@!^n4x{xE#NVP`w@;6@NmKm@xu-Q4<+m(+*a^t$=$G<`gu%PQYi60_Id8yjs8~2y+S$UMk>Ygc@3A+WniSX5gV+H&&VNMak4go(;m{Wsr z>v`7yS;B6@%>sUkFr9O_Nx+X1{xRV?0sn&VPYBlvcsXGY;dKJOpYXMWR|~kD@O6Zj z3V1HzpAs$;@GQddgl7u)PQup{&Jyrc!kh|(Qw5w!m{WjoqJYN}zJah?z+(vCNH|u& z!wGY05OxT7DB+t3w|*NX*KTp_Cxb=)^f5I~fHw*YF z!nuT-1pFxBS%m8Z{0qW)glh%7oNzwjbppPhZ~@`f0xl;!oA6Qr&n0{>;W7cwBFq6O zJX64T5-ud1CE%%q=Mqj8a3k2yi9g)dD_2m?3v~seq3W zUO>1^z()x$Bs^2VhX^ksoF(9WgzqPuD&Sp&A0V6v7*BC=yjOy~ryq7Zl$yFgul6P} z+^@E-=}X6r`rM5mK>su?8{aM{RpS(-bp-k

cZ-x*bL@^bRF-^(%PSLf5Xx1N{0S zk9PgXO8q&fQq>Zrj6PW5b`3#qSE}lw6utMCpSc}r?E@N+^eA3IFaLB~@7;Kfx+^Vb z=hoS-A=RdQ<7R@TV>$!~T#4kb971wPdKt=}2+Gd(X2v5sw@Qh(S{YqKmXbkQcG~o` z+tcotHeFfv1=rM7{eq^i^0a|xn*Oe)S9=`5YWg<4uSW^3@m#?)!9Lrn#hasjb!#6; zjbyQspy{WT;0BT3O5Twsrc;`kfI6p9^+xbES#YrIN^a;Nmwkt<*mE!JhP+j&+AQRB zxT*~&xC5j}05<{j)qLryepI}C;;Np@1jLLNFNa;#SK=jYS^GbukRkL{FZ{?xv?rsc zT?wucOqJj&!7W|i>{*C3(8*#v6n%|n2ei2fve6QqrFaFwPq>O zrFpjlGh%?gVNKS>DB<8{tKe~_+MuW3a*qi-p1Og^QAWFf{hIG+=) z{?(pVusz|5?fr`v9FBi{3sH@Ok`#@0+w6(E2AJzGQYoR7hmnN1W@Mp;1?>>5{&nyN zX0a#6@S;;Qje#`A(*bQhSHr24I>QpN-*VqS~Rl68FRp0)we?kDUGoJ2)k_2 z9wT3%Vpj=qH67%f9j^F#qc2Nf!AfX@Ec5=;-IQ6$GKG8z=vJGcwn#z}a(t>pZm&{E1}LQ9)HZNP0u!YHAsH8RbU!YT${@)uci2WpHou`&%jKKZ-~K0x-B z$ju%mGMc}J7&UdS>bp^D)nZRyM_?cmfw|D3Nc3Ce1T9w5IByg*!Sx|Y8z5<%6AGG; zJP$NcW!UE$p-00Fl%j7{YT&cq7)ZTBBVQSRf*p;c+joT?2Ua2WL!$zYCtW|1*507h zABk3iYef@ect3;DU!myPSH!rMufYT_d<>4kv<|BweR@NBFe;;_&a#z%+4$0YXe@DV zXg@g8wc;rBl#%>pTKkT4y~vY=F7)QRZZu4~Cp%p~Mcj-{KS6DwtsJ4!s)p8JT=LGe z*9axo>4QCn@NZQOvER622R{7=Y9`w=13RcMd>R%c z@NvL{YP^KvT=6>u1YCd36K^~T91wWrAf^T{gAJv7^F0daecy8}k3$q3UJujJg3<+O zh>M;>J++`T1uZE30gaBXLIHjUpUWEPgG`@3?zMN^4r4Hk7iJM7yH7uMTnEw^`K?yB z5jfFZvLp`OMc_m?84H}{uZ(sA93N)WwTkg!-2MY7<*%qWOKEu0a9q}~x*4OhOz=5}WztZ~m|KzLFHE9;NNEn< z0=OPt4Oy53K8@o}Dveb9QTUc+OlFVH+hB@>1aN6E^LPcqia&{b8)aNa)?g5C3`P}` zzeIe;S~y%QzQ#%z{D6Lp!Kne9@jQx!^&@01+atUtT&WU_HTId_BiMU}4WAWjye0_M zo{eCLTx0CiID`I+VQ{Ua4z^R>(!YZGoTOgZ-BDszjNwOKh(bSo7>N!d5gzZ$M>8I- z>Sy5*b-1|Ao|S-&x4vXWQ;t2uCfe*-4cvCL7#`?*Kw;p()A&rZ`oKHPc<%||kc^CF zvJE1q(GG6Fvs{Va>|s*lr(d8HI&QdS2Xx5Vk0$wen`yv%Bc9O2B3CC(Y^98$l=T?QEny%Z^K{b0 z`|wptXrNC-2@!>{1WOa58YjUG*(rdf2_gK|Ev6>UT_!Za1hN+54S>}Q#y+rsv}7$) zh&MBZD5O@Vpo}twcr#Ooc6$UVgeK~Mg(e#0V+YfKcP*aKL>6SR*~Dvw~T_o8CY1&7?4R{J5vzwZ@IBaU-Pkn_Zm_XJ-yaBMd z8n1%|q+J(DA>PasLKACc3d$&Xh&MBZ(8T>nkzD7ip@UwCdoi@J6bW3_m!N!G?f*i# zLMfMZu04}>s{Jn57;5kRV~3mwGdN>~+-B8311lgoo9(Dff5WWzqnD!Ii#-cvShf&p zIugB!oS-e1w8a)p+COOsorx6QM7#l@zC>LgK?)JIeQ11xYNNJg5{$%nW08VBeHXHRgLP-cMaD)5PHY4ZJT~>?xCr2Tuc%f{Xs-Mg3egb0sn&?HNcbDM=j^A?imF zN?VLz#5_XI#t7vfpd{B}w!k{gaIF}LHdKOmW%cTt5$Vdw2FY`j;D}g5M_ymy`(o4v zzoi%?6$Ex5ZWd!bLcOw@eOfR(DQ3JrV2FkgfoUh`>CR{kYacQrnq-n*eQB1+EmQdP zHBJR^npgsHtylw%X26sS-((iXhZGFUY}pJ9HBX#^5YZikwq~zp%*0e_?R7rq*SEP| zA3SKHp7S`ONQpO*{U_>cA3t~zl+hV~Iq}b7#OQ*LP4{jmY2gnc3RYB}5b-b7gCMTG zF3r15s%`mB#&7y@a~>|WOhIFexu18Yuw_IQjI1sc+8!x!#?6ufgEbjuBeuMTNEDs= zI=fl&K1?-3i;EqTg3)c0^z=4VRr91y3XMHu-0=xb^dqWgrm=}8Nm6^G_eBxrWCjPH zQFN!GPs5zWg&AQNOFjra8qUv5GZzE<4;r^Y9&=F&>9`^VLpD+=JyRNg zWdo9h@uKa_3RyuAM7XhzBcQ&YG229oJ*I!YbkM4`?YqmL+bD+*mDk}rBY z9djf$&I#2u^wSe*uA%Q_xc>Yl?o%Kz)<5AI+DQC|K;!)_Cj1hUzTj5ah`w9ZcdGgu zs=iy%_gLjB`Wy6ybTg_B39H4bi>mM9(k-VYs{W>mM7&kzYGf2*rN@Lcp5b}OL&ucY^*+ptSF(W11$=EQ=&y=@2ci1K%|7f zumv9NDB(SpFt($Fx(-%C*kzJcLkC)vFjt~?5RFX=dkIrm0#7lRbvVru4xv@eI#hSC z5+w|gtk!j)MF}wyy`E@Sbu)JPBX#%^O%UEqjI@%093vK5C5V_%tUU(lA}q!Ns@0M`OlL4Pe=mhpFR%qFiGx5@@h$5^r#?jBl$*2{tERm?vv#CGWQ=z zzCn`rkUZ5a8-;@TN|F;Kxt8Q)N&WfN%HlQTu$=I_eH%+ zBsqcPcuAf{@;j0|5fK)0_a*s9l01Us|CQvGlKkakAYU!X14({Dl6Ok-ZjuK}@~K~f zyhxHS0Upf%O!8Hd{4U9NO7bL0UQP0qlDv^*FUita<}|t;#w7GJ>6o}&Q~V*?e2Lg8 z9ru!~++=b%3)4fR&L@X6heh2pWwYEw@)=3KgXBY!+#<=bB%hSzQ6&EjWY-YvcfxFv zX{|DS%1`l*c*OHJJc8qD{tn7aiW(gFbG$1U!}WI*{asaWQuTdUD$PhfjK!ZDLCfHt zKxU$*tE19a&PdyqIw5LX>O|+Z)XCA?QnPz$`s7qipQ8A5CCjI0&dAVn5VfVFPgU0i z#%RH7Rq)VsERUtU0b4OLWvPcZQ;?LwEIiozRRQ(8PKeS=(JyNmg`BQP&byg2R_1(0 z<~%AMy`6$VzZVJV%!#6m`A{=zc`uQnfr*<$QTG8#%L&hgf@ZIl`@yPy07Imze=D{` z`(owWhi!-IPtcxfFxmrF8Yi3sXTxmJJa^%NOy#tl2mjlx)Yxww_?k>zB2&9Nr@kBp6OYu2K0e;sk_(Na-$MIV!6r8ymc}?Q zgcck2o-pMWY3cbSzbnZa$u3D=Bgq*gb0C4JHVMl8>(f`DIBCO0ofmx9JT@ZX)^Dl3etN$i0(f4lyYEFC+)dvL*R>lDA9pqa;rU z*~N1V)-eO_R^rS-DiBMN&e8S!L035K4xG5at_1cd5zia1f}dEIB`||_j+i~oFPFwK zLyR%J2aX!;4ki2bWFK<;o4kRd8wi| zDbBryDmZZ#p>H)_e1`?M8|&W%dnd}njv9;02aJ6_ML#5Ak4T4wV-gq8c|$<4Eo+nU zA5J@^7Tymxj^?R^l z(-3FdDEHxzNWw?1>Rm8KHF#UBnozIQw?!-7t*+&bz+ny)Sd8YyB&`0}t^a^B9Ex*4 zmge`K<-9vasoAFli#)OOQ#4EooNa-NGxZTMnR-zSN@{NnT#~5=Vqyu$Ded(*&f`|R zyUO=tBgH3pg(kKu-h=Z8D)pa7YtB7t)tT5pjH=()s_KPptPe$3o$vX)2J!d6^Wnxb za202RSS7)1Q`M)#$7Nv3)fkI}mB+hOQrXxpFPECI9mTmG4x$A#?>SX}-{+e6xzG8z z&-;1#;Er^fw|PN7U(F|#mt*P}o&04>yt+BU0~q0#aeiaKAc5~=pMDG@bHck~ zBCdwYT}u50r&9mb*vwF1kPGqwj#l+=)V(c`Y5vbt{kYG0HgGu{`WRJ@9;ybj$3WxR zL!n+Y=m0gCJ_Z_3ABrI*8^VFYrd1)K>V2cUP6-W=O|iG;V3cZvleU+8Bp zwf?MAtv@z)N~q`()N2D9dxQ-Rni!LbNFZkZBX+$%=Ymq5E0 zS_AzFBKc@O<9zy}7-5~;+-g-l?nqTeM5>~N#$KX@Cb#>%Z&h5cI1j4a9>>dzb-o~(f+|U}PI=}Ei097xE?P{Q*agRKV@#17t zm+=Dr14DusK$b!c=#HlX=T>8#3HoAfB_+JY zg)Zk_rT#1!pW#}U<~{0q_;w_PqmGEgRQi6Ls_d(J-*>I}BMeA2SF}6QU=i(f3w%05 zSn^BwhAdePte_R{qnO4AY%L`0$aVTXrU!qlnRQ5^6rOzo5xtpBb)HwE?v62jPxE(v zpm|#`^@j}Pjg`PGu_JE$We=#!8d(lfR2~g4W0HiAHN81-6I&Jb-q<BTX|AXWns`MehbKSDTTlOjQ|+|F#Ef@S9>Q8tcS z@mTnOuZwXgr$53lr#LTY-p^eR=R@N@Zv#$(sIG}ksqZqoFz=!{Y? zJhjuI%GtY7KdDNrOImu-!yP3k!Ah9sZfO|Qfb%_?hcu%1l*0WiNZB6kK^tJdTy%F| zOhQy2oQYO+JNNMTX#ms7M3*IXl|L z#^rB;CMHo2I($yVGa2YY4|d}qi83uN{G+0BtzfZdlyK7{JeV|Vdcc$Fsy>8c zXG(Aid|G7XJ^_B1tM_byGt#4-a6SC>MGQ+9zy*Ax25(Xlur7a&>+-Ie)u1p^Kq!jA zSV!&A0E+i3RyW>NT}$IgILQNuIItwnhbf_Z5c}YSd40=Roast-VbTC@JioQq@YU4K z&r;Dto$mxNdF?w)(fjtnR?{$8qt*wZ&sl^%2-t>UtkL?Qk+osiHr4td^npSUH=y4m z^yw>uv-+dGwNPK|rrd`h?VF107!IKOE0XyDFdxtdnjVIF<@7?_g?{K-aR*$GrW=z&gXcpZPE9|eBy2@L6qc)b4OewN zw4H$yUa(cH*mI_ksMPGl!UXU>{d97jmT+!Tu;`}NNeM+a!NSf%)+n5jRP;0mm=^1* zxrptB9T6xSiziM4(|UA@x+%e&n~X`p#W%HT-j5W0Zmg^NCFosEIEO=g=El#0NW6&% z?G{%u<|StkKJQlceuZF$IV4rIGh@{D?U;1I1zvD1|A=byQz=yqifxKX z*OdEm3IdbjOcH~E)(DHxJTb27PuU)babdh_)%MQ)pR@Voy2+uyIEOD-(&|en zF*uWgD%t-dou4b}s@3^5tMk`u=dybhl?j*N3~MlV#VZyUnJ%-7ej2o9OxvLN;{$&*^SQ zVR8%m{f1qi?shwZw7?J4;Nlc$U~Hl4|37?+ezM2)*lJy`Q;iKD0C|4V0D| zoDu_#VtBm^Jp}{I9nj|vG)2UkZr%<6)`V1*$HFg>6j^ByY~X$u+(F$T2sI4aUWax3 znFZ3mEbG`P=&+6{;w54otpJUy8f0qYC=GEAiZTqmiN*_$g!zi{!W!QqG6?&ih2CLV zXg+wE0SYa2&bcl!jId;Q1I#HIOqnMOiYUWxw#wATJ(S^-Z-L6knKE1`54%FXDAqbsNmd7n}$?!IxJfl>~%@=Q--JOJLVvJUrldc=cB?98jg8us>`^ ze}Usv^g@h)Tyw!Rj~gTCLVu@$`a%lUEY4ufB8)W)EedNEbsPqL`Yv@Z29tdp2DkZ~ ztz31vAyL(DK+v-ohN>@ipf6y(0;?`XiSW2Z4y?K?PQ=24lrYRA$X7G1{n$<)U>FdX*5Iz_+%2mC2~`Pg^` zDMvY!2}hG8M^x z&@#Dhiu5hB;ag_Iw|ooV@<}H}wjr<(D{QcrD9zbSl_8kZ^j!Fs*`{x4wtP!N@+r%= zEQD`q$LM$*qhroRQlaP2%|2509*0Y(LhF6b9f8ZI1P8c%`T)n2V742DGIj}&K?Zy&Nwu6Hs6{>ho3+ECU9N}CNE$3p!a>BWwwZ@JxeanchzD0#^*#d|A zj_Uo&RdcwrLWO(5(G@K?2R`OYELN$wu{e*;=Bhr(ts}g`Jk6+0P!P3Y*Xb`H(rd%5 zslkU@u`8tprx;qok%)f#DEe(M)Z1XFt6&pSzfdm1TPoj+FK=di-!M+_0JMC{Y zA1yQymf8kOHOBfv`J+U;OzPCMr!7%EnNx5H3(ltb{rVoeR}XT2wf2E?eL2jH~7Wzg|CVsRM^YQgBynjX!A zHKS1z4b0&>kR1G`La&^3Ay8`;J{u?9sjPVFQVY7-qoF#V_tQX@FX4+0pMgN-hw5p> zXYZ)>7chxBf2%Lyq>7_HoLgW@Etcx4UWdcsysiOW{j2iN7*c@!hnn63-kg6A}_^E%$+c{5%?Mnbg}S^ zSoDHd677q|4c{uP$2L~166r8j{6xH%V@15kZyf(SohBlQztVd$-Z(E_5URqF1#S^u z*C-UO^&@@?^oVfm9}2vvi19w^X8h6OQ)2PSFiRU{BnT9~H=%-X1g!5=1mEzqH@pGw zvlUCrjSgcZQn&okNJUa%uHgxy8mL`3n{S*qoP8bso^37Hzy&J~HENg63>}4Wcn21qn zWcVuUn?Y-s?r#i)R{b|wnE=keM0q^|>a9P=xVXnyh|xQ;zWl)7to0Mb$``ok zl+SgLZ#5lZrhc-c!%Rg&_7?QF$}opp^g#?2z+mATQxbFiG(qx{>jU_@8Y`#XlQ^ft zaRfU$SV`A{H>(NQR(ueM>9I<}yJ8=z{w(&QwzwX?774YW4<396CP|zMsopx*!y)=N zy@6)Kz4k|1IG%Vf%*Q6?Srw}|-lI$6H1BEGiY7WY{RG&m-nU(=>e=T)^U+nY?BLza z^t-s~gY*Lx@7Jys>ySW6_*(V;)3pi*K+rJT)PxVQZE`EzrE^=F*w9h-wsHG}D-w0{ zC!!8Yy@7AGwgR6S95I|*3i^mKn86O`;4mg;a!?!2fOZT6+Tjdn#~@zWf&pho`EK+d z=@>()VoNNJBlo0WdLmqW`l!}P!R*8~Y_^PQql&`4I3w?nX&_|4h zOi7e!G-XPnOrt4N5@i|!39*hk>#|FmV)( zvTw-{(J1tMAfa8v))3ofpKDbkuxbc4A=>5*vu*Zk-hFJF_gpL1i?(@B^&WDq!l@&) z>sBpc50-UqRjZD)YR;|6JJr2yW~6{HD|sJRcP45$*T^(j^4xoFaxj|hgFy!Elg;+Q zF4yE>4%;V(?UT>;$+z0)4eCPfYpG&8YnN-qipLdy(C8)c6{8)?JQM%tLRFiu5u zBNul6>~ihwRxY*?Lnov8 zuqSD>_;+~>Ky#kZyjbKA`vVCdChwEmyrD^mvCbgI3S+q}aD@wokDWqpIPY!265M(u|jgCeGhz0 z#gD`uFr;!KyikJ!Zc+6Cqt&2x3;Yhk0CaPT&aU2ak=f@}=eLUfAiRfx^#=3HZx~*U z*v-}(5Z#Lbg_V1Xt%b^+l07#x4V62K${j=Hv@2cg$7fKvH##a8)||VR;tIR-H>_N=j305x7jZZ$hkaVCuR5E^b3kIIJ^-sx zS|axgc4*AAT!QxWN zaZpdV?!jlH{rh^1LL?(;A|_wkjjJ|TEW_lXSwzHM+l>paSj0C;yop3>OY&%hR!iCs zMB#VLL^c+_OO!AQrPd?I0M7~6&-dZ;)Gfvfk!)%Ovb_)Wd`D)4Gbs}~$^0sk367~0 z>)r^D6c+u2MN~-4j1c2Ue1Jqt=3x=qw}qhjMd4j!#%Hr#Wj_6~rH04Kku6?i>!Rl@ z$oyL{?a*_SB~}*2dL_bR8H@e|Y8MrnLE_^PVm^u6Nwj3XKSEPTdjYhxU>BKj*t@ID z6C&A;+=pxdWTR14HN^0@8p8iXbJ^{6n&_RbXY0d*tByqJJMKadu@pVm+7Gtq%Z~8h zF|Q8~kVzuvsjU5n>DV~tbO~n$_pWgxMEF*0Hgn2~H9QQR!5GDrv3t`ce8&b_JP7e5 z82kPLC0Dg!QE&xmc}0ltuz1rV>Tz7fX?O)&WX9nRnBVpoZve&|o=NM(E7k;#8?TDj zM!r5TGN7w6Ru83Al;KBee0r--KZBn>nXcl%J^8Bma7gg;LQ&9CY*kyIzz;;w3MldJ z$PZjt8UB-#To1wAn4XLgGo)d91W)(xPk>ECzBJ4u#>ua=ehzP;5#)i3Bhc04h~A*5hzJ@;7LaD?}KzurPzqdbtJUR&=c2AnNuiHUk(bIe5j z#m)Q(YJ$FYGe3HopjU6^7f$2x+0^Lq`ry~OdW}kG-geij61Y#UjA2C{a&mRb%|#~G zW3eaNvBYC(oW-!ZuGjna()+GZ+UwLjq)@!u7r)Pq-LIhq9+eA==W*i*o-}~>_+;xF zd+>&pqfYIKW4KF8Xp1Sucx<|rwioI*v=nNMtz{pn1_n6S>)Wg9K8hR@jQm!SB9;uk z0o%|9PmBPn#WQu~0R?RmXQE>9G@hnts0IN|Z{>#-%)-M_nEQ)Et88r6;*~$s4(qV? zwcQxWcfE>s%0nc~V*DC7eL{2ub^>NUu;>+l#R0)BO7L%@=hTynNye%`jFClzS;X7_ zBZ?4~YEN?m(%@4rJL*X!wUs1T^|qHL`q4CCf|v4p7FJeCSR<8#p+6Lj)1g4+p(Rj1 zZqv&T(7_(X@*&kT)dGu)KlBpAu8fC-G( z(_e`(_JW+&zAdeNd+^e9Y`gre-!+KNwS59t8;kQ(d8-Hb4RgIW|w&2X7 znTSg?h-i-+Z7=YrSXyy>N#gJwT#gSfx{n* zRUj;8!N8~$-2pY4n?BvD|iM^fJ(72E1u-ud;JT|=| zWk{q96_g<~u0e|U7m{C=afj#kED#r`uV7+0PIG=o%JXwcH(E7*2~_ZuNT7Z|Rkc{aAEAc#N9+Fc74^8YRAS3H`ziuIQ@Lf#By+1ixVK00_jg&N!aQ z-@X$HO*oStbc_!TK%YNbkpXSc+|Y#4w<2DsxC}3gsB~s)xsC_3LIV~>v-@8* za_?A_0dD+?y>9;Xmbd=JfL_)6_)~|n#P+(C11@dHqT;|Ky4&IHRsL?bmCJz|=nm82 zE>d3=;~C}M7VUJjZ>6WLxr}xXlM~fIdxt?bPC~opZ`IVXrlB`T!H-j+-3+30~WxB)x7vgAv|3*2PrN_Okv)q52?<#IsGTip;VoX!b&Ml z=U|bl+OwJ|y<5tYBWuM&6nq0tbomg4pTIIf6@K-CnJceD_d%{F0JPj{%VKe|GQea7 z=g?TLZ{d3@YQW4$!30+%A|ot}G%42UT|@&{)8HjRDu5Z6BB8M!iecQqYWck1vcW_; z{UG)-VYmu@se=i5u}>Z(cN|T>3Vneel7P<+^2wosk(mGk646EUgJ4;?gZYSOIkn8s z?gQLPyyMust9lX(0)hqc+01985d3o3T2B*D#_2!ELI|i;4WbY*sKiEzOktH^bzl%J zzOL#fwky^=mO)hJX3pc`mj!mcXB~KQcLvi8@e51Bfv-S;mMJic)!*UB{dYn}%XD7< zFACPl8ohs%9b^$V^`U)H@dVR7${-m#2g2jlB5C-wb4X_&i*`|?D4>-!|GNo1M_?Vh zvsLsfLMIsejtMNi|4i`n1+RnmQiCPf7BM}S&XkttviutPl!2*|@S?Nhu|oG`YLvl` zOt0iM;k3ZwH%X$-6j&Rh1FVeEO52D+m!R9nO4<9U9_!8&#!Uo}Tx1gqX@57##2B%H zEcHVhuO%xh1WRTu>>(Jd6A8g8O+Lb!VwiMU14mK&=_a$$Mbi#^2ByH@N4HF&i~{U| zd4)fQA^Ffc;lF_#F>qKctnzcCuaI$#?7YyxYDxjuA*4a?5Hg4Q!a=PP4lO*Ib}5Rc z$*}#x%PbUBA-W*;$?{=KA{scyrUPLm8KWVZDA+a#X4FGwaxzwmBrwH2;>GOG=*N?k zkf}YG$`+9vAuWCtf;^V9@5muhcpkVi3U6ehMn-|f`v$NSKeXSSt26#bs9BcL^0MU( z!Y8dhg|dX>LWO>92{}}}NFi9E9W3-D3vFG;s+zil4oL~$YU8yK2yP-yq)?YwQ6CDS z6Cq(33D4S^QH)tT(Ej1UVmndBoOLYn1gh9_xZ6SYU8shG>~GtU@sIf9s=-%=4sK8% z7r~5z%{1c~A?{jf?D&rEG>**R#QKMkQ`j@X2Il}mV{hO`1u$Ut0UpV~f(jfl8tfLf zfinjXDiP^4gFQH2{mSpD3@oe zN(f@F+^^^-jXuyQC%U_zZf}1L%5-evgs%FqNaD)CaguGu<-)$qIN(#XZ$|Ye2<1X4 zzq%V*ATMJGH7MSC3qvCAE?ORbIMC{>)=$Ecb}MO|Z_G#eo$D9buY3R^;rkD3J#pG} ztcqhH1s~hJ#kDJZfPyA>RX+!LeL;NFBR-wmu6i3?%hv&iWBsc0BiCPxvH$hK{M$7B z9O|hie83$5{K{rXNPwdQ*izIF`Jzg23PA~;paA2qb1;Eq@)}3 zK+~|LhO&7EfuC9=yrbglA|uJH9Xn-{`5ioNTU+Uj>5LlF&BRX z857hqIOyQ>p4WmN558#lh{sCs-5|ap9H4C93cG-Td}RCxZGx4&7HnG_uKWtefA9?e zgoX{d{#prt_dDvB=Y@`898(i;h#s2e~FQ_=sn{FZHKtq*tIKacEZzW`?Zp2zxr z>bEA$uXwN;bA6ZnZFYw^=u8G0pK@^~-?p>e@q6Q4zR$u+E+$ymrjk`iK`EFoq}?43 zN-@J#;@PoY-8p^lNcy{Edhz{}`Tc_UdLb1vm$pDM#l>Q316NkDGj^Wm3+F)XIOBrA z=nJkUq}#to==3eZ@4*}0G+xXaJHA;^g8X83>QiRt_&Mxob>!F3tG>YY0Xi*D@v;lR zcJTot&Q+wks^6sB3{J!uJ$#~NMH=|X@{sUTqU**RXIUtO8pThrc&Z~+3ogXicMh9! z8cqZ7U_Li@|A%HMJX>h+TX<(zfW}p*cef9NKWRW@?vEb`HZ!q)db&dqVmXD z`L3)v8B3)@-`1+p06Q8eReICFd*Kx*Z&mQk>cEX(a6R6`q!>^M)YIzPlt zabQp+XMQB-H#koX9~^HSfhaAT6*D%8H}(P!PZY-$Fl?gkZVs_;pmQlfIJsHk1@%*c z0aPRzt!NB;5=4e1Ct=bp8>!5=uy#QrBP>>0EQWMu(TW0%mM2UWFM~z6TsSUr;l`(s z%@Sn1#pOLDByUrY=gH*{7MCJ&xdB|HEV<-@&l2&495<0Sg1~DCt)HJ4pQ7b^_p6v> zxh9^~oM$!f+4Ac<(rMnd1(>o!ANa0^vk4q=g~7E~4#C1{%Lr1`e+ZU>X-66yt~$?7 zVf+>XiPdkk4x5>-7^Z>^C@45El$$JZe8(o3=p^v4Y@7fqgi6MX4uWsQ(6r8I$jxN`RjOcQdLNetz0Qn%~T-X;* zIu6FpSdPc-s?HPUoI^QtDCgn8Krt(Ys5uaI=Wi$~-ssa<0CKZ4LsF4|43*#*ae|jj zyypYyN&>nlJfjlIxCna1M+3$95z*F7=xy5N$;VjF)FePQi;20|G z661l#S-Q~5aGa^fO!-dqyYaE%kG=(g#tkhd(vqX#vy~t$`3chjq2laRg($?xCmULF zf*>MQxp<+#sPI^F8D*3pm7HxE$;gFX4c8z^PH4gSk;Uax7^Ns{vsujfc+Xk}J*G4m}zKCXpk!aft$h+!Gy@vXYW_)0N;nC={a? z{-q8qg0Aip<=BXI1V`i9(th)&(}Vs!ri^mD>c9i>R^yqHy2dkQO`yEgR!YhrTJgXF zV&mC|LQBtFXvg!hGCnVS_B#QekL^7#8!W%PV_)N!IUhHEaL*27DtrsdkQu(dATuB% zGc-K&VE^h*o959QHbkq-zFi7|%4@}sU^kwN?L7=~HMT7<9zi2Dp6g%Jgz}ANpoxs& zunhf+wBYU0L>~lN*C(S$Y0KdnhwXe=Hu=(L+z*Z`!g?;1-uYcxhVI z7g1?dZ+@HB-jJamt%P2VTBHSnACu8b3Dd)w3MkN-%2H4d33g!&5DlyYs!#E35NK9K?Gl=jI8cIU}Br%xX2UwJN2uCUZswVk!a(F z`ih~GQR~>6GE}hf+&w$fs@lJGJ#v8+ui751=5230e@9*8`TjLoWM9=5>ss|tf9Ubt z<@IS*pSP-c?eVz;9?Dja&v|y02k`|<=#D5V49kG&zC74i9ri{uLmSSs z;LOmf^R?ia8Cr9mqT#I8S=?vP^nEYz`xI|mc}q)Qd$qYnShE@m!Xv96iqdOhuNIFc z3@W6J6^|?pKLhr6no5-Gku6x8_2r$%uP(&6R^^}zsUh*(oj9^JL~lBM5GU2^jb1ox zpV1V`gm*mST6Hs0W#IQ7>e3VTrBxlhDDCv24CetJ$X~(pPiUrra}hLW1glqYqbMV| zb_KV8(yE#tN~`+#p|s$t6=HMe$U_tG4MElC4>h*rRKF$qR7=jP^P=&C8w6r;>#*Gh6 z+zne(;!Z&0*T#Jt)i^z88}2<&NA&?3HeOa8sO1aPEGqzx_0;tmsl;bcGf}vfhlml< zUq9$YQ~I<)rd=RSwGg%O$e<0P9*t)Pnfd^`-Hl%t@1&4Tjp5>b6nPJ!y;>elB}34> z9a`RT8n_TQZ(!VR*yOmlkJxIf4zShSA9rmCMQ9AsAc;~V4JP^1r?L0CPzI9{p`)UvNvZNbE~$+djMx7gGU z%AOq*J)7QkJiKikf@0V5FW{zpdDzdxLd4(M@m2oQ86rJGfra$xaAJS(IUJKhBOZqf zKEQG^L)D^+lksGWOb)FThB`U4O7xA%NXPEDIz71gb$U3AJP)M@H@r?Sm{#@KLz9Cq z3YGix8_wW9hu>9wU3$h(A8L4JUXEGY`Um<)pPd*QJB;~>mp)^1P4%UEelM zAAx)9SurG>DX-RoqC#fvwVpB->{I_= zcW(n9RdMwJ-z9e;K*(-1*jPaq4H^W}4G9ngF$-BpG+;DXiKZ-JvytHD!|aBF4TvUC z)?^`1pU~PGY|zl!K4{YxTG|Gi$Zq%$Au4SVQ6r)xf*KVqAdvTe=3{rW*#%Kw-{0@O zzhuwcbI+VPGjrz5nU6ack10kf#qc$)KnqCT5XyVOlQ?4)_ z-ceU=@wI{faCwj_Q{C4ytp7y|U~Q`xih72%FAq%2e_>)q#+@f+MX!NkK%m8IqyVzO zn+mx6o5>|oM7tMlinRceiV5nSA{#g|!6o*PcuXo1kFyI-HCv`y#%iGzz5qI>|AF6( z56r-Y;~H8%d5-nNIb+Y?;f{SCTK6HzTH|~?5;~Mo)517sxTfu<1Cgd4D1YxnB=u|q z;Spn;f{C(>D(0D?cmF>5Q#Gsg(C8ZG6Q;X1Q-5b_!LSI*fR~WY$2@1m;|!O(5x_k| z3qJvYP_EdP$I+}utP{-j$BkRv&HX~t{ zhcd#ih}oD;_bXy-b@bEheQCh~kLlLDy|{Dh@d)!R7300<^~_K7zH{#z-cMp*^PY<_ zci?ArGnh~x`>qCeo3n9u!VKaals)o3ng937y&odHv4e1DVqf;Y6Z;;hcM!Gb-HNz> z!94va?sMX>f%pMdbjTFp@p{kc=iokOwUHhoXB~hhHY%Cnj-s7IsF!Wef_sL0L-Tpi zz&W0=j&|uB-A+%dc96?_9+ivnKZYr*aK|R;xZ!%Z)ZIa`9xh$o0|-M<2Lc#^G6Jq0 zYXR_)!F1P|N8h=P1_qH;-I}UBt-WnkXEeI%UD)u58ZEBFRx(JvO;6csOLBaIR-uHo@Rp9^vT?&)wETIhvEZqS^a$?uKl-Ij67a@U#t^54HJ;zU4huW-sTi zuA^t?V=C&r-uWMZXh$<4yt$i=yf(q%{QM(GezOq|Jl64Xn<+0O!iEkaefSZUInTSJ z?_ky1>K&iE5gF_C{wufJ`=i{=WfZS%^!$UAaMOFjao@n$?Obv7hZ?WTaX!*}#_?vP z$23&`A5Ysubnnm~@>tJY)vdYRRr_8u7(&YDC`F7Ju0DjCJ#$q%o^5@PTyqb6vX?a8 zO^!Drz3(`_jP(A_aT?sL`jZ~hSp8?7O+V2$yvIfg=VIn`c#!OI4QP%;dS7>(i==dr zqx{EkMB9 zv-)JsIje^0g0%Vce&H@d*3{>yUF&Mco}=98Sz}MHVOF5VO=CSV-l|zm!#pwSVy&O@ zZUipfZI8rweGQ&+Q-X1wR1%uLXK7Vyjj#rwtktqd=k@j4X4nE? z8ni~{P3oJpWv~RmY}NLRpg_#(9t`^c-8B3I+>mW&aHF2X&1D>JGZI9TZzec#!SK;$ z7!K3an`fyo9}9MS+V08g#`6swoc_5wKr`CE5vc{Y|JBB`us$eW=4|^+fF9Tr8KD14 zCV;4eO#tx&-))S)NS8Hs48WoJZjMK$iH^8Jh-jX*pLbBk$#xLed*(k!8E19_io|G6+b&vk$Sx%K-X8`%->FQv`kR)LJh2QVb3iPsTu zxIur48;Q1jG7VtX@;O3;p&6yYe;im6YqrO>k?<*Ljq#RM`P=k=m?F^K{K zVFF-h+gOD|!a3GsXyCqyo0<)z6;w>s?=8k--%L(KC%Pmk4NZ)}=CEn98a9Cja}9Y^JNBoxwG2UV#H|5ptC8t1$PwSa*I49(^_YV zJ@rcz^Go znYtHJvAGIwXHNT1=Flk_uIZV(sN+`fU>P=1^CuauGkEE3Kt|Q^h|Hmx*wZ+~bX z481Fky~_StXQsy^~s;+h$ZmN!+!y%s5S1}Xi{p904-ZM*E zalip)tW?j=GgoCA6b3RdLd9JfyK~X2kqj}IKTY6IBl(??-|6{XD!+^4cWLa-#JQRT z*KGc58K<1j?p&*68kq|@VyzFcm?P>W!oy5WKP|ydu$Rpf36oXIhtYJ zGY4-tPkp_75@z4B#$xkt?9cGuffG>YxYU?AuJjlz1gFQ~;D+>=xXfMYF$P4&Y;9Wt zosN14XBkvofOGvQcxTlN|M^0L(z`$1wMY0~ zYJN?xCzv8|kIB+Kvvq$N5i$Gm5m3q~muc0PYDHYe;Mm<6gXiszq#r%~05xy70hRoQ z2@ZcR9d2+ab8zfS=~MTYuicqU)zv);0uo#ZSQlC!x6i1V_6yU{DX!@zzdF*n9PEMX zOk1~FbzEnHE;3ad@!D}yfF=r}SGu8wpiy0ORjA%|j7TMQMHBOL(pDY64pgzO+llHL zMind7-WJ=OxsBh@GdL04%o0P>t63GAAbnv?{&Bw%S+H&Xz`#`@pd~75oRTpm0r{{nmr$X z;r+z%hU#tm7#+;=m5Z}IrYRq{(lZQSKgP&)`O1t0k7>-uufpnTR4qn3b@8vLD_gj2I!*=M7}VM zejBOXvOuQ{7$reE!_!;wEO1N)QY1kJ!&5_Nfi4+HC1F_+B8B;}^l*Lw{XVf(kUXIh z57Y9MX)HtY<0zalk5MkBa9xw6+)e>to;W2jks|KGNJc+`o-Urx!gdNbKPxF)MBMGC zB_>M9KHos}WqB-C8U1eGB57Jh;MsE$qZhH#^hB8*Prn(d^t&*Ne$y5*ypw*{l4+zs zV}l5+Y8HVyj|i+iBxz1c8bu^g7b^m56GWhK8UmS*wx$8h!G~ra8;x!d77*&sZ7Hly ze_L7yTWWSi>CLVzy-Gik^b=*;m9E!Q8^QrNvBH+N!j`W53GC?H`+as4CUVt^AUk@U zFr#W)_ceiLR2$NaUO6~+?cjN9Rs2WdKMMc)J{I%|)8N<|vY_jj1>Ms<7Gfb^-Ae$a zArRZPXg}Jsi?JW=n&t2++!GrR#(p&0e1Mu;+wa4ZO?Z+EE3yhD3ZwA~cF*h9J&my0 z)y78crnc_uP-+poO|P6WeEPQ>9E=r*o-d_>&Zyt(LVOjGxitFX^clW<4IINne{~q&GACr%qw@Ic}$v{|^_D ze+lGo05e(%@1YGsT><-}tb4L*eoN`5BP(;PZ_J4}ik{AXL+`A|;}g=?#v>~%qjMrY zpdN7SEaWBKpP7HscA*~1481Ga6*e;?`e(*_1~&EVBLnb1Loe$i?0RtaYDL)|iCkjZ zbL5)I3=Urhh&w$*_+n0nJy1J3;rzhR%oa;OrwCdpO+OLGWA-PI=g-M;vx) z^mCC2VeMD}ptWNfw=*)m_SeLZraV~m(hZD_C%4C&K7W@tEBAo+$i2rrrfGSv(?rxp zN(@s;_n+|Q z4I}cNj#=GG%xhTTU?sH!ciTH*5^FY43Ai>K0s@h%wN%!iuhw?pT|i)JwW~O$R@=%k z_1ZFy!DI->G-wxcOoO(OW16(t9Mhz&116)YE4!kRgcIyn6L(KrIjxGH#k?e?ewJ3n z&z#n+YlA|Ys@ftmt*>~`VO5-FHNXvTpums$`<1Bep7u6Syn|)&HY|&u({G^Xd|4dS z_3jSji3Q3q=0u>{t$$x&dgP-z`N)Vzd{ujQc^lqjc4v}{?LB=j7G7T|pFPLACv)s8 zx4T;y4=iuHi7A%1c`9dP)JLI6>Kgn+{gkiP;>AY2_td?N2U2&Fbfj!lVFc?Qy?>pn$G5=!h0fgL z{vj5)aoo#qK$Y%lP2MxM9iyoK9x_F=VoRrP%@fR0=!e%QqkwB#*0ELW<58f{AUg_wcsrz-0VT??sYGT0zWXm134Puf-8-I}QtsPmejnp=Us=HR7 z#tr6;JE1gawp{?8QFpyjn?%{6_fS2X?s}s>iPpZ^!U~pf_R^9LCemqLr|rzj*XZX! zh7~Vz_hR9+jh6K0SYN}zn*dlI>@~n(wlE#_E-&F3_l#Oz(!kXgp(Zy|V1!y&&ODp& zbR)wd)byMTiMS?{y$cSp{ghW@|I+)q{j|rVZ*O1gRoV}%^~}6A?iG#4l$CcF)1(c< zs6>%!lT3aKD|bqMD^H%1;>lA|EO~TYBtf-Lf)*|TA_)oFuD1Wn>$1P(&9(3LK%5Tm z;`Z0qdQ8UlqmV$%(QX;WG*hMoDOz~ql%y|lG%sqrl%6l|Ej(pP&zCaSBv0F5`vLDe z_M_hAb}V#FvA^yuYd?mR6WUK8<&m_~P#Lpz7-<<*TvHOM7q3E6N^kCkvtG>`UaG-e zgl*xUEYT-dP#%@VlNU?#JzIu-ZOJW+y#xNEh}#qS+8NtaGVi3L5z0SQTg-Q@u_H*6-R5G zHc<8MCP}M#MIpqSAFDD^AXDsz>9&^D8*yNGu9^=)JB;ne*LvCv?JqL9|ARL4DvA6$ z+X;XGRz=RNJdDM3k{o83H=)zCwC;I#WLzgLA2khQh7GB(lFhW@Y^$>dCOB6T2-@xS zWeei8aETGB=aqqwkSy1}3#1J`G*^6Yw|zI1>zr5LzGtm>d;6}no_8n4?WWXghcOdV z)n>p^Zo0=_{}M){h8q&zIo5DPQo{{N4KpOkd)(Qu{;!f5eT+tpw4TXL4cwzN*zvx` zDAS#?Cd0LjEK-KMo*D2A*9HYWa+d2YVVp9?_Mnd&eqAECxm%dA$E+ixao4gw0V^KN zrhx-B1L8ytF~eQGnyf*cS=ee?PseT~W`JoY(ySWZy3<|i<7WD=sb+?pb}Eta+ykQ2Em!x<)sC= zsYuW@p<^BsMfqDzmRhdP6Z#6E(_JsJ)%h1;)Q9zsLl-3pcOfmB9C8&FDcMuRpDW4+X=f9*E+oV0_N-uVhU?rXOlNmfp zuo8>=qtHs3-GbJ0GmbzDiw{P^TtcEV-Xi-;WVDZg-840LD{2yLuF z`t79Ag43CjElh>8tp6s-VQoJoHyAsgkb2&lRLbhxBm(ajk#apt8QW>Nph@;msc+c7 zmV{!gAQW#5-bd|cz4^soQkrLk3?Y(6ny`Us?H#bL6XOn3Q>bROLcPmsJya9^C118* zhIw^zc%;ggPn=_I#Rn7aa7V4drV$pa#`!2M+t=_Ca!q@Tj&hm0v-~&~?a^ib@n*cL z^(sQcc2TI++jZ}&nN{cEx11aTGyZPTs2BB_?g}_(%SRnHnA~>`_Pm=v>O~{Dq(*X! zu(Xs7h}E4QbmUkEhF=R84UMW5K(sx)RU{e{Un-JruHUhBlh=_x+Uq6~!g0~VYbrhjeznPe0Jfp83O^upDwWGcD4cbF>m4Wp^)6Rv4A888ZM z8O=RK{Wj((k6SSEzA-dQGK2OQz&p-n{u4F`@FmuJ>VVux^Av!a>o}6F0z(D(#)Hb3 z&CP@dG!ooP+YF{*n?WVtsX*Hd(!A%e)I!1^y7!p(uzru{-G@dU)Ki@vfsxN2bn*-u|6FSA-gookTApg%bLpoY|#Ihk( z9hM%;u|1(u&*}zCkLB2&fa2jviR}roxoj%~M%X)yl(~Jm8>mRYgLWuHVTZ!?%i~%$ zQ#s?AnN(mKrQ`Lm9SYcy;K8~i*9p$y9Cj$2)3aS70Xq~bF_b|~cKZtg(X(-w~%3P`wVvlvdXZ4*+v%vsz zEPzv_(=i+QU_H>Lx##J7vDX~5Y}oFScCA4U*c-IMt~JR4dy^cnZ;=D`Epou#A_wd( ze89d81OIGFdl?4)+^&fM`^GvU5^={F{aTOd`kNQ1q{U@;;u!@yHNFFuj_g#!_#V6t z$86X5I$_zij5o5OV5Pogd>L6b#Hb#xC(DLV?f7gsJl=<)>?j;>!vc)vV>WhBn&+^A ziNoiZ4A`P%r29&Yqd9c<@aJwL4(|j0xokjB+szBG?vRyt3LLiSfx#E1rh7lozv0Dh zg)+n-({&VPEO*ogFk^R(dX>zWyBX6$1nU5)%Wfq@wmKVkCl>BEe ztquc1?a83AVj(od)Fb(HSJ!A^)o4RSr`S+2n>JJkeK27c6s%eY6^`Gkv1;*IHI@*p z8bMgKY^p?9HL5(aYT3L%SOGD^2IOWV5urXA5w~bNnu*YZ%~<@|jxq+hQ7gkUZq$$p znbr+Pq9&sT1U6XQ?k3e>My;9R38U77Fj<>gUeEL9Kc^>7Wadc0=muhJAokvZ`Oi`J zO?ZN0Lk0FyL}C5HaRKJ$VeF%Tajf6Llm}ZaRPO-&>tOI4NfW%GLKr-@&5F$42Oioy zp)$L^gNz>z;-ftlD($g=O}(D)u|PWa(T0jed_zSu;~cd0ii^Fv4$zEq_G{UO3N!~M zKeR=U2f{#k&tXFa=_RtnH&pnhC2Ka2R$yPWzIyW(Joe2~$_*6?N)HAuk^wfEj13hu z4;`?fVo&!kPeKYT^*;7F6{db-iGdu)?ngf*jB3lXxU%! zg2{AqO`(d(%TI3 z_nK>J*|?C#hkQ@=JK}AXt!$nT<2ktTIH*+nZ?xgrZuM#!CE|WqJMMAXYxOCT<uvN__ zSk-F8D?_@+=A)a(3w^qEZ)<4+a;}Y!<~asTek% z#`2E!)k0lttABk)H)ezfW#Lf<--^3Jxu$MKP?Fm9bnhd7%$DU$>SQU2i)c)T_&QDJ>-Kv1tjEOfDr{9h=4~ zhXP3tQ{vUJtEN*Sfo|1oCJr#!5a-bs=>bkL0!2d+vps=U$Q7zrNUfB8!>&qE#2i)S% zGJ5ww{NXD`l0^Dp1k+tj@g2|}9Hg^lKUJ8n%YB)CJoNJ>{T!s959#Mvp>F<# zLbGoEZu&V)KPPAE#|~+ zXSqkj^DI%pWZ`%*DvCJ~hvr0d&@si8Cn5QB7?!RmWq)1@k*^? zy5-}Ff$6%0lPC0PtIp4Lo-(cKo$LGvT)RJ=t<$G`K)>-Pz&!VT1m~xmq~P11nsvDy z6#5*Y`G@J}6yZJxP0ZF4lyk87H{CUyq&vLaoP~0z!K){iYS<*RXmiXxiP{iM}CW%kB;ax9;v_uw{1-Aatf_FA*THWiL@6uyrqyAkeZ`&>X?dzwo~w z|M=b)7R;vZD}M$zhw;A~|F0wcLf!D4sQyC;;xH>mYK9eWmvfEDD0-RXg=F#cncRpr z)WiJhPD+@Cj#3EYB*ZH}h8Y3pK++*IY>kIHF?slbDG&Q3kDb_rJ(4*nHf)XDj((4f zX!qL)z`AsA|B`ovVt)ah7xTmIFeOUH-|?f6Q;Wl=QRQ0A)rwN6D``z`MeD(9%!OIH zlD#;*{WFv9{a3(qk4cx%TByt0TWG==>0LMHLe`luVkgnEF6=Fwsrz2~Nt`|J9gz9x z`Bh+BP6k$cB2H1*`^jBr?q^4N$5h8Fu#8xLVDEn6eh7zY2Hm#hNVZ=O%Yvyr%2RJl>B}C!YKT z8hL!3`+nr_<5mziB8V1`!kr_YmqUtJN8Oie#8CnXxx8BU3fJKMxpw83eBqF7_8@|Y(;7WTL3YG+(6H< z?#JjpL&uV`1rHFYPkEM6R;M(J7~28qvqJbdlJ2p4VVLl(Qr)^qs6JE8BWl%oJw&LF zUyB0L)mCDz<1^jO$8`yNT_1d=Td)t?&TuBCb+4{{56+>fW`(Td`%TbyZ>7F_YhCrV z=*Vlg@;*FoEA`!58wJt;BS)0fcW-SLNZyC%ZA~L&t3akv7f{uXNzPBopTRDlsolEA zXr~rU?9)dR_hwX`!Isf~;8hVR;(jLL`{(F`WY#ZWQ|p-wkTuI)TTgOjxzp?QFx?}b zpr2pjC+b(3RXf$Jsc)5cAtgr=cpU(nSF+sIEH?Z*789oj)$3jApIqu+p~f+>&GZEX zh<1{7e+M8K-e6DeZTkotY)lcS_*YY2xqy!?Siiu!?NU36wKkrW4KJ_H;6KDtHNtXH zk;m?174xEzsxQh|e=V|=yVq46)rf0aGuO|1RB~ zewzlf!SEc8>ejpnXAIN9nMJ!B!G%PK>TzC+7Ybw3DE)&CNR!kzJqp#1WJx|GLd9lc zts0wD@f{l7Iyx_!Y$-L4O!p5l7{K{5&~qF*wjHWsMoSrn)eIdJ-}K)%%n?U2tuN8Y z1BbXZQ;|%$oJo6I6Y9rLOj`kn=P2Q?)ATJ3g3(j8V6xQb}VvoaGQr zN^g&fHMPJwq4ouTq#hYZz23IAWJe>kJ23`#M&6n}-fj9tf;{PB? z1^6NUk5P|`|C2M|yFA?lg_wnQ`t(*FXn5Mg16!Uxh(IPrVEE9H?y+*1h^Y;%UUZLr zz#3pHJ97>>$Qav!&RtWXrM{yWdpp$1&rPm_x4XCa%9)qJT(=z+;VTGqwCA85Xs49l z47YtX%k&%yP|i21l+TSS&lPe*5^p4lsj<$PdZheye80$H;T%~bf`>!OOPD5E_ruWM z@hQ;=3CyA zPL^raIp$=+)nZN-)KQ;Og>>`f9#*YNrAQ|(&r|goRyb%dJkwR+ryrx?`UO zgX+gf9tu240aj-1ujU-YxHm>f~0Oy-dAH)vr*T-H*Tx zbG_I-08fGJE@smZ=_oSM?Q;JM<_=Sd3B#YK>TsQdzg0TyX9$~hIWPKMutRv}h0}Ei z|D7;shfTWqfcL+G@FC%ly$F9ym-ntNcVE|-JFx=}{@51yU@G}!FG)x2AF$rqa`0yx z_qrC&Lw=d(rNAP0ZSvi!xMe%`h<*m>yC(Xc(LsgWG1JjGIWj-AzjfcO2BmB2T$G)A zVk^Sf-@1A{K+Uc1{{R5H2fKIRDT{XxPSeT3p>a4FNt;SU_>u|Ens<*JiyKTcWK^Aq z?Cr>?!AmHodn-Pn$oKErSyx9jhTCvD(Gu)oPG~ZHM{(xmsd1-z@ zQHjG@k1ak1t0xtUp|57=!EM>a4SbLLo>o2k4!e>o@m)^3pomLmhMrKLp$-#qss>;7yzzDg!})&sKH55a54 zN6}Au1>Y<9HsqgeFI|*x&nzi&hLhW}#A?rXICyo4puZ%0ZsSGTbK75jdR~uxsNx=j z8k@7Cyu`9J-(F#JkfJ0ympMUPPFi9sdBC~YFwHR0Xr%aro00FZISfS(!`yqavJCl7 zXhnf-nc)@#o|i7QmFL^-vu9?9CZPCtRpi^l#a{|VD=oj}7Rww{5r!}`)@<2U?A6` zi*ofBmA{U->;d=RN(uq`z!L~WdytnFoqju+f2p=!+A}-E%{-D6NsNw-jK}B&H^TyY zuUJORals{J6=o?eEwD&0D8b<@xaF2b`Hmv1rKk|St}VZKnssr0`8Wf6LP3kOJio~4 zVCd=74L6Q6$k^m#E-EdxkMT2?j}l~n`JtJ2T4rTtnGH7w_32zP{_8A#&{pC|bU?8a zSOb_~7;8wmIl)&h7m4fV!_Gv&Z8+%ne|enX`^Yqo4+RY-MY-5N`O+8rPXq6Z{qr@8 z0T6)Z0Urta$1Pjakk4tPjoDNM^UVsT#Y5673gyr5;|G@urax$)%+l+hH#|^eIRusy zn@C9rH^Z+&k9--%ervN(uqfj87fboDGIKX5c z3a1m$4%h=&O@&q5+uH!x0k{t^tpxOVB`Okd7=?031Iz+! zt?BJeK{<2+<^d|Tkb~~mAs?8%=mD$-Y_10#MsfNLNFQ+FuK{C`4&WBRv__-@d^X@X zocyXh4Ssl!i}ngT0S$n)fJVS3z%syNfU5w{0X71T!@F_Kfa!o8fKI^dXOKU@I_%MN zA|3j!4$YnGUqF2VwgReG0pALKfSqmN2iW87?Ty7NREBoQ4cG~o2iUv|<%8#CZzEs0 zU-drtVb#fa9Qg%QFntw;MZ6Bcd4MB;qD%8|_T7qRnSS7t*WYYVu8A5HNgqJz^E`_Y z40=*S1d;+GE12q0DrffgqIi5)147gA?<73*Nqq)F(cF;|t*;%x199#M9x$v^c_3lE zmfoFLqK#Dpc_4MoAeGcCx^sp~I}V~#+Xcl=4igRt9{UB;mpRn$<1mtwbMYY~T0t{sSs>hcIMPiDum}P@z64-V}i7#a_&)q6UTdhV`T+1^BNN@tYA=9sc<1r2edr;FW8rQaMMXT1gxYLO0?BXT8u3k%F;G zT{mDo^$c9jV@M|pa(3Wey@AqUI;dYWKqy3nx(<(`^+NSz)kLfrpfLRht0RaXsQ-k| z1O8wLyc76+A@H@pdqUuwfNu%H(?J&vz;6Y<2T)y)6h--7Zu?@Dk*%gu9=~Su$_{K@ z>fvvV)6Y>g--=9BA19Gfr>%cIQNL&pfgb^UK@ff%_{5U^0G{c~STuH@n$%Oqed>W5 zWgh<1L4OYPWeBU+5j`t$BeN6h2C`NU!$(E|icDZl1Zx^Q8DnlJ4v>`iwjn)^j%0e* z08A|UchWocHFPq#D4z#`j}mx>#1HAqN#IrB>!2^+P*<|R;ajN}bu9$@b~sghTb!uO zs2oS2J1Btv6Wp#-fX@rUQ$3FZejf0R2&;F(Ns#q?3lO!uX5Cs=r>ONCf!xO`^<_r4 zpWGYL9;)YPBaa(Nax{T&Lr{8TAIa~k2fh_y^#r7p>7@g4F}>o(U1}Gl*G@i0zr=x} z<8VmlP9mK&^mXcbN{1};<@e)E>sM*iFQI>5x5`&(`dFG~669;ztRPmikjrm}6=Y)i zlZP(#)Y88C6OU(3;7~@x4erLobx*gevK; ztr@7^1t~-inIJk~!^NF4W0iKzK<<-Ke(jJywo=%$6XdLOd7FTb0^Yzd{D;c11$gy` zz>_R~S!pR+I)BVJJqEgy*h0UGvLopF`w(g&^)>xm2A&+VvRHtp0&-3W47Wd_xeP%3~k!)hk3f3B9Tsz&un@ACU^todR8D ze>&f07m`nVR;kar*1G#R+_vzMq0=ZwRVqvNusM zK=mQ;@xa#x;i(^*27EQ}X$XhugQ&^(>w}1#(yJ8l0esguglL|n5_}B^tKTD~V``>% z4^SzZ7~oTHCi(}^+l74UXXwOm5uX;|3nZW8z+Z$<0q9SHejeyoA*}8sdS=Fq%y9Yr zw`=iOI&NrW{k$gLs*JTNd_59I(9%817l?Uq4N2ZK%&V!TedOZpX`P%GSugEMU_Z-* zf*VEeOZ)Ap>~V!XjT8NkUX~V@3O*0IT=#%=5n$bp)M}&E1d@@}KYD;z%zvR6eAfts z;(_0X_dtz=6*fg3jW9e4r+9t@&ztG-Sbu(eePiG_oi#L{oorTllM8DnWk=4FZDw|| z7IxBuv>G6n6Z3Xu5MDh1eUP%z_$DfX>>l-L`hf$MD(dwVidL7d>Rh4MsY};(u28>E zs3>)5&)X~X!_=jx-(I0!jnUaYW-VW4zx&zm0j3D;>=!d*yr@|MjCWriL5aj85zGLY z2`EgHa5tOX;fJ+@Y9u=1SzdmGql@aJ0r;)JQ%@o<%6C2RwRqpRmFBIE!X_bhgYAKw0v$*q|0 zObf^#<+~m~M{p2^-0M&Dq=z&Qd+_-nJ-i7Ipodh=Co$-s&nC!s^53$)m^6LnkVQWK2I<4*ll12#_&EO)q(8!r;jz;D zq)9V1Z8fZiOV#ckq&%T&j}B5ctJ=+x%HP%HYa^BITEZOEYCj*Oyu-pDX^HaZ1NR9X z?tQXZqxqetK{N5w!O9n!XEe2gmF5UDZay3g^Z)4(t!K2dVW{?l(aMgY+BXxGT|@t* z!Od@mZPz?|lk#OWJ!#f$*F2t}td60ZzsGFXoJ>&GUr9HfE4OR*k5PU(oNitm{u@p6 z&C2TUT^NYF-x;)z->B>`Xn%jB@|r=r_eSMS7Cz4K|2wLelKC*6(mxxog+M^P-RPPD?~bocP$9VYFjl*+U*S*Wj#gtN1u53hN8H7pZqG}r!<=X)qE0} zd|gZ{A-=|&$@{J=!U$b zQ$A+l&H5i;W9#>|Z+t(Z_xnR!*F^mOnwws^MtSQR2-tPa7zF=!1U=k^-w_e#M-a(R zM^gA7Benk?8F6+bVXCjC@c&UJkqw@0lto}ZbQWe4Q`$vOp#wEE%cS|RT}+p+~1_lP*MT!3w-t~jb^>(I$+jnw110L>IT#f z_;85wzN-D_70P*4`>!jM$F&sxz4nymO`Y=gAg%j~h~GwP+hQVW2WxwD5w8x`0*^ZR z%@xY#A#n)5J46c>zZ(jse;#@}g1?CxcRE@*95n&q#%S%)Xyql+!;ShOztu;)qR*|0 zQ9g>H@VP6tX+RUM%mT|dFdVIk9IIWRJRF#>pK3HKGJ*KLM!Qdq_}KvM-&Ezm0PVlD z5x^!A-qZSG;Qehu_qPY#&kMR=7<6A6biX3#z9#7Y$)Nkq;+~vuy2`LJPZx&2 z>`MKNL)ad6v7>_!LIJZY1FerPbja)~69J4g*!890kC86BX!VXRax3XdxPg}!^+a?v z3PQLQ>>_u8F2P3WNaptvpTHbx>r1zzYIKr*Tf#3YaM1bOGlI zxIn-X0apsRPQa%Gd|tp81l%v+TLOM0-~|DrO$xiN6);i2=>pCbaDjj&0r1zzYIK&k*Sgm?+?M0p|+1K)@0KR|>dJz^4R!UceUw+%Mo;0)8am z1p%YUQ^$3!fQbT57jUkC3j{0?aHW9j1bj-s=LLL0!2JTgCE!N_UJx)kU8FByqJYx{ zoGai00ZRm2Dd0K*pAztS0bdYszkqKE_>q7Y1dKL|^aV^5aJqnV1zaFtiGV8wTqodD z0zNO`3j*#J@GSv967Yh6(X&MQ0wxMLUBI~lE)cLpz?A~76YwbkpBL~20rv~|mVh4# zctODE*&=-b69t?u;9LQTY5*r?zo|EkVsL6j$%7@OKQ5X2jo<9)|8C3F8DJbIsyM7(oY@pUF3gK^3)0V50U=% z?L33h(%=5wq>uKO z8nXSRj{8pXkM@@uvi)uUZqlEG^e2TX|J3o{LH?5`Cr_pSQ1$=+ar&u=-+lh67u2w> zP@~MAIrA1n!tA;Cj4`AnrX)@_OfV)*FeWD@8xrouOA;A)qmkjqr;Jg?7uX8(E9_2t z(V|=MzpP|&sl#aq#4anb5N=XRK_B#G5j&O^+bsn}jsxalT`*l31|31hF`DmMe?~Z#_BiW@Mim z3!atR?D-Tygv#trC6QipR1%#wyo;Jxh#NdBEx;?AN}_GCrLa7|*k)N=fVlDwOs!Un zZJE_p=CnXayG==?mq{HEfrBMhkP{0^58Rk+`Nc(6L@h-s_(^0*^JEs`ozO&UX)(QG z!@a0)_InMY55m9n#rTe8?4{<4$>#b4Q&0nM5T;?X7XLE7oG0)>5g|gvljC*CCk1#K z`pfuo9znuGIv$beiAKhkX-Hp%|Y? zci`4eD+A~b zmwL4yfnXuyN%(P4e1jNA8*UQmhiQL;o$@&X#7idAD*T}4L>|A314*tJd?6k<3lK!Y z9?19;f*uClDzuI+)8-uc@6jJ{%7~x{LANaJpeDu`EKcQ=8Pvhy8j1f({L6Bac8%tX zL#5xG!{cji0ztqPoPNNge!&PxC;QRBhY_$gvVZK0k5FX4*B7rUvOnvK*DA7~>WjxB z%MSuEJRE_ZUiK@2Pa{w&vcKqyAMEQ7`r?Nu(!Te_4^^by?u(C7q&@D7AErn<+ZPX` zEA42&g9t^ZNV^z-M*Zm(Y47^tV-#u6`r@xpq@C)EzfzI*r!PKMk#?moez+p-MPK|? zX#b-EQ7jyR)ePCb10P0UwL-SrzWA#Z*&h4iY1d8Pi}Z2;JgMOFr}m{PpY-;n=6gXj zXnn!uLu+FirGNRPGJOB?p=nP_ca`Y3WjgmWdcXX#-H^YcQLa{Y3p*p*<1)cV$&eg4 z3@3jz;}fG~2d!WH55wyer|1VIw_h`SfBDJ1Ao&&1?@RhN1VWYDdB(@Dx2Hq|vQrw3 zUk}p-ektLR-^D)|#W1oTImGbbBN+Ti(7z@4vBF7xR;n{&UG+;3a(wjHw3t+!#ckEbwxkQ_|ll z@CGseC;2}l@N!;K(z_-9Ao@Q@{sJ%QUy}UA{F>zdq2w>{l727>mdPJPKUUzk2hqf7>KZyR0JQANP z@UcPojRJ27!gmY&IDvm!$g>y*kob25(eD!YBLYvGFzKSbbwuA2M1L#rWXI$_1_KvY znN9St8+KuT1X{U=;fE;(asY9y0-S^P(rAeu6H^r%~X4#L-Ft@rRt}gq%eJ z|7VtNobs%wcPYSWxhtCYauzi1Rvp!D${@uRnGSl zJ$7NN!Y^G0_KR2k1s~PG@nTv;`HjHG2H~Zgavw{uoQ*+p9u6bV`$C>Z4Nq5&dnA9k zKPH%eQxN}ij1T$+QSTyKN?a7>_mkdWzbKL6`}@%u z;5UcCw=#VH@m+fu`pz);FT>ysC^VMepnADS;9F?|6IUzyRg@3ur7UJXNk5_l@Ny49Rc z=J&=}j&CjE_zn(~TLj(^RL*wbLzT}T1pO*OZxDPAgrR>U4F2;l`0vB<#(9=Y|HVM2EB+o(ML&-A^ z{YfbN%rN+S7#{M7bfr9-fG0V#MgPU#&x1^#2>MPp7tkQ&91I6LRJtR9zh2M;f+>uC zD9$Y6Sefq6!tnW{px6I`)6@Hmbe$3S#uxd08h00zM3_!A?S7XDc*<8tQ28%s^l{1- z;U}mfrc2NpMfs-*{NI2NmEV^cpR1L%LH$UV;A0T|ph57_VoD_xpV49PX5d5RD-U?4 zw}QVc&-IKC^tqKM*eufZ2>ND`FN46p$>_yR0Js_sWGFe4!{F^<@NK}8oQl)jF;%L3Vdr&J-UFW{-|yfr<3`8O3>?_9N)-+a)i-~n*cBZ6Su^t zwUr|~gQjQBGIp^;B<7J16{uZK2*JU zfY-ynZ4UB#I|QFPk*?H}t}uL}t_#ioCg4e)x}fsU3q!w6(6fa*$lbxDSClZg%Qz)F7Y4z@0^cn3T-NW6Fv3LNTFvPtel_r+hnzwE%egRou8j{Z&$KZ3M}enwJ4$)F zqWF}Z0?%HG#Um-t1xeq;=~r=}Ts=B;`!WC@syuH2p3+r5^8i9$1PBmf>z< zlEU}w;5gIb(5){AaO!s%A|>w2`nR4i*+&f zuj7jo)(0(#lZ~m0?=iAzwb)D9S?(29CnHImXiV}`P)TqSijHSS|1;ksY%uf1o}3gu z_T;2X#hz%K;^*Hl?Mq2M`67}}zNF+6lO~5xJ86>tS;%}(oG{T(5iI$H6#ui3u_sLq zpZ285mr8rmr0}ISY0{-iZBj~j$tR^;Qt}Dm>wD6KOV;<4@cEuNqvY79_BWIE&`;MGsi?m;t5t+qf9DI?d_`dd#xx}2~nCE9ZH^(`N zA77oB<4mE0$1}5&k^;UjlbuiU<)m25=1Ju?Iwsm&jJr^*3&OLuT$($jG6|_nwAyX? z<>sZn6aqv{mB(8Xu@S`SNuddIE+fHjgNMIpCi#mdK=8|vCn=@x_gBnwr&ui5<7=@l zTb4B0Vkx2X$?rDrFm$sq;

wBpMTwCQOD1 zQ>>*WOXrnlmN;`9zO1FP6V=g9zG{M?^EE*SDYnCG_HpBt_(S$#9t)(mL1tzy(kJxQ zd{lJ^DnFLKJ`h-v_XTLDWErk8NvXoc&yfU|uey#Lm^-Gq{t`?w`p@16YsVs45&27( zSqdvktn_^!`Z$p@zXadnVc#X9)BF>&EE9_C#zK2(zLQ)Vn=>&vbt39%!US@S>2qC$ z6>z@G%1aknz^fp$z&y|5ukUP_6koxIiQ~vD3>SxL$6i!o3nL0Tko0@8_q{L?t$Eop zOBr_KmzLm5LeAxurSKnTXWco&lx4Yd)~p<)FwZn2%S>Z{0!OK3aehewHq?s&fpE^# zbLX1oWX?qP`s;|VLP$%(7eJ`Ygywgt2A(tpHVfaVu*@s7pq~OPNP~sqIPSi~*Iu)&*-MIwSem7Fhq)k#0q>LPMUQ-^b+M%c?>1OcD$>$^ zXudlq^Uk?nGvmp~c!5pi*=Ej0-&oAE4Wkz#7jttQGtth1WV>%}(v+;y{DOH!#kNe} z7pBUo!J*nwu=tKfkt{sR1o#=G*K?UEpgQi)E@>ug6TaRx6Q7bunn;FR{HLoSJ@h$@=lHWZaQK7(aYx1Zo5WDZf-!PCDe9aiB5(?Mp3p+a4%#z=vR;U&FU< zkc~TX9242VvA;r<SVJFd&{x6mie!T8mNp(|RvxfqQEi6>3&S2ZEbGZ7}U zu3dV90p>f^B0hKms=%%pw*~g>{%s`30WSexxLXRBHmJ}o_#_<~kEJlb$SxkqR>$6k zvcMV3x8U1-WVq7OV9!i5GA&7o$w-TeF@-tGA(Hf0bqeWfNK??SzJ2EWVrKSh)rDV? zCox6tmvleny{HI&cZ0bq{UWpO2+k`yh@eu%XL4+I6gM2LVq3Ap=2W1+fhcPk&0Ddz z>dH!gT!`_M|AsmeZdc&}JA$i{^Kqa;lTl-3#Gf(ymYhTgLb?DOv#iJ&l&er(^QCNy z=(76_*;pRQqYtzMe$D9D4>_nC%5mBq7~ z@i+5+Iw9Rf=3Dq`VFDA7eAAhsI-~$2$26b1wxF5{QCcFYq~P3Hzh34-_Q(Co)ZdaW zR#y|@w~|Gt!BIc?fGz&!y_x^3qg1w&#b;qBOvI<=(7hGIyf5V+ft^{7Fa{{*?8%lT z?2~rci&^bo{)7Xo#o!{5)ezRp%W;-FLXD>QSS3K)(B&{~V^bYw6w}?9_khm?&7+Br z{u<%vuPBA(He26@_Kl}>{eA<_zw`uT#xHkBZk59t*0rVLvtcklDHAM?in6lOa!k>a zTPF>Z4R~ic`a)q!(8I6~jQ+ot$7~FM@)s3h=uyNw{+y!8@G)udV$Qb84$o%+zLgOz z%5_3=1@pxMTzD%Sb&I3u?kRKN)x)q^X2Anuz92dCOB8$!t*E4LIRv(ZDR<-RdKh+H zvNbF%Qs$WEV`XKo*^*_RJ9}P+PtPwtL4vV3&3hzU6o+}U>OarIr1Xn(8BRSPsj?ni zOtM|H`Z38{>ag{%uMp$Jf`Ab|A14Y0Op+S$)oQkXKLSUv7~h%f+oZ+%8s;l37y*bc zJ!2J(KCEqnY4pz)gqa}-GB~cU;sF32?{xG6!!eU&+>`&?iI5$Y08`1kF&OItNL~RwIEraWhiV5K5|QQZkQs+Xvwe9Vm>LPXHIh!cNYa4uTZ1*{u89E ztOBP2gV{jEzfws~7T;g(uPxvp%|VzBLaZwxr3uWs!@}d+y|VR7TY}p`2%>iM<@Oy%0+(L3Lg7i(;`MOHe09&-!#?{|}kU BX;J_H diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc index 432a528bb..a2dbff785 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc @@ -5,20 +5,13 @@ #define ENABLE_ASSERTIONS 1 #define DO_PROFILING 1 -#define DEBUG 1 +//#define DEBUG 1 //#define DEBUG0_1 1 //#define DEBUG3 1 #include "template.h" #include "utils.h" - -#include "define-float.h" -#include "avx_function_prototypes.h" - -#include "define-double.h" -#include "avx_function_prototypes.h" - using namespace std; class LoadTimeInitializer @@ -33,32 +26,18 @@ class LoadTimeInitializer m_sumSquareNumHaplotypes = 0; m_sumNumTestcases = 0; m_sumSquareNumTestcases = 0; + m_maxNumTestcases = 0; m_num_invocations = 0; + m_filename_to_fptr.clear(); - if(is_avx_supported()) - { - cout << "Using AVX accelerated implementation of PairHMM\n"; - g_compute_full_prob_float = compute_full_prob_avxs; - g_compute_full_prob_double = compute_full_prob_avxd; - } - else - { - cout << "Using un-vectorized C++ implementation of PairHMM\n"; - g_compute_full_prob_float = compute_full_prob; - g_compute_full_prob_double = compute_full_prob; - } + initialize_function_pointers(); cout.flush(); - //if(is_sse42_supported()) - //{ - //g_compute_full_prob_float = compute_full_prob_avxs; - //g_compute_full_prob_double = compute_full_prob_avxd; - //} } void print_profiling() { double mean_val; cout <<"Invocations : "<::iterator mI = m_filename_to_fptr.find(filename); + ofstream* fptr = 0; + if(mI == m_filename_to_fptr.end()) + { + m_filename_to_fptr[filename] = new ofstream(); + fptr = m_filename_to_fptr[filename]; + fptr->open(filename.c_str(), to_append ? ios::app : ios::out); + assert(fptr->is_open()); + } + else + fptr = (*mI).second; + //ofstream fptr; + //fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); + (*fptr) << s; + if(add_newline) + (*fptr) << "\n"; + //fptr.close(); + } + void debug_close() + { + for(map::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end(); + mB != mE;mB++) + { + (*mB).second->close(); + delete (*mB).second; + } + m_filename_to_fptr.clear(); } jfieldID m_readBasesFID; jfieldID m_readQualsFID; @@ -89,8 +93,10 @@ class LoadTimeInitializer double m_sumSquareNumHaplotypes; double m_sumNumTestcases; double m_sumSquareNumTestcases; + unsigned m_maxNumTestcases; unsigned m_num_invocations; - + private: + map m_filename_to_fptr; }; LoadTimeInitializer g_load_time_initializer; @@ -174,10 +180,10 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadL tc.c[i] = (int)overallGCPArray[i]; } - double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)(&tc); + double result_avxd = g_compute_full_prob_double(&tc, 0); double result = log10(result_avxd) - log10(ldexp(1.0, 1020)); #ifdef DEBUG - debug_dump("return_values_jni.txt",to_string(result),true); + g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(result),true); #endif @@ -255,7 +261,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai haplotypeBasesArrayVector[j] = make_pair(haplotypeBases, haplotypeBasesArray); #ifdef DEBUG3 for(unsigned k=0;kGetArrayLength(haplotypeBases);++k) - debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true); + g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true); #endif } @@ -310,11 +316,11 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai #ifdef DEBUG3 for(unsigned j=0;j g_load_time_initializer.m_maxNumTestcases ? numTestCases + : g_load_time_initializer.m_maxNumTestcases; ++(g_load_time_initializer.m_num_invocations); #endif +#ifdef DEBUG + g_load_time_initializer.debug_close(); +#endif +} + +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose + (JNIEnv* env, jobject thisObject) +{ +#ifdef DO_PROFILING + g_load_time_initializer.print_profiling(); +#endif +#ifdef DEBUG + g_load_time_initializer.debug_close(); +#endif } diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h index 1fff9fa5a..b50f6a5a9 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h @@ -70,6 +70,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods (JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint); +/* + * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM + * Method: jniClose + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose + (JNIEnv *, jobject); + #ifdef __cplusplus } #endif diff --git a/PairHMM_JNI/pairhmm-1-base.cc b/PairHMM_JNI/pairhmm-1-base.cc index a3b5e0ad5..3030b640e 100644 --- a/PairHMM_JNI/pairhmm-1-base.cc +++ b/PairHMM_JNI/pairhmm-1-base.cc @@ -5,12 +5,6 @@ #include "template.h" #include "utils.h" -#include "define-float.h" -#include "avx_function_prototypes.h" - -#include "define-double.h" -#include "avx_function_prototypes.h" - using namespace std; class LoadTimeInitializer { @@ -36,16 +30,7 @@ int main(int argc, char** argv) if(argc >= 3 && string(argv[2]) == "1") use_old_read_testcase = true; - if(true) - { - g_compute_full_prob_double = GEN_INTRINSIC(compute_full_prob_avx, d); - g_compute_full_prob_float = GEN_INTRINSIC(compute_full_prob_avx, s); - } - else - { - g_compute_full_prob_double = compute_full_prob; - g_compute_full_prob_float = compute_full_prob; - } + initialize_function_pointers(); std::ifstream ifptr; FILE* fptr = 0; @@ -86,70 +71,5 @@ int main(int argc, char** argv) else ifptr.close(); return 0; - -#if 0 - float result[BATCH_SIZE], result_avxf; - double result_avxd; - struct timeval start, end; - long long aggregateTimeRead = 0L; - long long aggregateTimeCompute = 0L; - long long aggregateTimeWrite = 0L; - - bool noMoreData = false; - int count =0; - while (!noMoreData) - { - int read_count = BATCH_SIZE; - gettimeofday(&start, NULL); - for (int b=0;b(&tc[b]); - -#ifdef RUN_HYBRID -#define MIN_ACCEPTED 1e-28f - if (result_avxf < MIN_ACCEPTED) { - count++; - result_avxd = compute_full_prob(&tc[b]); - result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f)); - } - else - result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)); -#endif - -#ifndef RUN_HYBRID - result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)); -#endif - - } - gettimeofday(&end, NULL); - aggregateTimeCompute += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec)); - - gettimeofday(&start, NULL); - for (int b=0;b #include +//#define DEBUG +#define MUSTAFA +#define KARTHIK + /* template string getBinaryStr (T val, int numBitsToWrite) { @@ -17,8 +21,9 @@ string getBinaryStr (T val, int numBitsToWrite) { return oss.str() ; } */ +#ifdef MUSTAFA -void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) { +void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) { const int maskBitCnt = MAIN_TYPE_SIZE ; @@ -52,7 +57,7 @@ void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, i } -void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) { +void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) { for (int ri=0; ri < numRowsToProcess; ++ri) { rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ; @@ -63,6 +68,7 @@ void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsA } } + #define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \ MASK_TYPE __bitMask = (((MASK_TYPE)0x1) << __shiftBy) - 1 ; \ MASK_TYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \ @@ -71,33 +77,43 @@ void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsA } -void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt) { +void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_, SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt) { for (int ei=0; ei < AVX_LENGTH/2; ++ei) { SET_MASK_WORD(currMaskVecLow.masks[ei], maskArr[maskIndex][rsArr[ei]], - lastMaskShiftOut[ei], ei, maskBitCnt) ; - + lastMaskShiftOut[ei], ei, maskBitCnt) ; + int ei2 = ei + AVX_LENGTH/2 ; // the second entry index SET_MASK_WORD(currMaskVecHigh.masks[ei], maskArr[maskIndex][rsArr[ei2]], - lastMaskShiftOut[ei2], ei2, maskBitCnt) ; + lastMaskShiftOut[ei2], ei2, maskBitCnt) ; } } + //void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen, const _256_TYPE& distmSel, int firstRowIndex, int lastRowIndex) { -inline void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen) { +inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec, SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen) { //#define computeDistVec() { _256_TYPE maskV ; VEC_SSE_TO_AVX(currMaskVecLow.vecf, currMaskVecHigh.vecf, maskV) ; +#ifdef DEBUGG + long long *temp1 = (long long *)(&maskV); + double *temp2 = (double *)(&distm); + double *temp3 = (double *)(&_1_distm); + printf("***\n%lx\n%lx\n%f\n%f\n%f\n%f\n***\n", temp1[0], temp1[1], temp2[0], temp2[1], temp3[0], temp3[1]); +#endif + distmChosen = VEC_BLENDV(distm, _1_distm, maskV) ; /*COMPARE_VECS(distmChosen, distmSel, firstRowIndex, lastRowIndex) ;*/ VEC_SHIFT_LEFT_1BIT(currMaskVecLow.vec) ; VEC_SHIFT_LEFT_1BIT(currMaskVecHigh.vec) ; + + _mm_empty(); } @@ -120,8 +136,9 @@ struct HmmData { } ; */ +#endif // MUSTAFA -template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D) +template void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors, SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D) { NUMBER zero = ctx._(0.0); NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); @@ -157,18 +174,14 @@ template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS *(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c]; *(ptr_p_MX+r-1) = ctx.ph2pr[_i]; *(ptr_p_XX+r-1) = ctx.ph2pr[_c]; - *(ptr_p_MY+r-1) = ctx.ph2pr[_d]; - *(ptr_p_YY+r-1) = ctx.ph2pr[_c]; -#ifdef DEBUG3 - debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1) ),true); - debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true); - debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1) ),true); - debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1) ),true); - debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1) ),true); - debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1) ),true); -#endif - //*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; - //*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; + #ifdef KARTHIK + *(ptr_p_MY+r-1) = ctx.ph2pr[_d]; + *(ptr_p_YY+r-1) = ctx.ph2pr[_c]; + #else + *(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; + *(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; + #endif + } NUMBER *ptr_distm1D = (NUMBER *)distm1D; @@ -176,14 +189,11 @@ template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS { int _q = tc->q[r-1] & 127; ptr_distm1D[r-1] = ctx.ph2pr[_q]; -#ifdef DEBUG3 - debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true); -#endif } } -template inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)( +template inline void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION, SIMD_TYPE), PRECISION)( int stripIdx, Context ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY, _256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM , _256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, @@ -200,19 +210,18 @@ template inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION) NUMBER zero = ctx._(0.0); NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0); -#define TRISTATE_CORRECTION_FACTOR 3.0 - UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR); - + UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(3.0); /* compare rs and N */ - //rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH)); - //rsN.d = VEC_CMP_EQ(N_packed256, rs); - +#ifndef MUSTAFA + rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH)); + rsN.d = VEC_CMP_EQ(N_packed256, rs); +#endif distm = distm1D[i]; - _1_distm = VEC_SUB(packed1.d, distm); -#ifndef DO_NOT_USE_TRISTATE_CORRECTION - distm = VEC_DIV(distm, packed3.d); -#endif - + _1_distm = VEC_SUB(packed1.d, distm); + + #ifdef KARTHIK + distm = VEC_DIV(distm, packed3.d); + #endif /* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */ M_t_2.d = VEC_SET1_VAL(zero); X_t_2.d = VEC_SET1_VAL(zero); @@ -234,7 +243,7 @@ template inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION) -inline _256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256, +inline _256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM, SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256, _256_TYPE distm, _256_TYPE _1_distm) { UNION_TYPE hapN, rshap; @@ -263,12 +272,21 @@ inline _256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcas } -inline void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, +inline void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY, SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, _256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel) { /* Compute M_t <= distm * (p_MM*M_t_2 + p_GAPM*X_t_2 + p_GAPM*Y_t_2) */ M_t.d = VEC_MUL(VEC_ADD(VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(X_t_2.d, pGAPM)), VEC_MUL(Y_t_2.d, pGAPM)), distmSel); + +#ifdef DEBUG + double *temp1 = (double *)(&pGAPM); + double *temp2 = (double *)(&pMM); + double *temp3 = (double *)(&distmSel); + printf("%f\n%f\n%f\n%f\n%f\n%f\n", temp1[0], temp1[1], temp2[0], temp2[1], temp3[0], temp3[1]); + //printf("%f\n%f\n%f\n%f\n", X_t_2.f[0], X_t_2.f[1], Y_t_2.f[0], Y_t_2.f[1]); + printf("%f\n%f\n----------------------------------------------------------------------------\n", M_t.f[0], M_t.f[1]); +#endif M_t_y = M_t; /* Compute X_t */ @@ -278,7 +296,7 @@ inline void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_ Y_t.d = VEC_ADD(VEC_MUL(M_t_1_y.d, pMY) , VEC_MUL(Y_t_1.d, pYY)); } -template NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL) +template NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL) { _256_TYPE p_MM [MAVX_COUNT], p_GAPM [MAVX_COUNT], p_MX [MAVX_COUNT]; _256_TYPE p_XX [MAVX_COUNT], p_MY [MAVX_COUNT], p_YY [MAVX_COUNT]; @@ -306,54 +324,51 @@ template NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t int remainingRows = (ROWS-1) % AVX_LENGTH; int strip_cnt = ((ROWS-1) / AVX_LENGTH) + (remainingRows!=0); +#ifdef MUSTAFA const int maskBitCnt = MAIN_TYPE_SIZE ; const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function MASK_TYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ; - GEN_INTRINSIC(precompute_masks_, PRECISION)(*tc, COLS, numMaskVecs, maskArr) ; + GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(*tc, COLS, numMaskVecs, maskArr) ; char rsArr[AVX_LENGTH] ; MASK_TYPE lastMaskShiftOut[AVX_LENGTH] ; - - GEN_INTRINSIC(initializeVectors, PRECISION)(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY, +#endif + GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY, ctx, tc, p_MM, p_GAPM, p_MX, p_XX, p_MY, p_YY, distm1D); //for (int __ii=0; __ii < 10; ++__ii) for (int i=0;i NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t int i = strip_cnt-1; { //STRIP_INITIALIZATION - GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM , + GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM , p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM); if (remainingRows==0) remainingRows=AVX_LENGTH; - - GEN_INTRINSIC(init_masks_for_row_, PRECISION)(*tc, rsArr, lastMaskShiftOut, - i*AVX_LENGTH+1, remainingRows) ; - +#ifdef MUSTAFA + GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, remainingRows) ; +#endif _256_TYPE sumM, sumX; sumM = VEC_SET1_VAL(zero); sumX = VEC_SET1_VAL(zero); @@ -382,24 +396,25 @@ template NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t for (int d=1;d #include "headers.h" -#include -#include -#include #include "template.h" +#include "vector_defs.h" -#include "define-float.h" -#include "shift_template.c" -#include "avx_function_prototypes.h" -#include "define-double.h" -#include "shift_template.c" -#include "avx_function_prototypes.h" +#define BATCH_SIZE 10 +#define RUN_HYBRID -#define BATCH_SIZE 10000 -//#define RUN_HYBRID - -//uint8_t ConvertChar::conversionTable[255] ; int thread_level_parallelism_enabled = false ; double getCurrClk() { @@ -26,11 +15,45 @@ double getCurrClk() { return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; } +void print128b_F(__m128 x) +{ + float *p = (float *)(&x); + for (int i=3;i>=0;i--) + printf("%f ", *(p+i)); + printf("\n"); +} + +void print128b_D(__m128d x) +{ + double *p = (double *)(&x); + for (int i=1;i>=0;i--) + printf("%f ", *(p+i)); + printf("\n"); +} int main() { - - testcase tc[BATCH_SIZE]; +/* + IF_128f x; + x.f = _mm_set_ps(1.0, 2.0, 3.0, 4.0); + IF_32 shiftIn, shiftOut; + shiftIn.f = 5.0f; + print128b_F(x.f); + GEN_INTRINSIC(_vector_shift, s)(x, shiftIn, shiftOut); + print128b_F(x.f); + + IF_128d y; + y.f = _mm_set_pd(10.0, 11.0); + IF_64 shiftInd, shiftOutd; + shiftInd.f = 12.0; + print128b_D(y.f); + GEN_INTRINSIC(_vector_shift, d)(y, shiftInd, shiftOutd); + print128b_D(y.f); + + exit(0); +*/ + + testcase tc[BATCH_SIZE]; float result[BATCH_SIZE], result_avxf; double result_avxd; //struct timeval start, end; @@ -41,12 +64,12 @@ int main() // Need to call it once to initialize the static array ConvertChar::init() ; + - - char* ompEnvVar = getenv("OMP_NUM_THREADS") ; - if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) { - thread_level_parallelism_enabled = true ; - } +// char* ompEnvVar = getenv("OMP_NUM_THREADS") ; +// if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) { +// thread_level_parallelism_enabled = true ; +// } bool noMoreData = false; int count =0; @@ -56,7 +79,7 @@ int main() lastClk = getCurrClk() ; for (int b=0;b(&tc[b]); + result_avxf = GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_, SIMD_TYPE), s)(&tc[b]); #ifdef RUN_HYBRID #define MIN_ACCEPTED 1e-28f if (result_avxf < MIN_ACCEPTED) { + //printf("**************** RUNNING DOUBLE ******************\n"); count++; - result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)(&tc[b]); + result_avxd = GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_, SIMD_TYPE), d)(&tc[b]); result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f)); } else diff --git a/PairHMM_JNI/run.sh b/PairHMM_JNI/run.sh index 495a3761b..2bc45efad 100755 --- a/PairHMM_JNI/run.sh +++ b/PairHMM_JNI/run.sh @@ -2,18 +2,21 @@ rm -f *.txt export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable #-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed -java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \ +java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \ -R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \ --I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \ +-I /data/broad/samples/joint_variant_calling/NA12878_high_coverage_alignment/NA12878.mapped.ILLUMINA.bwa.CEU.high_coverage_pcr_free.20130906.bam \ --dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \ -stand_call_conf 50.0 \ -stand_emit_conf 10.0 \ --pair_hmm_implementation JNI_LOGLESS_CACHING \ +-XL unmapped \ -o output.raw.snps.indels.vcf #--pair_hmm_implementation JNI_LOGLESS_CACHING \ #-I /data/simulated/sim1M_pairs_final.bam \ #-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \ +#-I /data/broad/samples/joint_variant_calling/NA12878_high_coverage_alignment/NA12878.mapped.ILLUMINA.bwa.CEU.high_coverage_pcr_free.20130906.bam \ +#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly18.fasta \ #-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \ #-R /data/broad/samples/joint_variant_calling/broad_reference/ucsc.hg19.fasta \ #-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \ diff --git a/PairHMM_JNI/shift_template.c b/PairHMM_JNI/shift_template.c index 90160dcb3..db4a4f6f7 100644 --- a/PairHMM_JNI/shift_template.c +++ b/PairHMM_JNI/shift_template.c @@ -1,9 +1,8 @@ #ifdef PRECISION -#include "template.h" +#ifdef SIMD_TYPE_AVX - -inline void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut) +inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut) { IF_128 xlow , xhigh; @@ -33,7 +32,8 @@ inline void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE sh x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1); } -inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn) + +inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn) { IF_128 xlow , xhigh; @@ -44,10 +44,6 @@ inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TY /* extract xlow[3] */ IF_128 shiftOutL128; shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1); - /* extract xhigh[3] */ -// IF_MAIN_TYPE shiftOutH; -// shiftOutH.i = VEC_EXTRACT_UNIT(xhigh.i, SHIFT_CONST2); -// shiftOut = shiftOutH.f; /* shift xlow */ xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3); /* shift xhigh */ @@ -63,6 +59,32 @@ inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TY x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1); } +#endif + +#ifdef SIMD_TYPE_SSE + +inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut) +{ + IF_MAIN_TYPE tempIn, tempOut; + tempIn.f = shiftIn; + /* extratc H */ + tempOut.i = VEC_EXTRACT_UNIT(x.i, SHIFT_CONST1); + shiftOut = tempOut.f; + /* shift */ + x.i = _mm_slli_si128(x.i, SHIFT_CONST2); + /* insert L */ + x.i = VEC_INSERT_UNIT(x.i , tempIn.i, SHIFT_CONST3); +} + +inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn) +{ + IF_MAIN_TYPE temp; temp.f = shiftIn; + /* shift */ + x.i = _mm_slli_si128(x.i, SHIFT_CONST2); + /* insert L */ + x.i = VEC_INSERT_UNIT(x.i , temp.i, SHIFT_CONST3); +} #endif +#endif diff --git a/PairHMM_JNI/sse_function_instantiations.cc b/PairHMM_JNI/sse_function_instantiations.cc new file mode 100644 index 000000000..38686ada2 --- /dev/null +++ b/PairHMM_JNI/sse_function_instantiations.cc @@ -0,0 +1,18 @@ +#include "template.h" + +#undef SIMD_TYPE +#undef SIMD_TYPE_AVX + +#define SIMD_TYPE sse +#define SIMD_TYPE_SSE + +#include "define-sse-float.h" +#include "shift_template.c" +#include "pairhmm-template-kernel.cc" + +#include "define-sse-double.h" +#include "shift_template.c" +#include "pairhmm-template-kernel.cc" + +template double compute_full_prob_ssed(testcase* tc, double* nextlog); +template float compute_full_prob_sses(testcase* tc, float* nextlog); diff --git a/PairHMM_JNI/template.h b/PairHMM_JNI/template.h index 52a4e4650..0ba4f843d 100644 --- a/PairHMM_JNI/template.h +++ b/PairHMM_JNI/template.h @@ -25,12 +25,25 @@ typedef union __attribute__((aligned(32))) { ALIGNED __m256i ALIGNED i; } ALIGNED mix_F ALIGNED; +typedef union __attribute__((aligned(32))) { + ALIGNED __m128 ALIGNED d; + ALIGNED __m64 ALIGNED s[2]; + ALIGNED float ALIGNED f[4]; + ALIGNED __m128i ALIGNED i; +} ALIGNED mix_F128 ALIGNED; + typedef union ALIGNED { __m128i vec ; __m128 vecf ; uint32_t masks[4] ; } MaskVec_F ; +typedef union ALIGNED { + __m64 vec ; + __m64 vecf ; + uint32_t masks[2] ; +} MaskVec_F128 ; + typedef union ALIGNED { ALIGNED __m128i ALIGNED i; @@ -50,12 +63,25 @@ typedef union __attribute__((aligned(32))) { ALIGNED __m256i ALIGNED i; } ALIGNED mix_D ALIGNED; +typedef union __attribute__((aligned(32))) { + ALIGNED __m128d ALIGNED d; + ALIGNED __m64 ALIGNED s[2]; + ALIGNED double ALIGNED f[2]; + ALIGNED __m128i ALIGNED i; +} ALIGNED mix_D128 ALIGNED; + typedef union ALIGNED { __m128i vec ; __m128d vecf ; uint64_t masks[2] ; } MaskVec_D ; +typedef union ALIGNED { + __m64 vec ; + __m64 vecf ; + uint64_t masks[1] ; +} MaskVec_D128 ; + typedef union ALIGNED { ALIGNED __m128i ALIGNED i; @@ -120,7 +146,6 @@ struct Context }; - typedef struct { int rslen, haplen; @@ -131,24 +156,11 @@ typedef struct int *irs; } testcase; - -template -std::string to_string(T obj) -{ - std::stringstream ss; - std::string ret_string; - ss.clear(); - ss << std::scientific << obj; - ss >> ret_string; - ss.clear(); - return ret_string; -} -void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); - int normalize(char c); -int read_testcase(testcase *tc, FILE* ifp); -int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false); +int read_testcase(testcase *tc, FILE* ifp=0); + +#define MIN_ACCEPTED 1e-28f #define NUM_DISTINCT_CHARS 5 #define AMBIG_CHAR 4 @@ -176,7 +188,6 @@ public: }; - #endif diff --git a/PairHMM_JNI/utils.cc b/PairHMM_JNI/utils.cc index 5b91fc7e9..94e2144c0 100644 --- a/PairHMM_JNI/utils.cc +++ b/PairHMM_JNI/utils.cc @@ -1,5 +1,7 @@ #include "headers.h" #include "template.h" +#include "utils.h" +#include "vector_defs.h" uint8_t ConvertChar::conversionTable[255]; float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0; @@ -31,6 +33,30 @@ bool is_sse42_supported() return ((ecx >> 20)&1) == 1; } +void initialize_function_pointers() +{ + //if(is_avx_supported()) + if(false) + { + cout << "Using AVX accelerated implementation of PairHMM\n"; + g_compute_full_prob_float = compute_full_prob_avxs; + g_compute_full_prob_double = compute_full_prob_avxd; + } + else + if(is_sse42_supported()) + { + cout << "Using SSE4.2 accelerated implementation of PairHMM\n"; + g_compute_full_prob_float = compute_full_prob_sses; + g_compute_full_prob_double = compute_full_prob_ssed; + } + else + { + cout << "Using un-vectorized C++ implementation of PairHMM\n"; + g_compute_full_prob_float = compute_full_prob; + g_compute_full_prob_double = compute_full_prob; + } +} + int normalize(char c) { return ((int) (c - 33)); @@ -214,12 +240,3 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat) return tokens.size(); } -void debug_dump(string filename, string s, bool to_append, bool add_newline) -{ - ofstream fptr; - fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); - fptr << s; - if(add_newline) - fptr << "\n"; - fptr.close(); -} diff --git a/PairHMM_JNI/utils.h b/PairHMM_JNI/utils.h index 8c244333e..8da8269b8 100644 --- a/PairHMM_JNI/utils.h +++ b/PairHMM_JNI/utils.h @@ -1,7 +1,22 @@ #ifndef PAIRHMM_UTIL_H #define PAIRHMM_UTIL_H -#define MIN_ACCEPTED 1e-28f + +template +std::string to_string(T obj) +{ + std::stringstream ss; + std::string ret_string; + ss.clear(); + ss << std::scientific << obj; + ss >> ret_string; + ss.clear(); + return ret_string; +} +void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); + +int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false); + bool is_avx_supported(); bool is_sse42_supported(); extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log); @@ -9,5 +24,5 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline); template NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); - +void initialize_function_pointers(); #endif diff --git a/PairHMM_JNI/vector_defs.h b/PairHMM_JNI/vector_defs.h new file mode 100644 index 000000000..c89f7f932 --- /dev/null +++ b/PairHMM_JNI/vector_defs.h @@ -0,0 +1,26 @@ +#undef SIMD_TYPE +#undef SIMD_TYPE_AVX +#undef SIMD_TYPE_SSE + +#define SIMD_TYPE avx +#define SIMD_TYPE_AVX + +#include "define-float.h" +#include "vector_function_prototypes.h" + +#include "define-double.h" +#include "vector_function_prototypes.h" + +#undef SIMD_TYPE +#undef SIMD_TYPE_AVX + +#define SIMD_TYPE sse +#define SIMD_TYPE_SSE + +#include "define-sse-float.h" +#include "vector_function_prototypes.h" + +#include "define-sse-double.h" +#include "vector_function_prototypes.h" + + diff --git a/PairHMM_JNI/vector_function_prototypes.h b/PairHMM_JNI/vector_function_prototypes.h new file mode 100644 index 000000000..2593986a6 --- /dev/null +++ b/PairHMM_JNI/vector_function_prototypes.h @@ -0,0 +1,19 @@ +void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut); +void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn); +void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]); +void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess); +void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt); +void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen); +template void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D); +template void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)( + int stripIdx, Context ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY, + _256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM , + _256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1, + UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM); +_256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256, + _256_TYPE distm, _256_TYPE _1_distm); +void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y, + UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1, + _256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel); +template NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL); + diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java index 82015d153..b785b8d21 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/HaplotypeCaller.java @@ -1086,7 +1086,7 @@ public class HaplotypeCaller extends ActiveRegionWalker, In referenceConfidenceModel.close(); //TODO remove the need to call close here for debugging, the likelihood output stream should be managed //TODO (open & close) at the walker, not the engine. - //likelihoodCalculationEngine.close(); + likelihoodCalculationEngine.close(); logger.info("Ran local assembly on " + result + " active regions"); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java index 0626f2268..04e64186f 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/LikelihoodCalculationEngine.java @@ -89,4 +89,6 @@ public interface LikelihoodCalculationEngine { */ public Map computeReadLikelihoods(AssemblyResultSet assemblyResultSet, Map> perSampleReadList); + + public void close(); } diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java index 0a82e1997..5ab49b531 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java @@ -165,8 +165,10 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation } } + @Override public void close() { if ( likelihoodsStream != null ) likelihoodsStream.close(); + pairHMMThreadLocal.get().close(); } private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double log10l){ diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java index a80800f82..ca3db5ed3 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java @@ -66,7 +66,8 @@ import java.util.Map; */ public class JNILoglessPairHMM extends LoglessPairHMM { - private static final boolean debug = true; //simulates ifdef + private static final boolean debug = false; //simulates ifdef + private static final boolean verify = debug || false; //simulates ifdef private static final boolean debug0_1 = false; //simulates ifdef private static final boolean debug1 = false; //simulates ifdef private static final boolean debug2 = false; @@ -90,6 +91,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM { } private native void jniGlobalInit(Class readDataHolderClass, Class haplotypeDataHolderClass); + private native void jniClose(); private static boolean isJNILoglessPairHMMLibraryLoaded = false; @@ -104,6 +106,13 @@ public class JNILoglessPairHMM extends LoglessPairHMM { } } + @Override + public void close() + { + jniClose(); + debugClose(); + } + //Used to test parts of the compute kernel separately private native void jniInitialize(final int readMaxLength, final int haplotypeMaxLength); private native static void jniInitializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP); @@ -126,7 +135,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM { @Override public void initialize(final int readMaxLength, final int haplotypeMaxLength) { - if(debug) + if(verify) super.initialize(readMaxLength, haplotypeMaxLength); if(debug3) { @@ -150,14 +159,14 @@ public class JNILoglessPairHMM extends LoglessPairHMM { public PerReadAlleleLikelihoodMap computeLikelihoods(final List reads, final Map alleleHaplotypeMap, final Map GCPArrayMap) { // (re)initialize the pairHMM only if necessary - final int readMaxLength = debug ? findMaxReadLength(reads) : 0; - final int haplotypeMaxLength = debug ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0; - if(debug) + final int readMaxLength = verify ? findMaxReadLength(reads) : 0; + final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0; + if(verify) { if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength) { initialize(readMaxLength, haplotypeMaxLength); } if ( ! initialized ) - throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug mode"); + throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode"); } int readListSize = reads.size(); int alleleHaplotypeMapSize = alleleHaplotypeMap.size(); @@ -221,7 +230,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM { likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]); ++idx; } - if(debug) + if(verify) { //to compare values likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); @@ -273,6 +282,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM { } ++numComputeLikelihoodCalls; //if(numComputeLikelihoodCalls == 5) + //jniClose(); //System.exit(0); return likelihoodMap; } @@ -299,11 +309,11 @@ public class JNILoglessPairHMM extends LoglessPairHMM { //} //System.out.println("#### END STACK TRACE ####"); // - if(debug1) - jniSubComputeReadLikelihoodGivenHaplotypeLog10(readBases.length, haplotypeBases.length, - readBases, haplotypeBases, readQuals, - insertionGOP, deletionGOP, overallGCP, - hapStartIndex); + if(debug1) + jniSubComputeReadLikelihoodGivenHaplotypeLog10(readBases.length, haplotypeBases.length, + readBases, haplotypeBases, readQuals, + insertionGOP, deletionGOP, overallGCP, + hapStartIndex); boolean doInitialization = (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length); if (doInitialization) { @@ -358,8 +368,8 @@ public class JNILoglessPairHMM extends LoglessPairHMM { ++numCalls; //if(numCalls == 100) //{ - //debugClose(); - //System.exit(0); + //debugClose(); + //System.exit(0); //} return Math.log10(finalSumProbabilities) - INITIAL_CONDITION_LOG10; } diff --git a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java index 3b4498776..7b09fe047 100644 --- a/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java +++ b/public/java/src/org/broadinstitute/sting/utils/pairhmm/PairHMM.java @@ -280,4 +280,7 @@ public abstract class PairHMM { return Math.min(haplotype1.length, haplotype2.length); } + + //Called at the end of all HC calls + public void close() { ; } }