diff --git a/.gitignore b/.gitignore index 16e86f25d..dcc44daa4 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,5 @@ tests .deps hmm_Mohammad +pairhmm-template-main +*.swp diff --git a/Makefile b/Makefile index df5dbc463..f858a7241 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -#OMPCFLAGS=-fopenmp +OMPCFLAGS=-fopenmp #OMPLDFLAGS=-lgomp #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas @@ -7,7 +7,7 @@ JAVA_ROOT=/opt/jdk1.7.0_25/ JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux -CFLAGS=-g -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX +CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX CXXFLAGS=$(CFLAGS) CC=icc @@ -16,11 +16,12 @@ CXX=icc LDFLAGS=-lm $(OMPLDFLAGS) #BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev -BIN:=libJNILoglessPairHMM.so #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev +BIN:=libJNILoglessPairHMM.so pairhmm-template-main #SOURCES=pairhmm-1-base.cc input.cc -SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc -OBJECTS=$(SOURCES:.cc=.o) +LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc +SOURCES=$(LIBSOURCES) pairhmm-template-main.cc +LIBOBJECTS=$(LIBSOURCES:.cc=.o) DEPDIR=.deps DF=$(DEPDIR)/$(*).d @@ -28,11 +29,11 @@ all: $(BIN) -include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d)) -pairhmm-1-base: pairhmm-1-base.o input.o - $(CXX) -o $@ $^ $(LDFLAGS) +pairhmm-template-main: pairhmm-template-main.o hmm_mask.o + $(CXX) -fopenmp -o $@ $^ $(LDFLAGS) -libJNILoglessPairHMM.so: $(OBJECTS) - $(CXX) -shared -o libJNILoglessPairHMM.so $(OBJECTS) +libJNILoglessPairHMM.so: $(LIBOBJECTS) + $(CXX) -shared -o $@ $(LIBOBJECTS) %.o: %.cc @mkdir -p $(DEPDIR) diff --git a/define-double.h b/define-double.h index b2de7520a..5d31741d6 100644 --- a/define-double.h +++ b/define-double.h @@ -23,6 +23,7 @@ #undef VEC_ADD #undef VEC_SUB #undef VEC_MUL + #undef VEC_DIV #undef VEC_BLEND #undef VEC_BLENDV #undef VEC_CAST_256_128 @@ -82,6 +83,9 @@ #define VEC_MUL(__v1, __v2) \ _mm256_mul_pd(__v1, __v2) +#define VEC_DIV(__v1, __v2) \ + _mm256_div_pd(__v1, __v2) + #define VEC_BLEND(__v1, __v2, __mask) \ _mm256_blend_pd(__v1, __v2, __mask) diff --git a/define-float.h b/define-float.h index 4cba9a179..0e2822b2d 100644 --- a/define-float.h +++ b/define-float.h @@ -23,6 +23,7 @@ #undef VEC_ADD(__v1, __v2) #undef VEC_SUB(__v1, __v2) #undef VEC_MUL(__v1, __v2) + #undef VEC_DIV(__v1, __v2) #undef VEC_BLEND(__v1, __v2, __mask) #undef VEC_BLENDV(__v1, __v2, __maskV) #undef VEC_CAST_256_128(__v1) @@ -83,6 +84,9 @@ #define VEC_MUL(__v1, __v2) \ _mm256_mul_ps(__v1, __v2) +#define VEC_DIV(__v1, __v2) \ + _mm256_div_ps(__v1, __v2) + #define VEC_BLEND(__v1, __v2, __mask) \ _mm256_blend_ps(__v1, __v2, __mask) diff --git a/hmm_mask.cc b/hmm_mask.cc index aace7acf2..f128a39d9 100644 --- a/hmm_mask.cc +++ b/hmm_mask.cc @@ -438,7 +438,7 @@ void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) { //cout << "Finished validating entry " << endl ; } - +#ifdef HMM_MASK_MAIN int main () { #define BATCH_SIZE 10000 @@ -482,3 +482,4 @@ int main () { return 0 ; } +#endif diff --git a/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc b/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc index 80065b8c9..29668a306 100644 --- a/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc +++ b/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc @@ -15,6 +15,10 @@ #include #include #include +using namespace std; +//#define DEBUG3 1 +#define DEBUG 1 + #include "template.h" @@ -22,7 +26,6 @@ #include "shift_template.c" #include "pairhmm-template-kernel.cc" -using namespace std; #define MM 0 @@ -32,22 +35,17 @@ using namespace std; #define MY 4 #define YY 5 -//#define DEBUG3 1 -#define DEBUG 1 - -template -string to_string(T obj) +class LoadTimeInitializer { - stringstream ss; - string ret_string; - ss.clear(); - ss << std::scientific << obj; - ss >> ret_string; - ss.clear(); - return ret_string; -} + public: + LoadTimeInitializer() //will be called when library is loaded + { + ConvertChar::init(); + } +}; +LoadTimeInitializer g_load_time_initializer; -void debug_dump(string filename, string s, bool to_append, bool add_newline=true) +void debug_dump(string filename, string s, bool to_append, bool add_newline) { ofstream fptr; fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); diff --git a/pairhmm-template-kernel.cc b/pairhmm-template-kernel.cc index dd71373b0..24bcf7d1a 100644 --- a/pairhmm-template-kernel.cc +++ b/pairhmm-template-kernel.cc @@ -157,8 +157,18 @@ template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS *(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c]; *(ptr_p_MX+r-1) = ctx.ph2pr[_i]; *(ptr_p_XX+r-1) = ctx.ph2pr[_c]; - *(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; - *(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; + *(ptr_p_MY+r-1) = ctx.ph2pr[_d]; + *(ptr_p_YY+r-1) = ctx.ph2pr[_c]; +#ifdef DEBUG3 + debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1) ),true); + debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true); + debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1) ),true); + debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1) ),true); + debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1) ),true); + debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1) ),true); +#endif + //*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; + //*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; } NUMBER *ptr_distm1D = (NUMBER *)distm1D; @@ -166,6 +176,9 @@ template void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS { int _q = tc->q[r-1] & 127; ptr_distm1D[r-1] = ctx.ph2pr[_q]; +#ifdef DEBUG3 + debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true); +#endif } } @@ -187,13 +200,18 @@ template inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION) NUMBER zero = ctx._(0.0); NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen); UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0); +#define TRISTATE_CORRECTION_FACTOR 3.0 + UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR); /* compare rs and N */ //rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH)); //rsN.d = VEC_CMP_EQ(N_packed256, rs); distm = distm1D[i]; - _1_distm = VEC_SUB(packed1.d, distm); + _1_distm = VEC_SUB(packed1.d, distm); +#ifndef DO_NOT_USE_TRISTATE_CORRECTION + distm = VEC_DIV(distm, packed3.d); +#endif /* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */ M_t_2.d = VEC_SET1_VAL(zero); diff --git a/pairhmm-template-main.cc b/pairhmm-template-main.cc index f20755b6b..87d50eb5b 100644 --- a/pairhmm-template-main.cc +++ b/pairhmm-template-main.cc @@ -16,7 +16,7 @@ #define BATCH_SIZE 10000 //#define RUN_HYBRID -uint8_t ConvertChar::conversionTable[255] ; +//uint8_t ConvertChar::conversionTable[255] ; int thread_level_parallelism_enabled = false ; double getCurrClk() { diff --git a/template.h b/template.h index ee8036a11..4c8a4b5e3 100644 --- a/template.h +++ b/template.h @@ -13,6 +13,7 @@ #include #include +#include #define MROWS 500 #define MCOLS 1000 @@ -132,6 +133,20 @@ typedef struct int *irs; } testcase; + +template +std::string to_string(T obj) +{ + stringstream ss; + string ret_string; + ss.clear(); + ss << std::scientific << obj; + ss >> ret_string; + ss.clear(); + return ret_string; +} +void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); + int normalize(char c);