Changes:

1. Added TRISTATE_CORRECTION in pairhmm-template-kernel.cc (function stripINITIALIZATION) 2. Added VEC_DIV macros to define-double.h and define-float.h 3. Edited initializeVectors to match Java C++ original: *(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d]; *(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c]; Modified: *(ptr_p_MY+r-1) = ctx.ph2pr[_d]; *(ptr_p_YY+r-1) = ctx.ph2pr[_c];
2014-01-15 10:48:58 -08:00 · 2014-01-15 10:48:58 -08:00 · 8240ea826e
parent 5fab96b7ee
commit 8240ea826e
9 changed files with 72 additions and 29 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,5 @@
 tests
 .deps
 hmm_Mohammad
+pairhmm-template-main
+*.swp
--- a/19
+++ b/19
@ -1,4 +1,4 @@
-#OMPCFLAGS=-fopenmp
+OMPCFLAGS=-fopenmp
 #OMPLDFLAGS=-lgomp

 #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q            -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
@ -7,7 +7,7 @@
 JAVA_ROOT=/opt/jdk1.7.0_25/
 JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux

-CFLAGS=-g -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
+CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX

 CXXFLAGS=$(CFLAGS)
 CC=icc
@ -16,11 +16,12 @@ CXX=icc
 LDFLAGS=-lm $(OMPLDFLAGS)

 #BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
-BIN:=libJNILoglessPairHMM.so #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
+BIN:=libJNILoglessPairHMM.so pairhmm-template-main

 #SOURCES=pairhmm-1-base.cc input.cc 
-SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc
-OBJECTS=$(SOURCES:.cc=.o)
+LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc
+SOURCES=$(LIBSOURCES) pairhmm-template-main.cc
+LIBOBJECTS=$(LIBSOURCES:.cc=.o)
 DEPDIR=.deps
 DF=$(DEPDIR)/$(*).d

@ -28,11 +29,11 @@ all: $(BIN)

 -include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))

-pairhmm-1-base:	pairhmm-1-base.o input.o
-	$(CXX) -o $@ $^ $(LDFLAGS)
+pairhmm-template-main:	pairhmm-template-main.o hmm_mask.o
+	$(CXX) -fopenmp -o $@ $^ $(LDFLAGS)

-libJNILoglessPairHMM.so: $(OBJECTS) 
-	$(CXX) -shared -o libJNILoglessPairHMM.so $(OBJECTS) 
+libJNILoglessPairHMM.so: $(LIBOBJECTS) 
+	$(CXX) -shared -o $@ $(LIBOBJECTS) 

 %.o: %.cc
 	@mkdir -p $(DEPDIR)
--- a/define-double.h
+++ b/define-double.h
@ -23,6 +23,7 @@
        #undef VEC_ADD
 	#undef VEC_SUB
        #undef VEC_MUL
+        #undef VEC_DIV
        #undef VEC_BLEND
        #undef VEC_BLENDV
 	#undef VEC_CAST_256_128
@ -82,6 +83,9 @@
 #define VEC_MUL(__v1, __v2)                     \
  _mm256_mul_pd(__v1, __v2)

+#define VEC_DIV(__v1, __v2)                     \
+  _mm256_div_pd(__v1, __v2)
+
 #define VEC_BLEND(__v1, __v2, __mask)           \
  _mm256_blend_pd(__v1, __v2, __mask)

--- a/define-float.h
+++ b/define-float.h
@ -23,6 +23,7 @@
 	#undef VEC_ADD(__v1, __v2)
        #undef VEC_SUB(__v1, __v2)
 	#undef VEC_MUL(__v1, __v2)
+	#undef VEC_DIV(__v1, __v2)
 	#undef VEC_BLEND(__v1, __v2, __mask)
 	#undef VEC_BLENDV(__v1, __v2, __maskV)
        #undef VEC_CAST_256_128(__v1)
@ -83,6 +84,9 @@
 #define VEC_MUL(__v1, __v2)                     \
  _mm256_mul_ps(__v1, __v2)

+#define VEC_DIV(__v1, __v2)                     \
+  _mm256_div_ps(__v1, __v2)
+
 #define VEC_BLEND(__v1, __v2, __mask)           \
  _mm256_blend_ps(__v1, __v2, __mask)

--- a/hmm_mask.cc
+++ b/hmm_mask.cc
@ -438,7 +438,7 @@ void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) {
  //cout << "Finished validating entry " << endl ;
 }

-
+#ifdef HMM_MASK_MAIN
 int main () {

  #define BATCH_SIZE 10000
@ -482,3 +482,4 @@ int main () {

  return 0 ;
 }
+#endif
--- a/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc
+++ b/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc
@ -15,6 +15,10 @@
 #include <immintrin.h>
 #include <emmintrin.h>
 #include <omp.h>
+using namespace std;
+//#define DEBUG3 1
+#define DEBUG 1
+

 #include "template.h"

@ -22,7 +26,6 @@
 #include "shift_template.c"
 #include "pairhmm-template-kernel.cc"

-using namespace std;


 #define MM 0
@ -32,22 +35,17 @@ using namespace std;
 #define MY 4
 #define YY 5

-//#define DEBUG3 1
-#define DEBUG 1
-
-template<class T>
-string to_string(T obj)
+class LoadTimeInitializer
 {
-  stringstream ss;
-  string ret_string;
-  ss.clear();
-  ss << std::scientific << obj;
-  ss >> ret_string;
-  ss.clear();
-  return ret_string;
-}
+  public:
+    LoadTimeInitializer()		//will be called when library is loaded
+    {
+      ConvertChar::init();
+    }
+};
+LoadTimeInitializer g_load_time_initializer;

-void debug_dump(string filename, string s, bool to_append, bool add_newline=true)
+void debug_dump(string filename, string s, bool to_append, bool add_newline)
 {
  ofstream fptr;
  fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
--- a/pairhmm-template-kernel.cc
+++ b/pairhmm-template-kernel.cc
@ -157,8 +157,18 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
                *(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
                *(ptr_p_MX+r-1) = ctx.ph2pr[_i];
                *(ptr_p_XX+r-1) = ctx.ph2pr[_c];
-                *(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
-                *(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
+                *(ptr_p_MY+r-1) = ctx.ph2pr[_d];
+                *(ptr_p_YY+r-1) = ctx.ph2pr[_c];
+#ifdef DEBUG3
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1)  ),true);
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true);
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1)  ),true);
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1)  ),true);
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1)  ),true);
+		debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1)  ),true);
+#endif
+		//*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
+		//*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
        }

        NUMBER *ptr_distm1D = (NUMBER *)distm1D;
@ -166,6 +176,9 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
        {
                int _q = tc->q[r-1] & 127;
                ptr_distm1D[r-1] = ctx.ph2pr[_q];
+#ifdef DEBUG3
+		debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true);
+#endif
        }
 }

@ -187,13 +200,18 @@ template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)
 	NUMBER zero = ctx._(0.0);        
 	NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
 	UNION_TYPE packed1;  packed1.d = VEC_SET1_VAL(1.0);
+#define TRISTATE_CORRECTION_FACTOR 3.0
+	UNION_TYPE packed3;  packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR);
 	
        /* compare rs and N */                                           
        //rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH));        
        //rsN.d = VEC_CMP_EQ(N_packed256, rs);                             
                                                                         
        distm = distm1D[i];                                    
-        _1_distm = VEC_SUB(packed1.d, distm);                  
+        _1_distm = VEC_SUB(packed1.d, distm);
+#ifndef DO_NOT_USE_TRISTATE_CORRECTION
+	distm = VEC_DIV(distm, packed3.d); 
+#endif
                                                                         
        /* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */        
        M_t_2.d = VEC_SET1_VAL(zero);                                    
--- a/pairhmm-template-main.cc
+++ b/pairhmm-template-main.cc
@ -16,7 +16,7 @@
 #define BATCH_SIZE  10000
 //#define RUN_HYBRID

-uint8_t ConvertChar::conversionTable[255] ;
+//uint8_t ConvertChar::conversionTable[255] ;
 int thread_level_parallelism_enabled = false ;

 double getCurrClk() {
--- a/template.h
+++ b/template.h
@ -13,6 +13,7 @@
 #include <immintrin.h>

 #include <ctype.h>
+#include <string>

 #define MROWS  500
 #define MCOLS  1000
@ -132,6 +133,20 @@ typedef struct
        int *irs;
 } testcase;

+
+template<class T>
+std::string to_string(T obj)
+{
+  stringstream ss;
+  string ret_string;
+  ss.clear();
+  ss << std::scientific << obj;
+  ss >> ret_string;
+  ss.clear();
+  return ret_string;
+}
+void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
+
 int normalize(char c);