1. Added TRISTATE_CORRECTION in pairhmm-template-kernel.cc (function
stripINITIALIZATION)
2. Added VEC_DIV macros to define-double.h and define-float.h
3. Edited initializeVectors to match Java
C++ original:
*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
Modified:
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
This commit is contained in:
Karthik Gururaj 2014-01-15 10:48:58 -08:00
parent 5fab96b7ee
commit 8240ea826e
9 changed files with 72 additions and 29 deletions

2
.gitignore vendored
View File

@ -4,3 +4,5 @@
tests
.deps
hmm_Mohammad
pairhmm-template-main
*.swp

View File

@ -1,4 +1,4 @@
#OMPCFLAGS=-fopenmp
OMPCFLAGS=-fopenmp
#OMPLDFLAGS=-lgomp
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
@ -7,7 +7,7 @@
JAVA_ROOT=/opt/jdk1.7.0_25/
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux
CFLAGS=-g -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
CXXFLAGS=$(CFLAGS)
CC=icc
@ -16,11 +16,12 @@ CXX=icc
LDFLAGS=-lm $(OMPLDFLAGS)
#BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
BIN:=libJNILoglessPairHMM.so #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
BIN:=libJNILoglessPairHMM.so pairhmm-template-main
#SOURCES=pairhmm-1-base.cc input.cc
SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc
OBJECTS=$(SOURCES:.cc=.o)
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc hmm_mask.cc
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
DEPDIR=.deps
DF=$(DEPDIR)/$(*).d
@ -28,11 +29,11 @@ all: $(BIN)
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
pairhmm-1-base: pairhmm-1-base.o input.o
$(CXX) -o $@ $^ $(LDFLAGS)
pairhmm-template-main: pairhmm-template-main.o hmm_mask.o
$(CXX) -fopenmp -o $@ $^ $(LDFLAGS)
libJNILoglessPairHMM.so: $(OBJECTS)
$(CXX) -shared -o libJNILoglessPairHMM.so $(OBJECTS)
libJNILoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) -shared -o $@ $(LIBOBJECTS)
%.o: %.cc
@mkdir -p $(DEPDIR)

View File

@ -23,6 +23,7 @@
#undef VEC_ADD
#undef VEC_SUB
#undef VEC_MUL
#undef VEC_DIV
#undef VEC_BLEND
#undef VEC_BLENDV
#undef VEC_CAST_256_128
@ -82,6 +83,9 @@
#define VEC_MUL(__v1, __v2) \
_mm256_mul_pd(__v1, __v2)
#define VEC_DIV(__v1, __v2) \
_mm256_div_pd(__v1, __v2)
#define VEC_BLEND(__v1, __v2, __mask) \
_mm256_blend_pd(__v1, __v2, __mask)

View File

@ -23,6 +23,7 @@
// Drop any previous (e.g. double-precision) definitions of the vector macros
// before they are redefined for single precision below. #undef takes only the
// macro name; a parameter list after it is ill-formed and triggers
// "extra tokens at end of #undef directive" under -pedantic.
#undef VEC_ADD
#undef VEC_SUB
#undef VEC_MUL
#undef VEC_DIV
#undef VEC_BLEND
#undef VEC_BLENDV
#undef VEC_CAST_256_128
@ -83,6 +84,9 @@
#define VEC_MUL(__v1, __v2) \
_mm256_mul_ps(__v1, __v2)
#define VEC_DIV(__v1, __v2) \
_mm256_div_ps(__v1, __v2)
#define VEC_BLEND(__v1, __v2, __mask) \
_mm256_blend_ps(__v1, __v2, __mask)

View File

@ -438,7 +438,7 @@ void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) {
//cout << "Finished validating entry " << endl ;
}
#ifdef HMM_MASK_MAIN
int main () {
#define BATCH_SIZE 10000
@ -482,3 +482,4 @@ int main () {
return 0 ;
}
#endif

View File

@ -15,6 +15,10 @@
#include <immintrin.h>
#include <emmintrin.h>
#include <omp.h>
using namespace std;
//#define DEBUG3 1
#define DEBUG 1
#include "template.h"
@ -22,7 +26,6 @@
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
using namespace std;
#define MM 0
@ -32,22 +35,17 @@ using namespace std;
#define MY 4
#define YY 5
//#define DEBUG3 1
#define DEBUG 1
template<class T>
string to_string(T obj)
class LoadTimeInitializer
{
stringstream ss;
string ret_string;
ss.clear();
ss << std::scientific << obj;
ss >> ret_string;
ss.clear();
return ret_string;
}
public:
LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
}
};
LoadTimeInitializer g_load_time_initializer;
void debug_dump(string filename, string s, bool to_append, bool add_newline=true)
void debug_dump(string filename, string s, bool to_append, bool add_newline)
{
ofstream fptr;
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);

View File

@ -157,8 +157,18 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
#ifdef DEBUG3
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1) ),true);
debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true);
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1) ),true);
debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1) ),true);
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1) ),true);
debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1) ),true);
#endif
//*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
//*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
}
NUMBER *ptr_distm1D = (NUMBER *)distm1D;
@ -166,6 +176,9 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
{
int _q = tc->q[r-1] & 127;
ptr_distm1D[r-1] = ctx.ph2pr[_q];
#ifdef DEBUG3
debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true);
#endif
}
}
@ -187,13 +200,18 @@ template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)
NUMBER zero = ctx._(0.0);
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
#define TRISTATE_CORRECTION_FACTOR 3.0
UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR);
/* compare rs and N */
//rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH));
//rsN.d = VEC_CMP_EQ(N_packed256, rs);
distm = distm1D[i];
_1_distm = VEC_SUB(packed1.d, distm);
_1_distm = VEC_SUB(packed1.d, distm);
#ifndef DO_NOT_USE_TRISTATE_CORRECTION
distm = VEC_DIV(distm, packed3.d);
#endif
/* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */
M_t_2.d = VEC_SET1_VAL(zero);

View File

@ -16,7 +16,7 @@
#define BATCH_SIZE 10000
//#define RUN_HYBRID
uint8_t ConvertChar::conversionTable[255] ;
//uint8_t ConvertChar::conversionTable[255] ;
int thread_level_parallelism_enabled = false ;
double getCurrClk() {

View File

@ -13,6 +13,7 @@
#include <immintrin.h>
#include <ctype.h>
#include <sstream>
#include <string>
#define MROWS 500
#define MCOLS 1000
@ -132,6 +133,20 @@ typedef struct
int *irs;
} testcase;
/**
 * Render a value as text, using scientific notation for floating-point types.
 *
 * Used by the DEBUG3 dump paths to write transition/prior values to text files.
 *
 * All standard-library names are fully qualified so this header does not rely
 * on the including translation unit having issued `using namespace std;`.
 *
 * @param obj  Any value that can be streamed with operator<<.
 * @return     The complete streamed representation of obj. The whole buffer is
 *             returned rather than re-extracted with operator>>, which would
 *             stop at the first whitespace character.
 */
template<class T>
std::string to_string(T obj)
{
    std::ostringstream ss;
    ss << std::scientific << obj;
    return ss.str();
}
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
int normalize(char c);