Merge branch 'master' of /home/karthikg/broad/archive/hmm_intra into intel_pairhmm
This commit is contained in:
commit
b57de8eec1
|
|
@ -0,0 +1,10 @@
|
|||
.svn
|
||||
*.o
|
||||
*.so
|
||||
tests
|
||||
.deps
|
||||
hmm_Mohammad
|
||||
pairhmm-template-main
|
||||
*.swp
|
||||
checker
|
||||
reformat
|
||||
|
|
@ -0,0 +1,47 @@
|
|||
OMPCFLAGS=-fopenmp
|
||||
#OMPLDFLAGS=-lgomp
|
||||
|
||||
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
|
||||
JAVA_ROOT=/opt/jdk1.7.0_25/
|
||||
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux
|
||||
|
||||
CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
|
||||
|
||||
CXXFLAGS=$(CFLAGS)
|
||||
CC=icc
|
||||
CXX=icc
|
||||
|
||||
LDFLAGS=-lm $(OMPLDFLAGS)
|
||||
|
||||
#BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
|
||||
BIN:=libJNILoglessPairHMM.so pairhmm-template-main checker
|
||||
|
||||
#SOURCES=pairhmm-1-base.cc input.cc
|
||||
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc convert_char.cc
|
||||
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
|
||||
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
|
||||
DEPDIR=.deps
|
||||
DF=$(DEPDIR)/$(*).d
|
||||
|
||||
all: $(BIN)
|
||||
|
||||
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
|
||||
|
||||
checker: pairhmm-1-base.o convert_char.o
|
||||
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
pairhmm-template-main: pairhmm-template-main.o convert_char.o
|
||||
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libJNILoglessPairHMM.so: $(LIBOBJECTS)
|
||||
$(CXX) $(OMPCFLAGS) -shared -o $@ $(LIBOBJECTS)
|
||||
|
||||
%.o: %.cc
|
||||
@mkdir -p $(DEPDIR)
|
||||
$(COMPILE.cpp) -MMD -MF $(DF) $(JNI_COMPILATION_FLAGS) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
|
||||
|
||||
clean:
|
||||
rm -rf $(BIN) *.o $(DEPDIR)
|
||||
|
|
@ -0,0 +1,186 @@
|
|||
#include "template.h"
|
||||
uint8_t ConvertChar::conversionTable[255];
|
||||
using namespace std;
|
||||
|
||||
int normalize(char c)
|
||||
{
|
||||
return ((int) (c - 33));
|
||||
}
|
||||
|
||||
int read_testcase(testcase *tc, FILE* ifp)
|
||||
{
|
||||
char *q, *i, *d, *c, *line = NULL;
|
||||
int _q, _i, _d, _c;
|
||||
int x, size = 0;
|
||||
ssize_t read;
|
||||
|
||||
read = getline(&line, (size_t *) &size, ifp == 0 ? stdin : ifp);
|
||||
if (read == -1)
|
||||
return -1;
|
||||
|
||||
|
||||
tc->hap = (char *) malloc(size);
|
||||
tc->rs = (char *) malloc(size);
|
||||
q = (char *) malloc(size);
|
||||
i = (char *) malloc(size);
|
||||
d = (char *) malloc(size);
|
||||
c = (char *) malloc(size);
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
|
||||
return -1;
|
||||
|
||||
tc->haplen = strlen(tc->hap);
|
||||
tc->rslen = strlen(tc->rs);
|
||||
assert(tc->rslen < MROWS);
|
||||
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
||||
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
||||
|
||||
//tc->q = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->i = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->d = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->c = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
|
||||
for (x = 0; x < tc->rslen; x++)
|
||||
{
|
||||
_q = normalize(q[x]);
|
||||
_i = normalize(i[x]);
|
||||
_d = normalize(d[x]);
|
||||
_c = normalize(c[x]);
|
||||
tc->q[x] = (_q < 6) ? 6 : _q;
|
||||
tc->i[x] = _i;
|
||||
tc->d[x] = _d;
|
||||
tc->c[x] = _c;
|
||||
tc->irs[x] = tc->rs[x];
|
||||
}
|
||||
for (x = 0; x < tc->haplen; x++)
|
||||
tc->ihap[x] = tc->hap[x];
|
||||
|
||||
|
||||
free(q);
|
||||
free(i);
|
||||
free(d);
|
||||
free(c);
|
||||
free(line);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned MAX_LINE_LENGTH = 65536;
|
||||
int convToInt(std::string s)
|
||||
{
|
||||
int i;
|
||||
std::istringstream strin(s);
|
||||
strin >> i;
|
||||
return i;
|
||||
}
|
||||
|
||||
void tokenize(std::ifstream& fptr, std::vector<std::string>& tokens)
|
||||
{
|
||||
int i = 0;
|
||||
std::string tmp;
|
||||
std::vector<std::string> myVec;
|
||||
vector<char> line;
|
||||
line.clear();
|
||||
line.resize(MAX_LINE_LENGTH);
|
||||
vector<char> tmpline;
|
||||
tmpline.clear();
|
||||
tmpline.resize(MAX_LINE_LENGTH);
|
||||
myVec.clear();
|
||||
|
||||
while(!fptr.eof())
|
||||
{
|
||||
i = 0;
|
||||
bool still_read_line = true;
|
||||
unsigned line_position = 0;
|
||||
while(still_read_line)
|
||||
{
|
||||
fptr.getline(&(tmpline[0]), MAX_LINE_LENGTH);
|
||||
if(line_position + MAX_LINE_LENGTH > line.size())
|
||||
line.resize(2*line.size());
|
||||
for(unsigned i=0;i<MAX_LINE_LENGTH && tmpline[i] != '\0';++i,++line_position)
|
||||
line[line_position] = tmpline[i];
|
||||
if(fptr.eof() || !fptr.fail())
|
||||
{
|
||||
still_read_line = false;
|
||||
line[line_position++] = '\0';
|
||||
}
|
||||
}
|
||||
std::istringstream kap(&(line[0]));
|
||||
|
||||
while(!kap.eof())
|
||||
{
|
||||
kap >> std::skipws >> tmp;
|
||||
if(tmp != "")
|
||||
{
|
||||
myVec.push_back(tmp);
|
||||
++i;
|
||||
//std::cout <<tmp <<"#";
|
||||
}
|
||||
tmp = "";
|
||||
}
|
||||
//std::cout << "\n";
|
||||
if(myVec.size() > 0)
|
||||
break;
|
||||
}
|
||||
tokens.clear();
|
||||
//std::cout << "Why "<<myVec.size()<<"\n";
|
||||
tokens.resize(myVec.size());
|
||||
for(i=0;i<(int)myVec.size();++i)
|
||||
tokens[i] = myVec[i];
|
||||
line.clear();
|
||||
tmpline.clear();
|
||||
}
|
||||
|
||||
int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
||||
{
|
||||
static bool first_call = true;
|
||||
vector<string> tokens;
|
||||
tokens.clear();
|
||||
tokenize(fptr, tokens);
|
||||
if(tokens.size() == 0)
|
||||
return -1;
|
||||
tc->hap = new char[tokens[0].size()+2];
|
||||
tc->haplen = tokens[0].size();
|
||||
memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
|
||||
tc->rs = new char[tokens[1].size()+2];
|
||||
tc->rslen = tokens[1].size();
|
||||
//cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
|
||||
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
|
||||
assert(tokens.size() == 2 + 4*(tc->rslen));
|
||||
assert(tc->rslen < MROWS);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->q[j] = convToInt(tokens[2+0*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->i[j] = convToInt(tokens[2+1*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->d[j] = convToInt(tokens[2+2*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->c[j] = convToInt(tokens[2+3*tc->rslen+j]);
|
||||
|
||||
if(reformat)
|
||||
{
|
||||
ofstream ofptr;
|
||||
ofptr.open("reformat/debug_dump.txt",first_call ? ios::out : ios::app);
|
||||
assert(ofptr.is_open());
|
||||
ofptr << tokens[0] << " ";
|
||||
ofptr << tokens[1] << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->q[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->i[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->d[j]+33));
|
||||
ofptr << " ";
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
ofptr << ((char)(tc->c[j]+33));
|
||||
ofptr << " 0 false\n";
|
||||
|
||||
ofptr.close();
|
||||
first_call = false;
|
||||
}
|
||||
|
||||
|
||||
return tokens.size();
|
||||
}
|
||||
|
|
@ -0,0 +1,158 @@
|
|||
#include <iostream>
|
||||
|
||||
#ifdef PRECISION
|
||||
#undef PRECISION
|
||||
#undef MAIN_TYPE
|
||||
#undef MAIN_TYPE_SIZE
|
||||
#undef UNION_TYPE
|
||||
#undef IF_128
|
||||
#undef IF_MAIN_TYPE
|
||||
#undef SHIFT_CONST1
|
||||
#undef SHIFT_CONST2
|
||||
#undef SHIFT_CONST3
|
||||
#undef _128_TYPE
|
||||
#undef _256_TYPE
|
||||
#undef AVX_LENGTH
|
||||
#undef MAVX_COUNT
|
||||
#undef HAP_TYPE
|
||||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef print256b
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS
|
||||
#undef _256_INT_TYPE
|
||||
#endif
|
||||
|
||||
#define PRECISION d
|
||||
#define MAIN_TYPE double
|
||||
#define MAIN_TYPE_SIZE 64
|
||||
#define UNION_TYPE mix_D
|
||||
#define IF_128 IF_128d
|
||||
#define IF_MAIN_TYPE IF_64
|
||||
#define SHIFT_CONST1 8
|
||||
#define SHIFT_CONST2 1
|
||||
#define SHIFT_CONST3 8
|
||||
#define _128_TYPE __m128d
|
||||
#define _256_TYPE __m256d
|
||||
#define _256_INT_TYPE __m256i
|
||||
#define AVX_LENGTH 4
|
||||
#define MAVX_COUNT (MROWS+7)/AVX_LENGTH
|
||||
#define HAP_TYPE __m128i
|
||||
#define MASK_TYPE uint64_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFF
|
||||
#define MASK_VEC MaskVec_D
|
||||
|
||||
#define SET_VEC_ZERO(__vec) \
|
||||
__vec= _mm256_setzero_pd()
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm256_or_pd(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm256_add_pd(__v1, __v2)
|
||||
|
||||
#define VEC_SUB(__v1, __v2) \
|
||||
_mm256_sub_pd(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_pd(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm256_div_pd(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
_mm256_blend_pd(__v1, __v2, __mask)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm256_blendv_pd(__v1, __v2, __maskV)
|
||||
|
||||
#define VEC_CAST_256_128(__v1) \
|
||||
_mm256_castpd256_pd128 (__v1)
|
||||
|
||||
#define VEC_EXTRACT_128(__v1, __im) \
|
||||
_mm256_extractf128_pd (__v1, __im)
|
||||
|
||||
#define VEC_EXTRACT_UNIT(__v1, __im) \
|
||||
_mm_extract_epi64(__v1, __im)
|
||||
|
||||
#define VEC_SET1_VAL128(__val) \
|
||||
_mm_set1_pd(__val)
|
||||
|
||||
#define VEC_MOVE(__v1, __val) \
|
||||
_mm_move_sd(__v1, __val)
|
||||
|
||||
#define VEC_CAST_128_256(__v1) \
|
||||
_mm256_castpd128_pd256(__v1)
|
||||
|
||||
#define VEC_INSERT_VAL(__v1, __val, __pos) \
|
||||
_mm256_insertf128_pd(__v1, __val, __pos)
|
||||
|
||||
#define VEC_CVT_128_256(__v1) \
|
||||
_mm256_cvtepi32_pd(__v1)
|
||||
|
||||
#define VEC_SET1_VAL(__val) \
|
||||
_mm256_set1_pd(__val)
|
||||
|
||||
#define VEC_POPCVT_CHAR(__ch) \
|
||||
_mm256_cvtepi32_pd(_mm_set1_epi32(__ch))
|
||||
|
||||
#define VEC_LDPOPCVT_CHAR(__addr) \
|
||||
_mm256_cvtepi32_pd(_mm_load_si128((__m128i const *)__addr))
|
||||
|
||||
#define VEC_CMP_EQ(__v1, __v2) \
|
||||
_mm256_cmp_pd(__v1, __v2, _CMP_EQ_OQ)
|
||||
|
||||
#define VEC_SET_LSE(__val) \
|
||||
_mm256_set_pd(zero, zero, zero, __val);
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
__v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0)
|
||||
|
||||
#define print256b(__v1) \
|
||||
print256bDP(__v1)
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm256_castpd128_pd256(__vsLow) ; \
|
||||
__vdst = _mm256_insertf128_pd(__vdst, __vsHigh, 1) ;
|
||||
|
||||
#define VEC_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_epi64(__vs, 1)
|
||||
|
||||
|
||||
#define COMPARE_VECS(__v1, __v2, __first, __last) { \
|
||||
double* ptr1 = (double*) (&__v1) ; \
|
||||
double* ptr2 = (double*) (&__v2) ; \
|
||||
for (int ei=__first; ei <= __last; ++ei) { \
|
||||
if (ptr1[ei] != ptr2[ei]) { \
|
||||
std::cout << "Double Mismatch at " << ei << ": " \
|
||||
<< ptr1[ei] << " vs. " << ptr2[ei] << std::endl ; \
|
||||
exit(0) ; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
|
@ -0,0 +1,159 @@
|
|||
#include <iostream>
|
||||
|
||||
#ifdef PRECISION
|
||||
#undef PRECISION
|
||||
#undef MAIN_TYPE
|
||||
#undef MAIN_TYPE_SIZE
|
||||
#undef UNION_TYPE
|
||||
#undef IF_128
|
||||
#undef IF_MAIN_TYPE
|
||||
#undef SHIFT_CONST1
|
||||
#undef SHIFT_CONST2
|
||||
#undef SHIFT_CONST3
|
||||
#undef _128_TYPE
|
||||
#undef _256_TYPE
|
||||
#undef AVX_LENGTH
|
||||
#undef MAVX_COUNT
|
||||
#undef HAP_TYPE
|
||||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef print256b(__v1)
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef _256_INT_TYPE
|
||||
#endif
|
||||
|
||||
#define PRECISION s
|
||||
|
||||
#define MAIN_TYPE float
|
||||
#define MAIN_TYPE_SIZE 32
|
||||
#define UNION_TYPE mix_F
|
||||
#define IF_128 IF_128f
|
||||
#define IF_MAIN_TYPE IF_32
|
||||
#define SHIFT_CONST1 12
|
||||
#define SHIFT_CONST2 3
|
||||
#define SHIFT_CONST3 4
|
||||
#define _128_TYPE __m128
|
||||
#define _256_TYPE __m256
|
||||
#define _256_INT_TYPE __m256i
|
||||
#define AVX_LENGTH 8
|
||||
#define MAVX_COUNT (MROWS+7)/AVX_LENGTH
|
||||
#define HAP_TYPE UNION_TYPE
|
||||
#define MASK_TYPE uint32_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFF
|
||||
#define MASK_VEC MaskVec_F
|
||||
|
||||
#define SET_VEC_ZERO(__vec) \
|
||||
__vec= _mm256_setzero_ps()
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm256_or_ps(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm256_add_ps(__v1, __v2)
|
||||
|
||||
#define VEC_SUB(__v1, __v2) \
|
||||
_mm256_sub_ps(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_ps(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm256_div_ps(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
_mm256_blend_ps(__v1, __v2, __mask)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm256_blendv_ps(__v1, __v2, __maskV)
|
||||
|
||||
#define VEC_CAST_256_128(__v1) \
|
||||
_mm256_castps256_ps128 (__v1)
|
||||
|
||||
#define VEC_EXTRACT_128(__v1, __im) \
|
||||
_mm256_extractf128_ps (__v1, __im)
|
||||
|
||||
#define VEC_EXTRACT_UNIT(__v1, __im) \
|
||||
_mm_extract_epi32(__v1, __im)
|
||||
|
||||
#define VEC_SET1_VAL128(__val) \
|
||||
_mm_set1_ps(__val)
|
||||
|
||||
#define VEC_MOVE(__v1, __val) \
|
||||
_mm_move_ss(__v1, __val)
|
||||
|
||||
#define VEC_CAST_128_256(__v1) \
|
||||
_mm256_castps128_ps256(__v1)
|
||||
|
||||
#define VEC_INSERT_VAL(__v1, __val, __pos) \
|
||||
_mm256_insertf128_ps(__v1, __val, __pos)
|
||||
|
||||
#define VEC_CVT_128_256(__v1) \
|
||||
_mm256_cvtepi32_ps(__v1.i)
|
||||
|
||||
#define VEC_SET1_VAL(__val) \
|
||||
_mm256_set1_ps(__val)
|
||||
|
||||
#define VEC_POPCVT_CHAR(__ch) \
|
||||
_mm256_cvtepi32_ps(_mm256_set1_epi32(__ch))
|
||||
|
||||
#define VEC_LDPOPCVT_CHAR(__addr) \
|
||||
_mm256_cvtepi32_ps(_mm256_loadu_si256((__m256i const *)__addr))
|
||||
|
||||
#define VEC_CMP_EQ(__v1, __v2) \
|
||||
_mm256_cmp_ps(__v1, __v2, _CMP_EQ_OQ)
|
||||
|
||||
#define VEC_SET_LSE(__val) \
|
||||
_mm256_set_ps(zero, zero, zero, zero, zero, zero, zero, __val);
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
_vector_shift_lasts(__v1, __val.f);
|
||||
|
||||
#define print256b(__v1) \
|
||||
print256bFP(__v1)
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm256_castps128_ps256(__vsLow) ; \
|
||||
__vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ;
|
||||
|
||||
#define VEC_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_epi32(__vs, 1)
|
||||
|
||||
#define COMPARE_VECS(__v1, __v2, __first, __last) { \
|
||||
float* ptr1 = (float*) (&__v1) ; \
|
||||
float* ptr2 = (float*) (&__v2) ; \
|
||||
for (int ei=__first; ei <= __last; ++ei) { \
|
||||
if (ptr1[ei] != ptr2[ei]) { \
|
||||
std::cout << "Float Mismatch at " << ei << ": " \
|
||||
<< ptr1[ei] << " vs. " << ptr2[ei] << std::endl ; \
|
||||
exit(0) ; \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,451 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
// /usr/intel/pkgs/icc/13.0.0e/bin/icc -o ed -O3 ed.cpp -xAVX -openmp -openmp-link static
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <malloc.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <memory.h>
|
||||
#include "template.h"
|
||||
|
||||
using namespace std ;
|
||||
|
||||
#define CHECK_MASK_CORRECTNESS
|
||||
|
||||
template <class T>
|
||||
string getBinaryStr (T val, int numBitsToWrite) {
|
||||
|
||||
ostringstream oss ;
|
||||
uint64_t mask = ((T) 0x1) << (numBitsToWrite-1) ;
|
||||
for (int i=numBitsToWrite-1; i >= 0; --i) {
|
||||
oss << ((val & mask) >> i) ;
|
||||
mask >>= 1 ;
|
||||
}
|
||||
return oss.str() ;
|
||||
}
|
||||
|
||||
int normalize(char c)
|
||||
{
|
||||
return ((int) (c - 33));
|
||||
}
|
||||
|
||||
|
||||
uint8_t ConvertChar::conversionTable[255] ;
|
||||
|
||||
int read_testcase(testcase *tc, FILE* ifp)
|
||||
{
|
||||
char *q, *i, *d, *c, *line = NULL;
|
||||
int _q, _i, _d, _c;
|
||||
int x, size = 0;
|
||||
ssize_t read;
|
||||
|
||||
read = getline(&line, (size_t *) &size, ifp == 0 ? stdin : ifp);
|
||||
if (read == -1)
|
||||
return -1;
|
||||
|
||||
|
||||
tc->hap = (char *) malloc(size);
|
||||
tc->rs = (char *) malloc(size);
|
||||
q = (char *) malloc(size);
|
||||
i = (char *) malloc(size);
|
||||
d = (char *) malloc(size);
|
||||
c = (char *) malloc(size);
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
|
||||
return -1;
|
||||
|
||||
tc->haplen = strlen(tc->hap);
|
||||
tc->rslen = strlen(tc->rs);
|
||||
assert(tc->rslen < MROWS);
|
||||
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
||||
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
||||
|
||||
//tc->q = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->i = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->d = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->c = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
|
||||
for (x = 0; x < tc->rslen; x++)
|
||||
{
|
||||
_q = normalize(q[x]);
|
||||
_i = normalize(i[x]);
|
||||
_d = normalize(d[x]);
|
||||
_c = normalize(c[x]);
|
||||
tc->q[x] = (_q < 6) ? 6 : _q;
|
||||
tc->i[x] = _i;
|
||||
tc->d[x] = _d;
|
||||
tc->c[x] = _c;
|
||||
tc->irs[x] = tc->rs[x];
|
||||
}
|
||||
for (x = 0; x < tc->haplen; x++)
|
||||
tc->ihap[x] = tc->hap[x];
|
||||
|
||||
|
||||
free(q);
|
||||
free(i);
|
||||
free(d);
|
||||
free(c);
|
||||
free(line);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define ALIGN __attribute__ ((aligned(32)))
|
||||
|
||||
|
||||
typedef union ALIGN {
|
||||
//__m256i vi;
|
||||
__m128 vf ;
|
||||
__m128i vi ;
|
||||
__m128d vd;
|
||||
|
||||
uint8_t b[16];
|
||||
//uint16_t hw[8] ;
|
||||
uint32_t w[4] ;
|
||||
uint64_t dw[2] ;
|
||||
|
||||
float f[4] ;
|
||||
//double d[2] ;
|
||||
|
||||
} v128 ;
|
||||
|
||||
typedef union ALIGN {
|
||||
__m256i vi;
|
||||
__m256 vf ;
|
||||
__m256d vd;
|
||||
//__m128i vi128[2] ;
|
||||
|
||||
uint8_t b[32];
|
||||
//uint16_t hw[16] ;
|
||||
uint32_t w[8] ;
|
||||
uint64_t dw[4] ;
|
||||
|
||||
float f[8] ;
|
||||
double d[4] ;
|
||||
|
||||
} v256 ;
|
||||
|
||||
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
#define VEC_ENTRY_CNT 8
|
||||
#define VEC_LEN 256
|
||||
#define VTYPE vf
|
||||
#define VTYPEI vi
|
||||
#define VENTRY f
|
||||
#define VENTRYI w
|
||||
#define MTYPE uint32_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFF
|
||||
|
||||
|
||||
#define VECTOR v256
|
||||
#define VECTOR_SSE v128
|
||||
|
||||
#define SET_VEC_ZERO(__vec) \
|
||||
__vec= _mm256_setzero_ps()
|
||||
|
||||
#define SET_VEC_ONES(__vec) \
|
||||
__vec = _mm256_set1_epi32(0xFFFFFFFF)
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm256_or_ps(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm256_add_ps(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_ps(__v1, __v2)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm256_blendv_ps(__v1, __v2, __maskV)
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm256_castps128_ps256(__vsLow) ; \
|
||||
__vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ;
|
||||
|
||||
#define VECS_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_epi32(__vs, 1)
|
||||
|
||||
|
||||
#define VECS_SHIFT_RIGHT_WHOLE(__vs, __cnt) { \
|
||||
uint64_t __mask = ; \
|
||||
uint64_t __shiftWord = ((((uint64_t) 0x1) << __cnt) - 1 ) & __vs.dw[1] ; \
|
||||
__shiftWord <<= (64-cnt) ; \
|
||||
__vs = _mm_slri_epi64(__vs, __cnt) ; \
|
||||
__vs.dw[0] |= __shiftWord ; \
|
||||
}
|
||||
|
||||
|
||||
#define GET_MASK_WORD_OLD(__mask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MTYPE __nextShiftOut = (__mask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
__mask >>= __shiftBy ; \
|
||||
__mask |= __lastShiftOut ; \
|
||||
__lastShiftOut = __nextShiftOut ; \
|
||||
}
|
||||
|
||||
|
||||
#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MTYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
__dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \
|
||||
__lastShiftOut = __nextShiftOut ; \
|
||||
}
|
||||
|
||||
|
||||
void precompute_masks_avx(const testcase& tc, int COLS, int numMaskVecs,
|
||||
MTYPE (*maskArr)[NUM_DISTINCT_CHARS]) {
|
||||
|
||||
const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ;
|
||||
|
||||
for (int vi=0; vi < numMaskVecs; ++vi) {
|
||||
for (int rs=0; rs < NUM_DISTINCT_CHARS; ++rs) {
|
||||
maskArr[vi][rs] = 0 ;
|
||||
}
|
||||
maskArr[vi][AMBIG_CHAR] = MASK_ALL_ONES ;
|
||||
}
|
||||
|
||||
for (int col=1; col < COLS; ++col) {
|
||||
int mIndex = (col-1) / maskBitCnt ;
|
||||
int mOffset = (col-1) % maskBitCnt ;
|
||||
MTYPE bitMask = ((MTYPE)0x1) << (maskBitCnt-1-mOffset) ;
|
||||
|
||||
char hapChar = tc.hap[col-1] ;
|
||||
|
||||
if (hapChar == AMBIG_CHAR) {
|
||||
maskArr[mIndex][0] |= bitMask ;
|
||||
maskArr[mIndex][1] |= bitMask ;
|
||||
maskArr[mIndex][2] |= bitMask ;
|
||||
maskArr[mIndex][3] |= bitMask ;
|
||||
}
|
||||
|
||||
//cout << hapChar << " " << mIndex << " " << getBinaryStr<MTYPE>(bitMask, 32)
|
||||
// << endl ;
|
||||
//cout << getBinaryStr<MTYPE>(maskArr[0][hapChar],32) << endl ;
|
||||
//exit(0) ;
|
||||
|
||||
maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ;
|
||||
|
||||
|
||||
// bit corresponding to col 1 will be the MSB of the mask 0
|
||||
// bit corresponding to col 2 will be the MSB-1 of the mask 0
|
||||
// ...
|
||||
// bit corresponding to col 32 will be the LSB of the mask 0
|
||||
// bit corresponding to col 33 will be the MSB of the mask 1
|
||||
// ...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) {
|
||||
|
||||
int ROWS = tc.rslen + 1 ;
|
||||
int COLS = tc.haplen + 1 ;
|
||||
|
||||
// only for testing
|
||||
VECTOR mismatchData, matchData ;
|
||||
//SET_VEC_ZERO(mismatchData.VTYPE) ;
|
||||
//SET_VEC_ONES(matchData.VTYPEI) ;
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT; ++ei) {
|
||||
matchData.VENTRY[ei] = 1.0 ;
|
||||
mismatchData.VENTRY[ei] = 0.0 ;
|
||||
}
|
||||
|
||||
const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ;
|
||||
const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function
|
||||
|
||||
MTYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ;
|
||||
precompute_masks_avx(tc, COLS, numMaskVecs, maskArr) ;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug) {
|
||||
cout << "The first 32 hap chars are: " ;
|
||||
for (int i=0; i < 32; ++i) {
|
||||
cout << tc.hap[i] ;
|
||||
}
|
||||
cout << endl ;
|
||||
|
||||
cout << "Masks computed for A, C, T, G, N are: " << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][0], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][1], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][2], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][3], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][4], 32) << endl ;
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
int beginRowIndex = 1 ;
|
||||
while (beginRowIndex < ROWS) {
|
||||
|
||||
int numRowsToProcess = min(VEC_ENTRY_CNT, ROWS - beginRowIndex) ;
|
||||
|
||||
char rsArr[VEC_ENTRY_CNT] ;
|
||||
for (int ri=0; ri < numRowsToProcess; ++ri) {
|
||||
rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ;
|
||||
}
|
||||
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
VECTOR_SSE currMaskVecLow ; // corresponding to entries 0-3
|
||||
VECTOR_SSE currMaskVecHigh ; // corresponding to entries 4-7
|
||||
|
||||
MTYPE lastMaskShiftOut[VEC_ENTRY_CNT] ;
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT; ++ei)
|
||||
lastMaskShiftOut[ei] = 0 ;
|
||||
|
||||
int col = 1 ;
|
||||
int diag = 1 ;
|
||||
for (int maskIndex=0; maskIndex < numMaskVecs; ++maskIndex) {
|
||||
// set up the mask vectors for the next maskBitCnt columns
|
||||
|
||||
// For AVX, maskBitCnt = 32 (so, the operation below is amortized over 32 cols)
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT/2; ++ei) {
|
||||
SET_MASK_WORD(currMaskVecLow.VENTRYI[ei], maskArr[maskIndex][rsArr[ei]],
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
int ei2 = ei + VEC_ENTRY_CNT/2 ; // the second entry index
|
||||
SET_MASK_WORD(currMaskVecHigh.VENTRYI[ei], maskArr[maskIndex][rsArr[ei2]],
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug && maskIndex == 0) {
|
||||
cout << "The masks for entry 1: " << endl
|
||||
<< getBinaryStr<MTYPE>(maskArr[0][rsArr[1]], 32) << endl
|
||||
<< getBinaryStr<MTYPE>(currMaskVecLow.VENTRYI[1], 32) << endl ;
|
||||
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
// iterate over mask bit indices and columns
|
||||
for (int mbi=0; mbi < maskBitCnt && diag < COLS + ROWS -2; ++mbi, ++diag) {
|
||||
|
||||
VECTOR maskV ;
|
||||
VEC_SSE_TO_AVX(currMaskVecLow.VTYPE, currMaskVecHigh.VTYPE, maskV.VTYPE) ;
|
||||
|
||||
VECTOR testData ;
|
||||
testData.VTYPE = VEC_BLENDV(mismatchData.VTYPE, matchData.VTYPE,
|
||||
maskV.VTYPE) ;
|
||||
|
||||
VECS_SHIFT_LEFT_1BIT(currMaskVecLow.VTYPEI) ;
|
||||
VECS_SHIFT_LEFT_1BIT(currMaskVecHigh.VTYPEI) ;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug && maskIndex == 0) {
|
||||
cout << "The mask for entry 1, mbi=" << mbi << ": "
|
||||
<< getBinaryStr<MTYPE>(maskV.VENTRYI[1], 32) << endl ;
|
||||
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
#ifdef CHECK_MASK_CORRECTNESS
|
||||
|
||||
int firstRowIndex = (diag < COLS) ? 0 : (diag - COLS) ;
|
||||
int lastRowIndex = min(col-1, numRowsToProcess-1) ;
|
||||
|
||||
for (int ri=firstRowIndex; ri <= lastRowIndex; ++ri) {
|
||||
int currRow = beginRowIndex + ri ;
|
||||
int currCol = col - ri + firstRowIndex ;
|
||||
|
||||
char hapChar = tc.hap[currCol-1] ;
|
||||
char rsChar = tc.rs[currRow-1] ;
|
||||
|
||||
bool match = (hapChar == rsChar || hapChar == 'N' || rsChar == 'N') ;
|
||||
|
||||
if ((bool) testData.VENTRYI[ri] != match) {
|
||||
cout << "Error: Incorrect mask for tc " << tcID << ", diag = " << diag
|
||||
<< " (" << currRow << ", " << currCol << ")" << endl ;
|
||||
|
||||
cout << "The chars are: " << hapChar << " and " << rsChar << endl ;
|
||||
cout << "The selected value is: " << testData.VENTRYI[ri] << endl ;
|
||||
|
||||
exit(0) ;
|
||||
}
|
||||
}
|
||||
#endif // #ifdef CHECK_MASK_CORRECTNESS
|
||||
|
||||
if (diag < COLS)
|
||||
++col ;
|
||||
|
||||
} // mbi
|
||||
} // maskIndex
|
||||
|
||||
beginRowIndex += VEC_ENTRY_CNT ;
|
||||
} // end of stripe
|
||||
|
||||
//cout << "Finished validating entry " << endl ;
|
||||
}
|
||||
|
||||
#ifdef HMM_MASK_MAIN
|
||||
int main () {
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
|
||||
ConvertChar::init() ;
|
||||
|
||||
testcase* tcBatch = new testcase[BATCH_SIZE] ;
|
||||
|
||||
int numBatches = 0 ;
|
||||
int numRead = 0 ;
|
||||
|
||||
const int DEBUG_TC = -1 ;
|
||||
|
||||
FILE* ifp = stdin ;
|
||||
|
||||
do {
|
||||
numRead = 0 ;
|
||||
|
||||
while (numRead < BATCH_SIZE) {
|
||||
if (read_testcase(tcBatch+numRead, ifp) < 0)
|
||||
break ;
|
||||
|
||||
++numRead ;
|
||||
}
|
||||
|
||||
if (numRead == 0)
|
||||
break ;
|
||||
|
||||
for (int ti=0; ti < numRead; ++ti) {
|
||||
|
||||
int tcID = numBatches * BATCH_SIZE + ti ;
|
||||
test_mask_computations(tcBatch[ti], tcID, tcID == DEBUG_TC) ;
|
||||
}
|
||||
|
||||
++numBatches ;
|
||||
} while (numRead == BATCH_SIZE) ;
|
||||
|
||||
fclose(ifp) ;
|
||||
|
||||
delete[] tcBatch ;
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
#endif
|
||||
|
|
@ -0,0 +1,349 @@
|
|||
#include <jni.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h"
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
using namespace std;
|
||||
//#define DEBUG 1
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
|
||||
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
public:
|
||||
LoadTimeInitializer() //will be called when library is loaded
|
||||
{
|
||||
ConvertChar::init();
|
||||
}
|
||||
jfieldID m_readBasesFID;
|
||||
jfieldID m_readQualsFID;
|
||||
jfieldID m_insertionGOPFID;
|
||||
jfieldID m_deletionGOPFID;
|
||||
jfieldID m_overallGCPFID;
|
||||
jfieldID m_haplotypeBasesFID;
|
||||
};
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
void debug_dump(string filename, string s, bool to_append, bool add_newline)
|
||||
{
|
||||
ofstream fptr;
|
||||
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
fptr << s;
|
||||
if(add_newline)
|
||||
fptr << "\n";
|
||||
fptr.close();
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities
|
||||
(JNIEnv* env, jclass thisObject,
|
||||
jobjectArray transition, jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP
|
||||
)
|
||||
{}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitialize
|
||||
(JNIEnv* env, jobject thisObject,
|
||||
jint readMaxLength, jint haplotypeMaxLength)
|
||||
{}
|
||||
|
||||
JNIEXPORT jdouble JNICALL
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePriorsAndUpdateCells(
|
||||
JNIEnv* env, jobject thisObject,
|
||||
jboolean doInitialization, jint paddedReadLength, jint paddedHaplotypeLength,
|
||||
jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals,
|
||||
jint hapStartIndex
|
||||
)
|
||||
|
||||
{ return 0.0; }
|
||||
|
||||
#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1
|
||||
|
||||
#ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY
|
||||
//Gets direct access to Java arrays
|
||||
#define GET_BYTE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
|
||||
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical
|
||||
#define JNI_RELEASE_MODE JNI_ABORT
|
||||
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
|
||||
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical
|
||||
|
||||
#else
|
||||
//Likely makes copy of Java arrays to JNI C++ space
|
||||
#define GET_BYTE_ARRAY_ELEMENTS env->GetByteArrayElements
|
||||
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleaseByteArrayElements
|
||||
#define JNI_RELEASE_MODE JNI_ABORT
|
||||
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetDoubleArrayElements
|
||||
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleaseDoubleArrayElements
|
||||
|
||||
#endif //ifdef DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY
|
||||
|
||||
JNIEXPORT jdouble JNICALL
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10(
|
||||
JNIEnv* env, jobject thisObject,
|
||||
jint readLength, jint haplotypeLength,
|
||||
jbyteArray readBases, jbyteArray haplotypeBases, jbyteArray readQuals,
|
||||
jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP,
|
||||
jint hapStartIndex
|
||||
)
|
||||
{
|
||||
jboolean is_copy = JNI_FALSE;
|
||||
jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy);
|
||||
jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy);
|
||||
jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy);
|
||||
jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy);
|
||||
jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy);
|
||||
jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
|
||||
#ifdef DEBUG
|
||||
assert(readBasesArray && "readBasesArray not initialized in JNI");
|
||||
assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
|
||||
assert(readQualsArray && "readQualsArray not initialized in JNI");
|
||||
assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
|
||||
assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
|
||||
assert(overallGCPArray && "OverallGCP array not initialized in JNI");
|
||||
assert(readLength < MROWS);
|
||||
#endif
|
||||
testcase tc;
|
||||
tc.rslen = readLength;
|
||||
tc.haplen = haplotypeLength;
|
||||
tc.rs = (char*)readBasesArray;
|
||||
tc.hap = (char*)haplotypeBasesArray;
|
||||
for(unsigned i=0;i<readLength;++i)
|
||||
{
|
||||
tc.q[i] = (int)readQualsArray[i];
|
||||
tc.i[i] = (int)insertionGOPArray[i];
|
||||
tc.d[i] = (int)deletionGOPArray[i];
|
||||
tc.c[i] = (int)overallGCPArray[i];
|
||||
}
|
||||
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
#ifdef DEBUG
|
||||
debug_dump("return_values_jni.txt",to_string(result),true);
|
||||
#endif
|
||||
|
||||
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RELEASE_MODE);
|
||||
|
||||
return 0.0;
|
||||
}
|
||||
|
||||
//Should be called only once for the whole Java process - initializes field ids for the classes JNIReadDataHolderClass
|
||||
//and JNIHaplotypeDataHolderClass
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit
|
||||
(JNIEnv* env, jobject thisObject, jclass readDataHolderClass, jclass haplotypeDataHolderClass)
|
||||
{
|
||||
assert(readDataHolderClass);
|
||||
assert(haplotypeDataHolderClass);
|
||||
jfieldID fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "readBases", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for readBases");
|
||||
g_load_time_initializer.m_readBasesFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "readQuals", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for readQuals");
|
||||
g_load_time_initializer.m_readQualsFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "insertionGOP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for insertionGOP");
|
||||
g_load_time_initializer.m_insertionGOPFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "deletionGOP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for deletionGOP");
|
||||
g_load_time_initializer.m_deletionGOPFID = fid;
|
||||
fid = env->GetFieldID(readDataHolderClass, "overallGCP", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for overallGCP");
|
||||
g_load_time_initializer.m_overallGCPFID = fid;
|
||||
|
||||
fid = env->GetFieldID(haplotypeDataHolderClass, "haplotypeBases", "[B");
|
||||
assert(fid && "JNI pairHMM: Could not get FID for haplotypeBases");
|
||||
g_load_time_initializer.m_haplotypeBasesFID = fid;
|
||||
}
|
||||
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods
|
||||
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
|
||||
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
|
||||
{
|
||||
jboolean is_copy = JNI_FALSE;
|
||||
//To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector
|
||||
vector<pair<jbyteArray, jbyte*> > haplotypeBasesArrayVector;
|
||||
haplotypeBasesArrayVector.clear();
|
||||
haplotypeBasesArrayVector.resize(numHaplotypes);
|
||||
#ifdef DEBUG0_1
|
||||
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
|
||||
#endif
|
||||
for(unsigned j=0;j<numHaplotypes;++j)
|
||||
{
|
||||
jobject haplotypeObject = env->GetObjectArrayElement(haplotypeDataArray, j);
|
||||
jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID);
|
||||
#ifdef DEBUG
|
||||
assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str());
|
||||
#endif
|
||||
jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy);
|
||||
#ifdef DEBUG
|
||||
assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
|
||||
assert(env->GetArrayLength(haplotypeBases) < MCOLS);
|
||||
#ifdef DEBUG0_1
|
||||
cout << "JNI haplotype length "<<env->GetArrayLength(haplotypeBases)<<"\n";
|
||||
#endif
|
||||
#endif
|
||||
haplotypeBasesArrayVector[j] = make_pair(haplotypeBases, haplotypeBasesArray);
|
||||
#ifdef DEBUG3
|
||||
for(unsigned k=0;k<env->GetArrayLength(haplotypeBases);++k)
|
||||
debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned numTestCases = numReads*numHaplotypes;
|
||||
vector<testcase> tc_array;
|
||||
tc_array.clear();
|
||||
tc_array.resize(numTestCases);
|
||||
unsigned tc_idx = 0;
|
||||
//Store arrays for release later
|
||||
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
|
||||
readBasesArrayVector.clear();
|
||||
readBasesArrayVector.resize(numReads);
|
||||
for(unsigned i=0;i<numReads;++i)
|
||||
{
|
||||
//Get bytearray fields from read
|
||||
jobject readObject = env->GetObjectArrayElement(readDataArray, i);
|
||||
jbyteArray readBases = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_readBasesFID);
|
||||
jbyteArray insertionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_insertionGOPFID);
|
||||
jbyteArray deletionGOP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_deletionGOPFID);
|
||||
jbyteArray overallGCP = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_overallGCPFID);
|
||||
jbyteArray readQuals = (jbyteArray)env->GetObjectField(readObject, g_load_time_initializer.m_readQualsFID);
|
||||
|
||||
#ifdef DEBUG
|
||||
assert(readBases && ("readBases is NULL at index : "+to_string(i)+"\n").c_str());
|
||||
assert(insertionGOP && ("insertionGOP is NULL at index : "+to_string(i)+"\n").c_str());
|
||||
assert(deletionGOP && ("deletionGOP is NULL at index : "+to_string(i)+"\n").c_str());
|
||||
assert(overallGCP && ("overallGCP is NULL at index : "+to_string(i)+"\n").c_str());
|
||||
assert(readQuals && ("readQuals is NULL at index : "+to_string(i)+"\n").c_str());
|
||||
#endif
|
||||
jsize readLength = env->GetArrayLength(readBases);
|
||||
|
||||
jbyte* readBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readBases, &is_copy); //order of GET-RELEASE is important
|
||||
jbyte* readQualsArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(readQuals, &is_copy);
|
||||
jbyte* insertionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(insertionGOP, &is_copy);
|
||||
jbyte* deletionGOPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(deletionGOP, &is_copy);
|
||||
jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
|
||||
#ifdef DEBUG
|
||||
assert(readBasesArray && "readBasesArray not initialized in JNI");
|
||||
assert(readQualsArray && "readQualsArray not initialized in JNI");
|
||||
assert(insertionGOPArray && "insertionGOP array not initialized in JNI");
|
||||
assert(deletionGOPArray && "deletionGOP array not initialized in JNI");
|
||||
assert(overallGCPArray && "overallGCP array not initialized in JNI");
|
||||
assert(readLength < MROWS);
|
||||
assert(readLength == env->GetArrayLength(readQuals));
|
||||
assert(readLength == env->GetArrayLength(insertionGOP));
|
||||
assert(readLength == env->GetArrayLength(deletionGOP));
|
||||
assert(readLength == env->GetArrayLength(overallGCP));
|
||||
#ifdef DEBUG0_1
|
||||
cout << "JNI read length "<<readLength<<"\n";
|
||||
#endif
|
||||
#endif
|
||||
#ifdef DEBUG3
|
||||
for(unsigned j=0;j<readLength;++j)
|
||||
{
|
||||
debug_dump("reads_jni.txt",to_string((int)readBasesArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)readQualsArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)insertionGOPArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)deletionGOPArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
|
||||
}
|
||||
#endif
|
||||
|
||||
for(unsigned j=0;j<numHaplotypes;++j)
|
||||
{
|
||||
jsize haplotypeLength = env->GetArrayLength(haplotypeBasesArrayVector[j].first);
|
||||
jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second;
|
||||
tc_array[tc_idx].rslen = (int)readLength;
|
||||
tc_array[tc_idx].haplen = (int)haplotypeLength;
|
||||
tc_array[tc_idx].rs = (char*)readBasesArray;
|
||||
tc_array[tc_idx].hap = (char*)haplotypeBasesArray;
|
||||
//Can be avoided
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
{
|
||||
tc_array[tc_idx].q[k] = (int)readQualsArray[k];
|
||||
tc_array[tc_idx].i[k] = (int)insertionGOPArray[k];
|
||||
tc_array[tc_idx].d[k] = (int)deletionGOPArray[k];
|
||||
tc_array[tc_idx].c[k] = (int)overallGCPArray[k];
|
||||
}
|
||||
|
||||
++tc_idx;
|
||||
}
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RELEASE_MODE); //order of GET-RELEASE is important
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RELEASE_MODE);
|
||||
|
||||
//Release readBases at end because it is used by compute_full_prob
|
||||
readBasesArrayVector[i].clear();
|
||||
readBasesArrayVector[i].resize(1);
|
||||
readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
|
||||
}
|
||||
|
||||
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
|
||||
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
|
||||
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
||||
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&(tc_array[tc_idx]));
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
|
||||
}
|
||||
#endif
|
||||
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0);
|
||||
|
||||
//Release read arrays first
|
||||
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
|
||||
{
|
||||
for(int j=readBasesArrayVector.size()-1;j>=0;--j)
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RELEASE_MODE);
|
||||
readBasesArrayVector[i].clear();
|
||||
}
|
||||
readBasesArrayVector.clear();
|
||||
|
||||
//Now release haplotype arrays
|
||||
for(int j=numHaplotypes-1;j>=0;--j) //note the order - reverse of GET
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RELEASE_MODE);
|
||||
haplotypeBasesArrayVector.clear();
|
||||
tc_array.clear();
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,76 @@
|
|||
/* DO NOT EDIT THIS FILE - it is machine generated */
|
||||
#include <jni.h>
|
||||
/* Header for class org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM */
|
||||
|
||||
#ifndef _Included_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
#define _Included_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_TRISTATE_CORRECTION
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_TRISTATE_CORRECTION 3.0
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToMatch
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToMatch 0L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_indelToMatch
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_indelToMatch 1L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToInsertion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToInsertion 2L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_insertionToInsertion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_insertionToInsertion 3L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToDeletion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToDeletion 4L
|
||||
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion
|
||||
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion 5L
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniInitialize
|
||||
* Signature: (II)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitialize
|
||||
(JNIEnv *, jobject, jint, jint);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniInitializeProbabilities
|
||||
* Signature: ([[D[B[B[B)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities
|
||||
(JNIEnv *, jclass, jobjectArray, jbyteArray, jbyteArray, jbyteArray);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniInitializePriorsAndUpdateCells
|
||||
* Signature: (ZII[B[B[BI)D
|
||||
*/
|
||||
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePriorsAndUpdateCells
|
||||
(JNIEnv *, jobject, jboolean, jint, jint, jbyteArray, jbyteArray, jbyteArray, jint);
|
||||
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniSubComputeReadLikelihoodGivenHaplotypeLog10
|
||||
* Signature: (II[B[B[B[B[B[BI)D
|
||||
*/
|
||||
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10
|
||||
(JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniGlobalInit
|
||||
* Signature: (Ljava/lang/Class;Ljava/lang/Class;)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit
|
||||
(JNIEnv *, jobject, jclass, jclass);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniComputeLikelihoods
|
||||
* Signature: ([Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;[D)V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods
|
||||
(JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
|
@ -0,0 +1,238 @@
|
|||
//#define DEBUG 1
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <omp.h>
|
||||
#include <emmintrin.h>
|
||||
#include "template.h"
|
||||
|
||||
//#include "define-float.h"
|
||||
//#include "shift_template.c"
|
||||
//#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
|
||||
using namespace std;
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
public:
|
||||
LoadTimeInitializer() //will be called when library is loaded
|
||||
{
|
||||
ConvertChar::init();
|
||||
}
|
||||
};
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
|
||||
Context<NUMBER> ctx;
|
||||
|
||||
NUMBER M[MROWS][MCOLS];
|
||||
NUMBER X[MROWS][MCOLS];
|
||||
NUMBER Y[MROWS][MCOLS];
|
||||
NUMBER p[MROWS][6];
|
||||
|
||||
p[0][MM] = ctx._(0.0);
|
||||
p[0][GapM] = ctx._(0.0);
|
||||
p[0][MX] = ctx._(0.0);
|
||||
p[0][XX] = ctx._(0.0);
|
||||
p[0][MY] = ctx._(0.0);
|
||||
p[0][YY] = ctx._(0.0);
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
p[r][MX] = ctx.ph2pr[_i];
|
||||
p[r][XX] = ctx.ph2pr[_c];
|
||||
p[r][MY] = ctx.ph2pr[_d];
|
||||
p[r][YY] = ctx.ph2pr[_c];
|
||||
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
M[0][c] = ctx._(0.0);
|
||||
X[0][c] = ctx._(0.0);
|
||||
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
}
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
M[r][0] = ctx._(0.0);
|
||||
X[r][0] = X[r-1][0] * p[r][XX];
|
||||
Y[r][0] = ctx._(0.0);
|
||||
}
|
||||
|
||||
NUMBER result = ctx._(0.0);
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
for (c = 1; c < COLS; c++)
|
||||
{
|
||||
char _rs = tc->rs[r-1];
|
||||
char _hap = tc->hap[c-1];
|
||||
int _q = tc->q[r-1] & 127;
|
||||
NUMBER distm = ctx.ph2pr[_q];
|
||||
if (_rs == _hap || _rs == 'N' || _hap == 'N')
|
||||
distm = ctx._(1.0) - distm;
|
||||
else
|
||||
distm = distm/3;
|
||||
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
|
||||
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
|
||||
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
result += M[ROWS-1][c] + X[ROWS-1][c];
|
||||
}
|
||||
|
||||
if (before_last_log != NULL)
|
||||
*before_last_log = result;
|
||||
|
||||
return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||
}
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
||||
int main(int argc, char** argv)
|
||||
{
|
||||
if(argc < 2)
|
||||
{
|
||||
cerr << "Needs path to input file as argument\n";
|
||||
exit(0);
|
||||
}
|
||||
bool use_old_read_testcase = false;
|
||||
if(argc >= 3 && string(argv[2]) == "1")
|
||||
use_old_read_testcase = true;
|
||||
|
||||
testcase tc;
|
||||
if(use_old_read_testcase)
|
||||
{
|
||||
FILE* fptr = fopen(argv[1],"r");
|
||||
while(!feof(fptr))
|
||||
{
|
||||
if(read_testcase(&tc, fptr) >= 0)
|
||||
{
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
|
||||
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
|
||||
delete tc.rs;
|
||||
delete tc.hap;
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ifstream ifptr;
|
||||
std::vector<std::string> tokens;
|
||||
ifptr.open(argv[1]);
|
||||
assert(ifptr.is_open());
|
||||
while(1)
|
||||
{
|
||||
tokens.clear();
|
||||
if(read_mod_testcase(ifptr, &tc, false) < 0)
|
||||
break;
|
||||
//double result = 0;
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
|
||||
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
|
||||
delete tc.rs;
|
||||
delete tc.hap;
|
||||
}
|
||||
ifptr.close();
|
||||
}
|
||||
return 0;
|
||||
|
||||
#if 0
|
||||
float result[BATCH_SIZE], result_avxf;
|
||||
double result_avxd;
|
||||
struct timeval start, end;
|
||||
long long aggregateTimeRead = 0L;
|
||||
long long aggregateTimeCompute = 0L;
|
||||
long long aggregateTimeWrite = 0L;
|
||||
|
||||
bool noMoreData = false;
|
||||
int count =0;
|
||||
while (!noMoreData)
|
||||
{
|
||||
int read_count = BATCH_SIZE;
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<BATCH_SIZE;b++)
|
||||
if (read_testcase(&tc[b], NULL)==-1)
|
||||
{
|
||||
read_count = b;
|
||||
noMoreData = true;
|
||||
break;
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeRead += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<read_count;b++)
|
||||
{
|
||||
result_avxf = compute_full_prob<float>(&tc[b]);
|
||||
|
||||
#ifdef RUN_HYBRID
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
count++;
|
||||
result_avxd = compute_full_prob<double>(&tc[b]);
|
||||
result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f));
|
||||
}
|
||||
else
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
#ifndef RUN_HYBRID
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeCompute += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<read_count;b++)
|
||||
printf("%E\n", result[b]);
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeWrite += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
}
|
||||
|
||||
printf("AVX Read Time: %ld\n", aggregateTimeRead);
|
||||
printf("AVX Compute Time: %ld\n", aggregateTimeCompute);
|
||||
printf("AVX Write Time: %ld\n", aggregateTimeWrite);
|
||||
printf("AVX Total Time: %ld\n", aggregateTimeRead + aggregateTimeCompute + aggregateTimeWrite);
|
||||
printf("# Double called: %d\n", count);
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,417 @@
|
|||
#ifdef PRECISION
|
||||
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/*
|
||||
template <class T>
|
||||
string getBinaryStr (T val, int numBitsToWrite) {
|
||||
|
||||
ostringstream oss ;
|
||||
uint64_t mask = ((T) 0x1) << (numBitsToWrite-1) ;
|
||||
for (int i=numBitsToWrite-1; i >= 0; --i) {
|
||||
oss << ((val & mask) >> i) ;
|
||||
mask >>= 1 ;
|
||||
}
|
||||
return oss.str() ;
|
||||
}
|
||||
*/
|
||||
|
||||
void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) {
|
||||
|
||||
const int maskBitCnt = MAIN_TYPE_SIZE ;
|
||||
|
||||
for (int vi=0; vi < numMaskVecs; ++vi) {
|
||||
for (int rs=0; rs < NUM_DISTINCT_CHARS; ++rs) {
|
||||
maskArr[vi][rs] = 0 ;
|
||||
}
|
||||
maskArr[vi][AMBIG_CHAR] = MASK_ALL_ONES ;
|
||||
}
|
||||
|
||||
for (int col=1; col < COLS; ++col) {
|
||||
int mIndex = (col-1) / maskBitCnt ;
|
||||
int mOffset = (col-1) % maskBitCnt ;
|
||||
MASK_TYPE bitMask = ((MASK_TYPE)0x1) << (maskBitCnt-1-mOffset) ;
|
||||
|
||||
char hapChar = tc.hap[col-1] ;
|
||||
|
||||
if (hapChar == AMBIG_CHAR) {
|
||||
for (int ci=0; ci < NUM_DISTINCT_CHARS; ++ci)
|
||||
maskArr[mIndex][ci] |= bitMask ;
|
||||
}
|
||||
|
||||
maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ;
|
||||
// bit corresponding to col 1 will be the MSB of the mask 0
|
||||
// bit corresponding to col 2 will be the MSB-1 of the mask 0
|
||||
// ...
|
||||
// bit corresponding to col 32 will be the LSB of the mask 0
|
||||
// bit corresponding to col 33 will be the MSB of the mask 1
|
||||
// ...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) {
|
||||
|
||||
for (int ri=0; ri < numRowsToProcess; ++ri) {
|
||||
rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ;
|
||||
}
|
||||
|
||||
for (int ei=0; ei < AVX_LENGTH; ++ei) {
|
||||
lastMaskShiftOut[ei] = 0 ;
|
||||
}
|
||||
}
|
||||
|
||||
#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MASK_TYPE __bitMask = (((MASK_TYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MASK_TYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
__dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \
|
||||
__lastShiftOut = __nextShiftOut ; \
|
||||
}
|
||||
|
||||
|
||||
void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt) {
|
||||
|
||||
for (int ei=0; ei < AVX_LENGTH/2; ++ei) {
|
||||
SET_MASK_WORD(currMaskVecLow.masks[ei], maskArr[maskIndex][rsArr[ei]],
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
int ei2 = ei + AVX_LENGTH/2 ; // the second entry index
|
||||
SET_MASK_WORD(currMaskVecHigh.masks[ei], maskArr[maskIndex][rsArr[ei2]],
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
//void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen, const _256_TYPE& distmSel, int firstRowIndex, int lastRowIndex) {
|
||||
|
||||
inline void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen) {
|
||||
//#define computeDistVec() {
|
||||
|
||||
_256_TYPE maskV ;
|
||||
VEC_SSE_TO_AVX(currMaskVecLow.vecf, currMaskVecHigh.vecf, maskV) ;
|
||||
|
||||
distmChosen = VEC_BLENDV(distm, _1_distm, maskV) ;
|
||||
|
||||
/*COMPARE_VECS(distmChosen, distmSel, firstRowIndex, lastRowIndex) ;*/
|
||||
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecLow.vec) ;
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecHigh.vec) ;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
template<class NUMBER>
|
||||
struct HmmData {
|
||||
int ROWS ;
|
||||
int COLS ;
|
||||
|
||||
NUMBER shiftOutM[MROWS+MCOLS+AVX_LENGTH], shiftOutX[MROWS+MCOLS+AVX_LENGTH], shiftOutY[MROWS+MCOLS+AVX_LENGTH] ;
|
||||
Context<NUMBER> ctx ;
|
||||
testcase* tc ;
|
||||
_256_TYPE p_MM[MAVX_COUNT], p_GAPM[MAVX_COUNT], p_MX[MAVX_COUNT], p_XX[MAVX_COUNT], p_MY[MAVX_COUNT], p_YY[MAVX_COUNT], distm1D[MAVX_COUNT] ;
|
||||
_256_TYPE pGAPM, pMM, pMX, pXX, pMY, pYY ;
|
||||
|
||||
UNION_TYPE M_t, M_t_1, M_t_2, X_t, X_t_1, X_t_2, Y_t, Y_t_1, Y_t_2, M_t_y, M_t_1_y ;
|
||||
UNION_TYPE rs , rsN ;
|
||||
_256_TYPE distmSel;
|
||||
_256_TYPE distm, _1_distm;
|
||||
|
||||
} ;
|
||||
*/
|
||||
|
||||
template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D)
|
||||
{
|
||||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
for (int s=0;s<ROWS+COLS+AVX_LENGTH;s++)
|
||||
{
|
||||
shiftOutM[s] = zero;
|
||||
shiftOutX[s] = zero;
|
||||
shiftOutY[s] = init_Y;
|
||||
}
|
||||
|
||||
NUMBER *ptr_p_MM = (NUMBER *)p_MM;
|
||||
NUMBER *ptr_p_XX = (NUMBER *)p_XX;
|
||||
NUMBER *ptr_p_YY = (NUMBER *)p_YY;
|
||||
NUMBER *ptr_p_MX = (NUMBER *)p_MX;
|
||||
NUMBER *ptr_p_MY = (NUMBER *)p_MY;
|
||||
NUMBER *ptr_p_GAPM = (NUMBER *)p_GAPM;
|
||||
|
||||
*ptr_p_MM = ctx._(0.0);
|
||||
*ptr_p_XX = ctx._(0.0);
|
||||
*ptr_p_YY = ctx._(0.0);
|
||||
*ptr_p_MX = ctx._(0.0);
|
||||
*ptr_p_MY = ctx._(0.0);
|
||||
*ptr_p_GAPM = ctx._(0.0);
|
||||
|
||||
|
||||
for (int r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
|
||||
*(ptr_p_MM+r-1) = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
|
||||
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
|
||||
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
|
||||
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
|
||||
#ifdef DEBUG3
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1) ),true);
|
||||
#endif
|
||||
//*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
}
|
||||
|
||||
NUMBER *ptr_distm1D = (NUMBER *)distm1D;
|
||||
for (int r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _q = tc->q[r-1] & 127;
|
||||
ptr_distm1D[r-1] = ctx.ph2pr[_q];
|
||||
#ifdef DEBUG3
|
||||
debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM)
|
||||
{
|
||||
int i = stripIdx;
|
||||
pGAPM = p_GAPM[i];
|
||||
pMM = p_MM[i];
|
||||
pMX = p_MX[i];
|
||||
pXX = p_XX[i];
|
||||
pMY = p_MY[i];
|
||||
pYY = p_YY[i];
|
||||
|
||||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
|
||||
#define TRISTATE_CORRECTION_FACTOR 3.0
|
||||
UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR);
|
||||
|
||||
/* compare rs and N */
|
||||
//rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH));
|
||||
//rsN.d = VEC_CMP_EQ(N_packed256, rs);
|
||||
|
||||
distm = distm1D[i];
|
||||
_1_distm = VEC_SUB(packed1.d, distm);
|
||||
#ifndef DO_NOT_USE_TRISTATE_CORRECTION
|
||||
distm = VEC_DIV(distm, packed3.d);
|
||||
#endif
|
||||
|
||||
/* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */
|
||||
M_t_2.d = VEC_SET1_VAL(zero);
|
||||
X_t_2.d = VEC_SET1_VAL(zero);
|
||||
|
||||
if (i==0) {
|
||||
M_t_1.d = VEC_SET1_VAL(zero);
|
||||
X_t_1.d = VEC_SET1_VAL(zero);
|
||||
Y_t_2.d = VEC_SET_LSE(init_Y);
|
||||
Y_t_1.d = VEC_SET1_VAL(zero);
|
||||
}
|
||||
else {
|
||||
X_t_1.d = VEC_SET_LSE(shiftOutX[AVX_LENGTH]);
|
||||
M_t_1.d = VEC_SET_LSE(shiftOutM[AVX_LENGTH]);
|
||||
Y_t_2.d = VEC_SET1_VAL(zero);
|
||||
Y_t_1.d = VEC_SET1_VAL(zero);
|
||||
}
|
||||
M_t_1_y = M_t_1;
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline _256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm)
|
||||
{
|
||||
UNION_TYPE hapN, rshap;
|
||||
_256_TYPE cond;
|
||||
IF_32 shiftInHap;
|
||||
|
||||
int *hap_ptr = tc->ihap;
|
||||
|
||||
shiftInHap.i = (d<COLS) ? hap_ptr[d-1] : hap_ptr[COLS-1];
|
||||
|
||||
/* shift hap */
|
||||
SHIFT_HAP(hap, shiftInHap);
|
||||
_256_TYPE hapF = VEC_CVT_128_256(hap);
|
||||
|
||||
rshap.d = VEC_CMP_EQ(rs, hapF);
|
||||
hapN.d = VEC_CMP_EQ(N_packed256, hapF);
|
||||
|
||||
/* OR rsN, rshap, hapN */
|
||||
cond = VEC_OR(rsN.d, rshap.d);
|
||||
cond = VEC_OR(cond, hapN.d);
|
||||
|
||||
/* distm1D = (cond) ? 1-distm1D : distm1D; */
|
||||
_256_TYPE distmSel = VEC_BLENDV(distm, _1_distm, cond);
|
||||
|
||||
return distmSel;
|
||||
}
|
||||
|
||||
|
||||
inline void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel)
|
||||
{
|
||||
/* Compute M_t <= distm * (p_MM*M_t_2 + p_GAPM*X_t_2 + p_GAPM*Y_t_2) */
|
||||
M_t.d = VEC_MUL(VEC_ADD(VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(X_t_2.d, pGAPM)), VEC_MUL(Y_t_2.d, pGAPM)), distmSel);
|
||||
M_t_y = M_t;
|
||||
|
||||
/* Compute X_t */
|
||||
X_t.d = VEC_ADD(VEC_MUL(M_t_1.d, pMX) , VEC_MUL(X_t_1.d, pXX));
|
||||
|
||||
/* Compute Y_t */
|
||||
Y_t.d = VEC_ADD(VEC_MUL(M_t_1_y.d, pMY) , VEC_MUL(Y_t_1.d, pYY));
|
||||
}
|
||||
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
_256_TYPE p_MM [MAVX_COUNT], p_GAPM [MAVX_COUNT], p_MX [MAVX_COUNT];
|
||||
_256_TYPE p_XX [MAVX_COUNT], p_MY [MAVX_COUNT], p_YY [MAVX_COUNT];
|
||||
_256_TYPE distm1D[MAVX_COUNT];
|
||||
NUMBER shiftOutM[MROWS+MCOLS+AVX_LENGTH], shiftOutX[MROWS+MCOLS+AVX_LENGTH], shiftOutY[MROWS+MCOLS+AVX_LENGTH];
|
||||
UNION_TYPE M_t, M_t_1, M_t_2, X_t, X_t_1, X_t_2, Y_t, Y_t_1, Y_t_2, M_t_y, M_t_1_y;
|
||||
_256_TYPE pGAPM, pMM, pMX, pXX, pMY, pYY;
|
||||
|
||||
struct timeval start, end;
|
||||
NUMBER result_avx2;
|
||||
Context<NUMBER> ctx;
|
||||
UNION_TYPE rs , rsN;
|
||||
HAP_TYPE hap;
|
||||
_256_TYPE distmSel, distmChosen ;
|
||||
_256_TYPE distm, _1_distm;
|
||||
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
int AVX_COUNT = (ROWS+7)/8;
|
||||
NUMBER zero = ctx._(0.0);
|
||||
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
|
||||
_256_TYPE N_packed256 = VEC_POPCVT_CHAR('N');
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
int remainingRows = (ROWS-1) % AVX_LENGTH;
|
||||
int strip_cnt = ((ROWS-1) / AVX_LENGTH) + (remainingRows!=0);
|
||||
|
||||
const int maskBitCnt = MAIN_TYPE_SIZE ;
|
||||
const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function
|
||||
|
||||
MASK_TYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ;
|
||||
GEN_INTRINSIC(precompute_masks_, PRECISION)(*tc, COLS, numMaskVecs, maskArr) ;
|
||||
|
||||
char rsArr[AVX_LENGTH] ;
|
||||
MASK_TYPE lastMaskShiftOut[AVX_LENGTH] ;
|
||||
|
||||
GEN_INTRINSIC(initializeVectors, PRECISION)<NUMBER>(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY,
|
||||
ctx, tc, p_MM, p_GAPM, p_MX, p_XX, p_MY, p_YY, distm1D);
|
||||
|
||||
//for (int __ii=0; __ii < 10; ++__ii)
|
||||
for (int i=0;i<strip_cnt-1;i++)
|
||||
{
|
||||
//STRIP_INITIALIZATION
|
||||
GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
|
||||
GEN_INTRINSIC(init_masks_for_row_, PRECISION)(*tc, rsArr, lastMaskShiftOut,
|
||||
i*AVX_LENGTH+1, AVX_LENGTH) ;
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
MASK_VEC currMaskVecLow ; // corresponding to lower half
|
||||
MASK_VEC currMaskVecHigh ; // corresponding to upper half
|
||||
|
||||
for (int d=1;d<COLS+AVX_LENGTH;d++)
|
||||
{
|
||||
if (d % MAIN_TYPE_SIZE == 1)
|
||||
GEN_INTRINSIC(update_masks_for_cols_, PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
int ShiftIdx = d+AVX_LENGTH;
|
||||
//distmSel = GEN_INTRINSIC(computeDISTM, PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
|
||||
//int firstRowIndex = (d < COLS) ? 0 : (d-COLS+1) ;
|
||||
//int lastRowIndex = std::min(d-1, AVX_LENGTH-1) ;
|
||||
//GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen, distmSel, firstRowIndex, lastRowIndex) ;
|
||||
|
||||
GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
|
||||
|
||||
GEN_INTRINSIC(computeMXY, PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(M_t, shiftOutM[ShiftIdx], shiftOutM[d]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(X_t, shiftOutX[ShiftIdx], shiftOutX[d]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(Y_t_1, shiftOutY[ShiftIdx], shiftOutY[d]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
int i = strip_cnt-1;
|
||||
{
|
||||
//STRIP_INITIALIZATION
|
||||
GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
|
||||
if (remainingRows==0) remainingRows=AVX_LENGTH;
|
||||
|
||||
GEN_INTRINSIC(init_masks_for_row_, PRECISION)(*tc, rsArr, lastMaskShiftOut,
|
||||
i*AVX_LENGTH+1, remainingRows) ;
|
||||
|
||||
_256_TYPE sumM, sumX;
|
||||
sumM = VEC_SET1_VAL(zero);
|
||||
sumX = VEC_SET1_VAL(zero);
|
||||
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
MASK_VEC currMaskVecLow ; // corresponding to lower half
|
||||
MASK_VEC currMaskVecHigh ; // corresponding to upper half
|
||||
|
||||
for (int d=1;d<COLS+remainingRows-1;d++)
|
||||
{
|
||||
|
||||
if (d % MAIN_TYPE_SIZE == 1)
|
||||
GEN_INTRINSIC(update_masks_for_cols_, PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
int ShiftIdx = d+AVX_LENGTH;
|
||||
//distmSel = GEN_INTRINSIC(computeDISTM, PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
|
||||
GEN_INTRINSIC(computeMXY, PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
sumM = VEC_ADD(sumM, M_t.d);
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(M_t, shiftOutM[ShiftIdx]);
|
||||
|
||||
sumX = VEC_ADD(sumX, X_t.d);
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(X_t, shiftOutX[ShiftIdx]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(Y_t_1, shiftOutY[ShiftIdx]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
|
||||
}
|
||||
UNION_TYPE sumMX;
|
||||
sumMX.d = VEC_ADD(sumM, sumX);
|
||||
result_avx2 = sumMX.f[remainingRows-1];
|
||||
}
|
||||
//printf("result_avx2: %f\n", result_avx2);
|
||||
return result_avx2;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,116 @@
|
|||
#include <stdio.h>
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include "template.h"
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
//#define RUN_HYBRID
|
||||
|
||||
//uint8_t ConvertChar::conversionTable[255] ;
|
||||
int thread_level_parallelism_enabled = false ;
|
||||
|
||||
double getCurrClk() {
|
||||
struct timeval tv ;
|
||||
gettimeofday(&tv, NULL);
|
||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
testcase tc[BATCH_SIZE];
|
||||
float result[BATCH_SIZE], result_avxf;
|
||||
double result_avxd;
|
||||
//struct timeval start, end;
|
||||
double lastClk = 0.0 ;
|
||||
double aggregateTimeRead = 0.0;
|
||||
double aggregateTimeCompute = 0.0;
|
||||
double aggregateTimeWrite = 0.0;
|
||||
|
||||
// Need to call it once to initialize the static array
|
||||
ConvertChar::init() ;
|
||||
|
||||
|
||||
char* ompEnvVar = getenv("OMP_NUM_THREADS") ;
|
||||
if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) {
|
||||
thread_level_parallelism_enabled = true ;
|
||||
}
|
||||
|
||||
bool noMoreData = false;
|
||||
int count =0;
|
||||
while (!noMoreData)
|
||||
{
|
||||
int read_count = BATCH_SIZE;
|
||||
|
||||
lastClk = getCurrClk() ;
|
||||
for (int b=0;b<BATCH_SIZE;b++)
|
||||
if (read_testcase(&tc[b], NULL)==-1)
|
||||
{
|
||||
read_count = b;
|
||||
noMoreData = true;
|
||||
break;
|
||||
}
|
||||
//gettimeofday(&end, NULL);
|
||||
aggregateTimeRead += (getCurrClk() - lastClk) ;
|
||||
//((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
//gettimeofday(&start, NULL);
|
||||
lastClk = getCurrClk() ;
|
||||
|
||||
#pragma omp parallel for schedule(dynamic) if(thread_level_parallelism_enabled)
|
||||
for (int b=0;b<read_count;b++)
|
||||
{
|
||||
result_avxf = GEN_INTRINSIC(compute_full_prob_avx, s)<float>(&tc[b]);
|
||||
|
||||
#ifdef RUN_HYBRID
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
count++;
|
||||
result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc[b]);
|
||||
result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f));
|
||||
}
|
||||
else
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
#ifndef RUN_HYBRID
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
}
|
||||
//gettimeofday(&end, NULL);
|
||||
aggregateTimeCompute += (getCurrClk() - lastClk) ;
|
||||
//((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
//gettimeofday(&start, NULL);
|
||||
lastClk = getCurrClk() ;
|
||||
for (int b=0;b<read_count;b++)
|
||||
printf("%E\n", result[b]);
|
||||
//gettimeofday(&end, NULL);
|
||||
aggregateTimeWrite += (getCurrClk() - lastClk) ;
|
||||
//((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
}
|
||||
|
||||
printf("AVX Read Time: %.2f\n", aggregateTimeRead);
|
||||
printf("AVX Compute Time: %.2f\n", aggregateTimeCompute);
|
||||
printf("AVX Write Time: %.2f\n", aggregateTimeWrite);
|
||||
printf("AVX Total Time: %.2f\n", aggregateTimeRead + aggregateTimeCompute + aggregateTimeWrite);
|
||||
printf("# Double called: %d\n", count);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,68 @@
|
|||
#ifdef PRECISION
|
||||
|
||||
#include "template.h"
|
||||
|
||||
|
||||
inline void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
|
||||
{
|
||||
|
||||
IF_128 xlow , xhigh;
|
||||
/* cast x to xlow */
|
||||
xlow.f = VEC_CAST_256_128(x.d);
|
||||
/* extract x,1 to xhigh */
|
||||
xhigh.f = VEC_EXTRACT_128(x.d, 1);
|
||||
/* extract xlow[3] */
|
||||
IF_128 shiftOutL128;
|
||||
shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1);
|
||||
/* extract xhigh[3] */
|
||||
IF_MAIN_TYPE shiftOutH;
|
||||
shiftOutH.i = VEC_EXTRACT_UNIT(xhigh.i, SHIFT_CONST2);
|
||||
shiftOut = shiftOutH.f;
|
||||
/* shift xlow */
|
||||
xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3);
|
||||
/* shift xhigh */
|
||||
xhigh.i = _mm_slli_si128 (xhigh.i, SHIFT_CONST3);
|
||||
/*movss shiftIn to xlow[0] */
|
||||
_128_TYPE shiftIn128 = VEC_SET1_VAL128(shiftIn);
|
||||
xlow.f = VEC_MOVE(xlow.f , shiftIn128);
|
||||
/*movss xlow[3] to xhigh[0] */
|
||||
xhigh.f = VEC_MOVE(xhigh.f, shiftOutL128.f);
|
||||
/* cast xlow to x */
|
||||
x.d = VEC_CAST_128_256(xlow.f);
|
||||
/* insert xhigh to x,1 */
|
||||
x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
|
||||
}
|
||||
|
||||
inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
|
||||
{
|
||||
|
||||
IF_128 xlow , xhigh;
|
||||
/* cast x to xlow */
|
||||
xlow.f = VEC_CAST_256_128(x.d);
|
||||
/* extract x,1 to xhigh */
|
||||
xhigh.f = VEC_EXTRACT_128(x.d, 1);
|
||||
/* extract xlow[3] */
|
||||
IF_128 shiftOutL128;
|
||||
shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1);
|
||||
/* extract xhigh[3] */
|
||||
// IF_MAIN_TYPE shiftOutH;
|
||||
// shiftOutH.i = VEC_EXTRACT_UNIT(xhigh.i, SHIFT_CONST2);
|
||||
// shiftOut = shiftOutH.f;
|
||||
/* shift xlow */
|
||||
xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3);
|
||||
/* shift xhigh */
|
||||
xhigh.i = _mm_slli_si128 (xhigh.i, SHIFT_CONST3);
|
||||
/*movss shiftIn to xlow[0] */
|
||||
_128_TYPE shiftIn128 = VEC_SET1_VAL128(shiftIn);
|
||||
xlow.f = VEC_MOVE(xlow.f , shiftIn128);
|
||||
/*movss xlow[3] to xhigh[0] */
|
||||
xhigh.f = VEC_MOVE(xhigh.f, shiftOutL128.f);
|
||||
/* cast xlow to x */
|
||||
x.d = VEC_CAST_128_256(xlow.f);
|
||||
/* insert xhigh to x,1 */
|
||||
x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
|
@ -0,0 +1,190 @@
|
|||
#ifndef TEMPLATES_H_
|
||||
#define TEMPLATES_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
|
||||
#define MROWS 500
|
||||
#define MCOLS 1000
|
||||
|
||||
#define CAT(X,Y) X####Y
|
||||
#define GEN_INTRINSIC(X,Y) CAT(X,Y)
|
||||
|
||||
#define ALIGNED __attribute__((aligned(32)))
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m256 ALIGNED d;
|
||||
ALIGNED __m128i ALIGNED s[2];
|
||||
ALIGNED float ALIGNED f[8];
|
||||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_F ALIGNED;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m128i vec ;
|
||||
__m128 vecf ;
|
||||
uint32_t masks[4] ;
|
||||
} MaskVec_F ;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
ALIGNED __m128 ALIGNED f;
|
||||
} ALIGNED IF_128f ALIGNED;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED int ALIGNED i;
|
||||
ALIGNED float ALIGNED f;
|
||||
} ALIGNED IF_32 ALIGNED;
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m256d ALIGNED d;
|
||||
ALIGNED __m128i ALIGNED s[2];
|
||||
ALIGNED double ALIGNED f[4];
|
||||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_D ALIGNED;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m128i vec ;
|
||||
__m128d vecf ;
|
||||
uint64_t masks[2] ;
|
||||
} MaskVec_D ;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
ALIGNED __m128d ALIGNED f;
|
||||
} ALIGNED IF_128d ALIGNED;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED int64_t ALIGNED i;
|
||||
ALIGNED double ALIGNED f;
|
||||
} ALIGNED IF_64 ALIGNED;
|
||||
|
||||
template<class T>
|
||||
struct Context{};
|
||||
|
||||
template<>
|
||||
struct Context<double>
|
||||
{
|
||||
Context()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
ph2pr[x] = pow(10.0, -((double)x) / 10.0);
|
||||
|
||||
INITIAL_CONSTANT = ldexp(1.0, 1020.0);
|
||||
LOG10_INITIAL_CONSTANT = log10(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = 0.0;
|
||||
}
|
||||
|
||||
double LOG10(double v){ return log10(v); }
|
||||
|
||||
static double _(double n){ return n; }
|
||||
static double _(float n){ return ((double) n); }
|
||||
double ph2pr[128];
|
||||
double INITIAL_CONSTANT;
|
||||
double LOG10_INITIAL_CONSTANT;
|
||||
double RESULT_THRESHOLD;
|
||||
};
|
||||
|
||||
template<>
|
||||
struct Context<float>
|
||||
{
|
||||
Context()
|
||||
{
|
||||
for (int x = 0; x < 128; x++)
|
||||
{
|
||||
ph2pr[x] = powf(10.f, -((float)x) / 10.f);
|
||||
}
|
||||
|
||||
INITIAL_CONSTANT = ldexpf(1.f, 120.f);
|
||||
LOG10_INITIAL_CONSTANT = log10f(INITIAL_CONSTANT);
|
||||
RESULT_THRESHOLD = ldexpf(1.f, -110.f);
|
||||
}
|
||||
|
||||
float LOG10(float v){ return log10f(v); }
|
||||
|
||||
static float _(double n){ return ((float) n); }
|
||||
static float _(float n){ return n; }
|
||||
float ph2pr[128];
|
||||
float INITIAL_CONSTANT;
|
||||
float LOG10_INITIAL_CONSTANT;
|
||||
float RESULT_THRESHOLD;
|
||||
};
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rslen, haplen;
|
||||
/*int *q, *i, *d, *c;*/
|
||||
int q[MROWS], i[MROWS], d[MROWS], c[MROWS];
|
||||
char *hap, *rs;
|
||||
int *ihap;
|
||||
int *irs;
|
||||
} testcase;
|
||||
|
||||
|
||||
template<class T>
|
||||
std::string to_string(T obj)
|
||||
{
|
||||
std::stringstream ss;
|
||||
std::string ret_string;
|
||||
ss.clear();
|
||||
ss << std::scientific << obj;
|
||||
ss >> ret_string;
|
||||
ss.clear();
|
||||
return ret_string;
|
||||
}
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
|
||||
int normalize(char c);
|
||||
int read_testcase(testcase *tc, FILE* ifp);
|
||||
int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false);
|
||||
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
class ConvertChar {
|
||||
|
||||
static uint8_t conversionTable[255] ;
|
||||
|
||||
public:
|
||||
|
||||
static void init() {
|
||||
assert (NUM_DISTINCT_CHARS == 5) ;
|
||||
assert (AMBIG_CHAR == 4) ;
|
||||
|
||||
conversionTable['A'] = 0 ;
|
||||
conversionTable['C'] = 1 ;
|
||||
conversionTable['T'] = 2 ;
|
||||
conversionTable['G'] = 3 ;
|
||||
conversionTable['N'] = 4 ;
|
||||
}
|
||||
|
||||
static inline uint8_t get(uint8_t input) {
|
||||
return conversionTable[input] ;
|
||||
}
|
||||
|
||||
} ;
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
Loading…
Reference in New Issue