Added support for dynamic selection between AVX and un-vectorized C++,
still to include SSE code from Mohammad. Debug flags turned on in this commit.
This commit is contained in:
parent
f1c772ceea
commit
25aecb96e0
|
|
@ -24,3 +24,16 @@ dump/
|
|||
lib/
|
||||
out/
|
||||
/atlassian-ide-plugin.xml
|
||||
kg_tmp/
|
||||
maven-ant-tasks-2.1.3.jar
|
||||
null-sequenceGraph.25.0.0.raw_readthreading_graph.dot
|
||||
null-sequenceGraph.25.0.1.cleaned_readthreading_graph.dot
|
||||
null-sequenceGraph.25.0.1.initial_seqgraph.dot
|
||||
null-sequenceGraph.3.0.0.raw_readthreading_graph.dot
|
||||
null-sequenceGraph.3.0.1.cleaned_readthreading_graph.dot
|
||||
null-sequenceGraph.3.0.1.initial_seqgraph.dot
|
||||
org/
|
||||
package-list
|
||||
resources/
|
||||
velocity.log
|
||||
|
||||
|
|
|
|||
|
|
@ -7,40 +7,59 @@ OMPCFLAGS=-fopenmp
|
|||
JAVA_ROOT=/opt/jdk1.7.0_25/
|
||||
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux
|
||||
|
||||
CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
|
||||
|
||||
CXXFLAGS=$(CFLAGS)
|
||||
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
CC=icc
|
||||
CXX=icc
|
||||
|
||||
LDFLAGS=-lm $(OMPLDFLAGS)
|
||||
|
||||
#BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
|
||||
BIN:=libJNILoglessPairHMM.so pairhmm-template-main checker
|
||||
BIN=libJNILoglessPairHMM.so pairhmm-template-main checker
|
||||
#BIN=checker
|
||||
|
||||
#SOURCES=pairhmm-1-base.cc input.cc
|
||||
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc convert_char.cc
|
||||
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
|
||||
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
|
||||
DEPDIR=.deps
|
||||
DF=$(DEPDIR)/$(*).d
|
||||
|
||||
#Common across libJNI and sandbox
|
||||
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc
|
||||
#Part of libJNI
|
||||
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES)
|
||||
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
|
||||
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
|
||||
COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o)
|
||||
|
||||
|
||||
#No vectorization for these files
|
||||
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc
|
||||
#Use -xAVX for these files
|
||||
AVX_SOURCES=avx_function_instantiations.cc
|
||||
#Use -xSSE4.2 for these files
|
||||
SSE_SOURCES=
|
||||
|
||||
NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o)
|
||||
AVX_OBJECTS=$(AVX_SOURCES:.cc=.o)
|
||||
SSE_OBJECTS=$(SSE_SOURCES:.cc=.o)
|
||||
$(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS)
|
||||
$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX
|
||||
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2
|
||||
OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS)
|
||||
|
||||
all: $(BIN)
|
||||
|
||||
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
|
||||
|
||||
checker: pairhmm-1-base.o convert_char.o
|
||||
checker: pairhmm-1-base.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
pairhmm-template-main: pairhmm-template-main.o convert_char.o
|
||||
pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
|
||||
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libJNILoglessPairHMM.so: $(LIBOBJECTS)
|
||||
$(CXX) $(OMPCFLAGS) -shared -o $@ $(LIBOBJECTS)
|
||||
|
||||
%.o: %.cc
|
||||
|
||||
$(OBJECTS): %.o: %.cc
|
||||
@mkdir -p $(DEPDIR)
|
||||
$(COMPILE.cpp) -MMD -MF $(DF) $(JNI_COMPILATION_FLAGS) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
$(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
|
||||
|
||||
|
||||
clean:
|
||||
|
|
|
|||
|
|
@ -0,0 +1,12 @@
|
|||
#include "template.h"
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
template double compute_full_prob_avxd<double>(testcase* tc, double* nextlog);
|
||||
template float compute_full_prob_avxs<float>(testcase* tc, float* nextlog);
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
|
||||
template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
|
||||
template<class NUMBER> void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
_256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm);
|
||||
void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
||||
|
|
@ -0,0 +1,85 @@
|
|||
#include "headers.h"
|
||||
#include "template.h"
|
||||
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
|
||||
Context<NUMBER> ctx;
|
||||
|
||||
NUMBER M[MROWS][MCOLS];
|
||||
NUMBER X[MROWS][MCOLS];
|
||||
NUMBER Y[MROWS][MCOLS];
|
||||
NUMBER p[MROWS][6];
|
||||
|
||||
p[0][MM] = ctx._(0.0);
|
||||
p[0][GapM] = ctx._(0.0);
|
||||
p[0][MX] = ctx._(0.0);
|
||||
p[0][XX] = ctx._(0.0);
|
||||
p[0][MY] = ctx._(0.0);
|
||||
p[0][YY] = ctx._(0.0);
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
p[r][MX] = ctx.ph2pr[_i];
|
||||
p[r][XX] = ctx.ph2pr[_c];
|
||||
p[r][MY] = ctx.ph2pr[_d];
|
||||
p[r][YY] = ctx.ph2pr[_c];
|
||||
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
M[0][c] = ctx._(0.0);
|
||||
X[0][c] = ctx._(0.0);
|
||||
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
}
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
M[r][0] = ctx._(0.0);
|
||||
X[r][0] = X[r-1][0] * p[r][XX];
|
||||
Y[r][0] = ctx._(0.0);
|
||||
}
|
||||
|
||||
NUMBER result = ctx._(0.0);
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
for (c = 1; c < COLS; c++)
|
||||
{
|
||||
char _rs = tc->rs[r-1];
|
||||
char _hap = tc->hap[c-1];
|
||||
int _q = tc->q[r-1] & 127;
|
||||
NUMBER distm = ctx.ph2pr[_q];
|
||||
if (_rs == _hap || _rs == 'N' || _hap == 'N')
|
||||
distm = ctx._(1.0) - distm;
|
||||
else
|
||||
distm = distm/3;
|
||||
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
|
||||
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
|
||||
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
result += M[ROWS-1][c] + X[ROWS-1][c];
|
||||
}
|
||||
|
||||
if (before_last_log != NULL)
|
||||
*before_last_log = result;
|
||||
|
||||
return result;
|
||||
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||
}
|
||||
|
||||
template double compute_full_prob<double>(testcase* tc, double* nextbuf);
|
||||
template float compute_full_prob<float>(testcase* tc, float* nextbuf);
|
||||
|
||||
|
|
@ -0,0 +1,24 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
|
||||
Binary file not shown.
|
|
@ -1,44 +1,25 @@
|
|||
#include "headers.h"
|
||||
#include <jni.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include "org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h"
|
||||
#include <vector>
|
||||
|
||||
#include <cstdio>
|
||||
#include <string>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
using namespace std;
|
||||
|
||||
#define ENABLE_ASSERTIONS 1
|
||||
//#define DEBUG 1
|
||||
#define DO_PROFILING 1
|
||||
#define DEBUG 1
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
|
||||
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#include "define-float.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
|
||||
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
using namespace std;
|
||||
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
|
|
@ -46,6 +27,54 @@ class LoadTimeInitializer
|
|||
LoadTimeInitializer() //will be called when library is loaded
|
||||
{
|
||||
ConvertChar::init();
|
||||
m_sumNumReads = 0;
|
||||
m_sumSquareNumReads = 0;
|
||||
m_sumNumHaplotypes = 0;
|
||||
m_sumSquareNumHaplotypes = 0;
|
||||
m_sumNumTestcases = 0;
|
||||
m_sumSquareNumTestcases = 0;
|
||||
m_num_invocations = 0;
|
||||
|
||||
if(is_avx_supported())
|
||||
{
|
||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
||||
g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Using un-vectorized C++ implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob<float>;
|
||||
g_compute_full_prob_double = compute_full_prob<double>;
|
||||
}
|
||||
cout.flush();
|
||||
//if(is_sse42_supported())
|
||||
//{
|
||||
//g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
||||
//g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
||||
//}
|
||||
}
|
||||
void print_profiling()
|
||||
{
|
||||
double mean_val;
|
||||
cout <<"Invocations : "<<m_num_invocations<<"\n";
|
||||
cout << "term\tsum\tsumSq\tmean\tvar\n";
|
||||
mean_val = m_sumNumReads/m_num_invocations;
|
||||
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
mean_val = m_sumNumHaplotypes/m_num_invocations;
|
||||
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
mean_val = m_sumNumTestcases/m_num_invocations;
|
||||
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
cout.flush();
|
||||
}
|
||||
~LoadTimeInitializer()
|
||||
{
|
||||
#ifdef DO_PROFILING
|
||||
print_profiling();
|
||||
#endif
|
||||
}
|
||||
jfieldID m_readBasesFID;
|
||||
jfieldID m_readQualsFID;
|
||||
|
|
@ -53,18 +82,18 @@ class LoadTimeInitializer
|
|||
jfieldID m_deletionGOPFID;
|
||||
jfieldID m_overallGCPFID;
|
||||
jfieldID m_haplotypeBasesFID;
|
||||
//used to compute avg, variance of #testcases
|
||||
double m_sumNumReads;
|
||||
double m_sumSquareNumReads;
|
||||
double m_sumNumHaplotypes;
|
||||
double m_sumSquareNumHaplotypes;
|
||||
double m_sumNumTestcases;
|
||||
double m_sumSquareNumTestcases;
|
||||
unsigned m_num_invocations;
|
||||
|
||||
};
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
void debug_dump(string filename, string s, bool to_append, bool add_newline)
|
||||
{
|
||||
ofstream fptr;
|
||||
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
fptr << s;
|
||||
if(add_newline)
|
||||
fptr << "\n";
|
||||
fptr.close();
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities
|
||||
(JNIEnv* env, jclass thisObject,
|
||||
|
|
@ -327,8 +356,15 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&(tc_array[tc_idx]));
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
#ifdef DEBUG
|
||||
|
|
@ -353,5 +389,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RELEASE_MODE);
|
||||
haplotypeBasesArrayVector.clear();
|
||||
tc_array.clear();
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_sumNumReads += numReads;
|
||||
g_load_time_initializer.m_sumSquareNumReads += numReads*numReads;
|
||||
g_load_time_initializer.m_sumNumHaplotypes += numHaplotypes;
|
||||
g_load_time_initializer.m_sumSquareNumHaplotypes += numHaplotypes*numHaplotypes;
|
||||
g_load_time_initializer.m_sumNumTestcases += numTestCases;
|
||||
g_load_time_initializer.m_sumSquareNumTestcases += numTestCases*numTestCases;
|
||||
++(g_load_time_initializer.m_num_invocations);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,29 +1,15 @@
|
|||
//#define DEBUG 1
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
#include <cmath>
|
||||
#include <omp.h>
|
||||
#include <emmintrin.h>
|
||||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
|
||||
//#include "define-float.h"
|
||||
//#include "shift_template.c"
|
||||
//#include "pairhmm-template-kernel.cc"
|
||||
#include "define-float.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
using namespace std;
|
||||
class LoadTimeInitializer
|
||||
|
|
@ -36,85 +22,6 @@ class LoadTimeInitializer
|
|||
};
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
int COLS = tc->haplen + 1;
|
||||
|
||||
Context<NUMBER> ctx;
|
||||
|
||||
NUMBER M[MROWS][MCOLS];
|
||||
NUMBER X[MROWS][MCOLS];
|
||||
NUMBER Y[MROWS][MCOLS];
|
||||
NUMBER p[MROWS][6];
|
||||
|
||||
p[0][MM] = ctx._(0.0);
|
||||
p[0][GapM] = ctx._(0.0);
|
||||
p[0][MX] = ctx._(0.0);
|
||||
p[0][XX] = ctx._(0.0);
|
||||
p[0][MY] = ctx._(0.0);
|
||||
p[0][YY] = ctx._(0.0);
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
int _i = tc->i[r-1] & 127;
|
||||
int _d = tc->d[r-1] & 127;
|
||||
int _c = tc->c[r-1] & 127;
|
||||
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
|
||||
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
p[r][MX] = ctx.ph2pr[_i];
|
||||
p[r][XX] = ctx.ph2pr[_c];
|
||||
p[r][MY] = ctx.ph2pr[_d];
|
||||
p[r][YY] = ctx.ph2pr[_c];
|
||||
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
M[0][c] = ctx._(0.0);
|
||||
X[0][c] = ctx._(0.0);
|
||||
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
}
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
{
|
||||
M[r][0] = ctx._(0.0);
|
||||
X[r][0] = X[r-1][0] * p[r][XX];
|
||||
Y[r][0] = ctx._(0.0);
|
||||
}
|
||||
|
||||
NUMBER result = ctx._(0.0);
|
||||
|
||||
for (r = 1; r < ROWS; r++)
|
||||
for (c = 1; c < COLS; c++)
|
||||
{
|
||||
char _rs = tc->rs[r-1];
|
||||
char _hap = tc->hap[c-1];
|
||||
int _q = tc->q[r-1] & 127;
|
||||
NUMBER distm = ctx.ph2pr[_q];
|
||||
if (_rs == _hap || _rs == 'N' || _hap == 'N')
|
||||
distm = ctx._(1.0) - distm;
|
||||
else
|
||||
distm = distm/3;
|
||||
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
|
||||
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
|
||||
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
|
||||
}
|
||||
|
||||
for (c = 0; c < COLS; c++)
|
||||
{
|
||||
result += M[ROWS-1][c] + X[ROWS-1][c];
|
||||
}
|
||||
|
||||
if (before_last_log != NULL)
|
||||
*before_last_log = result;
|
||||
|
||||
return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||
}
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
||||
|
|
@ -129,45 +36,55 @@ int main(int argc, char** argv)
|
|||
if(argc >= 3 && string(argv[2]) == "1")
|
||||
use_old_read_testcase = true;
|
||||
|
||||
testcase tc;
|
||||
if(use_old_read_testcase)
|
||||
if(true)
|
||||
{
|
||||
FILE* fptr = fopen(argv[1],"r");
|
||||
while(!feof(fptr))
|
||||
{
|
||||
if(read_testcase(&tc, fptr) >= 0)
|
||||
{
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
|
||||
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
|
||||
delete tc.rs;
|
||||
delete tc.hap;
|
||||
}
|
||||
}
|
||||
fclose(fptr);
|
||||
g_compute_full_prob_double = GEN_INTRINSIC(compute_full_prob_avx, d)<double>;
|
||||
g_compute_full_prob_float = GEN_INTRINSIC(compute_full_prob_avx, s)<float>;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_compute_full_prob_double = compute_full_prob<double>;
|
||||
g_compute_full_prob_float = compute_full_prob<float>;
|
||||
}
|
||||
|
||||
std::ifstream ifptr;
|
||||
FILE* fptr = 0;
|
||||
if(use_old_read_testcase)
|
||||
{
|
||||
fptr = fopen(argv[1],"r");
|
||||
assert(fptr);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::ifstream ifptr;
|
||||
std::vector<std::string> tokens;
|
||||
ifptr.open(argv[1]);
|
||||
assert(ifptr.is_open());
|
||||
while(1)
|
||||
{
|
||||
tokens.clear();
|
||||
if(read_mod_testcase(ifptr, &tc, false) < 0)
|
||||
break;
|
||||
//double result = 0;
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
|
||||
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
|
||||
delete tc.rs;
|
||||
delete tc.hap;
|
||||
}
|
||||
ifptr.close();
|
||||
}
|
||||
|
||||
testcase tc;
|
||||
while(1)
|
||||
{
|
||||
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
||||
if(break_value < 0)
|
||||
break;
|
||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
|
||||
double baseline_result = compute_full_prob<double>(&tc);
|
||||
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
|
||||
cout << std::scientific << baseline_result << " "<<result<<"\n";
|
||||
delete tc.rs;
|
||||
delete tc.hap;
|
||||
}
|
||||
if(use_old_read_testcase)
|
||||
fclose(fptr);
|
||||
else
|
||||
ifptr.close();
|
||||
return 0;
|
||||
|
||||
#if 0
|
||||
|
|
|
|||
|
|
@ -34,14 +34,14 @@ void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, i
|
|||
int mOffset = (col-1) % maskBitCnt ;
|
||||
MASK_TYPE bitMask = ((MASK_TYPE)0x1) << (maskBitCnt-1-mOffset) ;
|
||||
|
||||
char hapChar = tc.hap[col-1] ;
|
||||
char hapChar = ConvertChar::get(tc.hap[col-1]);
|
||||
|
||||
if (hapChar == AMBIG_CHAR) {
|
||||
for (int ci=0; ci < NUM_DISTINCT_CHARS; ++ci)
|
||||
maskArr[mIndex][ci] |= bitMask ;
|
||||
}
|
||||
|
||||
maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ;
|
||||
maskArr[mIndex][hapChar] |= bitMask ;
|
||||
// bit corresponding to col 1 will be the MSB of the mask 0
|
||||
// bit corresponding to col 2 will be the MSB-1 of the mask 0
|
||||
// ...
|
||||
|
|
@ -75,11 +75,11 @@ void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& c
|
|||
|
||||
for (int ei=0; ei < AVX_LENGTH/2; ++ei) {
|
||||
SET_MASK_WORD(currMaskVecLow.masks[ei], maskArr[maskIndex][rsArr[ei]],
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
int ei2 = ei + AVX_LENGTH/2 ; // the second entry index
|
||||
SET_MASK_WORD(currMaskVecHigh.masks[ei], maskArr[maskIndex][rsArr[ei2]],
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include <stdio.h>
|
||||
#include "headers.h"
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
|
|
@ -7,11 +8,11 @@
|
|||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
//#define RUN_HYBRID
|
||||
|
|
|
|||
|
|
@ -3,8 +3,8 @@ rm -f *.txt
|
|||
export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
|
||||
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
|
||||
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||
-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
|
||||
-I /data/simulated/sim1M_pairs_final.bam \
|
||||
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
||||
-stand_call_conf 50.0 \
|
||||
-stand_emit_conf 10.0 \
|
||||
|
|
@ -14,3 +14,8 @@ java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/
|
|||
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
|
||||
#-I /data/simulated/sim1M_pairs_final.bam \
|
||||
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
|
||||
#-R /data/broad/samples/joint_variant_calling/broad_reference/ucsc.hg19.fasta \
|
||||
#-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
||||
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/dbsnp_138.hg19.vcf \
|
||||
|
|
|
|||
|
|
@ -1,24 +1,14 @@
|
|||
#ifndef TEMPLATES_H_
|
||||
#define TEMPLATES_H_
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
|
||||
#include <sys/time.h>
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string>
|
||||
#include <iostream>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include "headers.h"
|
||||
|
||||
#define MM 0
|
||||
#define GapM 1
|
||||
#define MX 2
|
||||
#define XX 3
|
||||
#define MY 4
|
||||
#define YY 5
|
||||
|
||||
#define MROWS 500
|
||||
#define MCOLS 1000
|
||||
|
|
@ -130,6 +120,7 @@ struct Context<float>
|
|||
};
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rslen, haplen;
|
||||
|
|
@ -182,7 +173,8 @@ public:
|
|||
return conversionTable[input] ;
|
||||
}
|
||||
|
||||
} ;
|
||||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,7 +1,36 @@
|
|||
#include "headers.h"
|
||||
#include "template.h"
|
||||
|
||||
uint8_t ConvertChar::conversionTable[255];
|
||||
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
||||
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
|
||||
|
||||
using namespace std;
|
||||
|
||||
bool is_avx_supported()
|
||||
{
|
||||
int ecx = 0, edx = 0, ebx = 0;
|
||||
__asm__("cpuid"
|
||||
: "=b" (ebx),
|
||||
"=c" (ecx),
|
||||
"=d" (edx)
|
||||
: "a" (1)
|
||||
);
|
||||
return ((ecx >> 28)&1) == 1;
|
||||
}
|
||||
|
||||
bool is_sse42_supported()
|
||||
{
|
||||
int ecx = 0, edx = 0, ebx = 0;
|
||||
__asm__("cpuid"
|
||||
: "=b" (ebx),
|
||||
"=c" (ecx),
|
||||
"=d" (edx)
|
||||
: "a" (1)
|
||||
);
|
||||
return ((ecx >> 20)&1) == 1;
|
||||
}
|
||||
|
||||
int normalize(char c)
|
||||
{
|
||||
return ((int) (c - 33));
|
||||
|
|
@ -184,3 +213,13 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|||
|
||||
return tokens.size();
|
||||
}
|
||||
|
||||
void debug_dump(string filename, string s, bool to_append, bool add_newline)
|
||||
{
|
||||
ofstream fptr;
|
||||
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
fptr << s;
|
||||
if(add_newline)
|
||||
fptr << "\n";
|
||||
fptr.close();
|
||||
}
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef PAIRHMM_UTIL_H
|
||||
#define PAIRHMM_UTIL_H
|
||||
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
bool is_avx_supported();
|
||||
bool is_sse42_supported();
|
||||
extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log);
|
||||
extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log);
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
|
||||
#endif
|
||||
|
|
@ -66,13 +66,13 @@ import java.util.Map;
|
|||
*/
|
||||
public class JNILoglessPairHMM extends LoglessPairHMM {
|
||||
|
||||
private static final boolean debug = false; //simulates ifdef
|
||||
private static final boolean debug = true; //simulates ifdef
|
||||
private static final boolean debug0_1 = false; //simulates ifdef
|
||||
private static final boolean debug1 = false; //simulates ifdef
|
||||
private static final boolean debug2 = false;
|
||||
private static final boolean debug3 = false;
|
||||
private int numComputeLikelihoodCalls = 0;
|
||||
|
||||
|
||||
//Used to copy references to byteArrays to JNI from reads
|
||||
protected class JNIReadDataHolderClass
|
||||
{
|
||||
|
|
@ -138,6 +138,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
jniInitialize(readMaxLength, haplotypeMaxLength);
|
||||
}
|
||||
|
||||
|
||||
//Real compute kernel
|
||||
private native void jniComputeLikelihoods(int numReads, int numHaplotypes,
|
||||
JNIReadDataHolderClass[] readDataArray, JNIHaplotypeDataHolderClass[] haplotypeDataArray,
|
||||
|
|
@ -160,6 +161,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
}
|
||||
int readListSize = reads.size();
|
||||
int alleleHaplotypeMapSize = alleleHaplotypeMap.size();
|
||||
int numTestcases = readListSize*alleleHaplotypeMapSize;
|
||||
if(debug0_1)
|
||||
System.out.println("Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize);
|
||||
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
|
||||
|
|
@ -275,6 +277,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
return likelihoodMap;
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* {@inheritDoc}
|
||||
*/
|
||||
|
|
|
|||
Loading…
Reference in New Issue