Added support for dynamic selection between AVX and un-vectorized C++,

still to include SSE code from Mohammad.
Debug flags turned on in this commit.
This commit is contained in:
Karthik Gururaj 2014-01-18 11:07:23 -08:00
parent f1c772ceea
commit 25aecb96e0
16 changed files with 399 additions and 212 deletions

13
.gitignore vendored
View File

@ -24,3 +24,16 @@ dump/
lib/
out/
/atlassian-ide-plugin.xml
kg_tmp/
maven-ant-tasks-2.1.3.jar
null-sequenceGraph.25.0.0.raw_readthreading_graph.dot
null-sequenceGraph.25.0.1.cleaned_readthreading_graph.dot
null-sequenceGraph.25.0.1.initial_seqgraph.dot
null-sequenceGraph.3.0.0.raw_readthreading_graph.dot
null-sequenceGraph.3.0.1.cleaned_readthreading_graph.dot
null-sequenceGraph.3.0.1.initial_seqgraph.dot
org/
package-list
resources/
velocity.log

View File

@ -7,40 +7,59 @@ OMPCFLAGS=-fopenmp
JAVA_ROOT=/opt/jdk1.7.0_25/
JNI_COMPILATION_FLAGS=-D_REENTRANT -fPIC -I${JAVA_ROOT}/include -I${JAVA_ROOT}/include/linux
CFLAGS=-O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas -xAVX
CXXFLAGS=$(CFLAGS)
COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
CC=icc
CXX=icc
LDFLAGS=-lm $(OMPLDFLAGS)
#BIN:=pairhmm-1-base #pairhmm-2-omp pairhmm-3-hybrid-float-double pairhmm-4-hybrid-diagonal pairhmm-5-hybrid-diagonal-homogeneus pairhmm-6-onlythreediags pairhmm-7-presse pairhmm-8-sse #pairhmm-dev
BIN:=libJNILoglessPairHMM.so pairhmm-template-main checker
BIN=libJNILoglessPairHMM.so pairhmm-template-main checker
#BIN=checker
#SOURCES=pairhmm-1-base.cc input.cc
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc convert_char.cc
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
DEPDIR=.deps
DF=$(DEPDIR)/$(*).d
#Common across libJNI and sandbox
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc
#Part of libJNI
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES)
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o)
#No vectorization for these files
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc
#Use -xAVX for these files
AVX_SOURCES=avx_function_instantiations.cc
#Use -xSSE4.2 for these files
SSE_SOURCES=
NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o)
AVX_OBJECTS=$(AVX_SOURCES:.cc=.o)
SSE_OBJECTS=$(SSE_SOURCES:.cc=.o)
$(NO_VECTOR_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS)
$(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2
OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS)
all: $(BIN)
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
checker: pairhmm-1-base.o convert_char.o
checker: pairhmm-1-base.o $(COMMON_OBJECTS)
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
pairhmm-template-main: pairhmm-template-main.o convert_char.o
pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
libJNILoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) $(OMPCFLAGS) -shared -o $@ $(LIBOBJECTS)
%.o: %.cc
$(OBJECTS): %.o: %.cc
@mkdir -p $(DEPDIR)
$(COMPILE.cpp) -MMD -MF $(DF) $(JNI_COMPILATION_FLAGS) $(CXXFLAGS) $(OUTPUT_OPTION) $<
$(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
clean:

View File

@ -0,0 +1,12 @@
#include "template.h"
#include "define-float.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
#include "define-double.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
template double compute_full_prob_avxd<double>(testcase* tc, double* nextlog);
template float compute_full_prob_avxs<float>(testcase* tc, float* nextlog);

View File

@ -0,0 +1,19 @@
void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
template<class NUMBER> void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
_256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
_256_TYPE distm, _256_TYPE _1_distm);
void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);

View File

@ -0,0 +1,85 @@
#include "headers.h"
#include "template.h"
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
{
int r, c;
int ROWS = tc->rslen + 1;
int COLS = tc->haplen + 1;
Context<NUMBER> ctx;
NUMBER M[MROWS][MCOLS];
NUMBER X[MROWS][MCOLS];
NUMBER Y[MROWS][MCOLS];
NUMBER p[MROWS][6];
p[0][MM] = ctx._(0.0);
p[0][GapM] = ctx._(0.0);
p[0][MX] = ctx._(0.0);
p[0][XX] = ctx._(0.0);
p[0][MY] = ctx._(0.0);
p[0][YY] = ctx._(0.0);
for (r = 1; r < ROWS; r++)
{
int _i = tc->i[r-1] & 127;
int _d = tc->d[r-1] & 127;
int _c = tc->c[r-1] & 127;
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
p[r][MX] = ctx.ph2pr[_i];
p[r][XX] = ctx.ph2pr[_c];
p[r][MY] = ctx.ph2pr[_d];
p[r][YY] = ctx.ph2pr[_c];
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
}
for (c = 0; c < COLS; c++)
{
M[0][c] = ctx._(0.0);
X[0][c] = ctx._(0.0);
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
}
for (r = 1; r < ROWS; r++)
{
M[r][0] = ctx._(0.0);
X[r][0] = X[r-1][0] * p[r][XX];
Y[r][0] = ctx._(0.0);
}
NUMBER result = ctx._(0.0);
for (r = 1; r < ROWS; r++)
for (c = 1; c < COLS; c++)
{
char _rs = tc->rs[r-1];
char _hap = tc->hap[c-1];
int _q = tc->q[r-1] & 127;
NUMBER distm = ctx.ph2pr[_q];
if (_rs == _hap || _rs == 'N' || _hap == 'N')
distm = ctx._(1.0) - distm;
else
distm = distm/3;
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
}
for (c = 0; c < COLS; c++)
{
result += M[ROWS-1][c] + X[ROWS-1][c];
}
if (before_last_log != NULL)
*before_last_log = result;
return result;
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
}
template double compute_full_prob<double>(testcase* tc, double* nextbuf);
template float compute_full_prob<float>(testcase* tc, float* nextbuf);

View File

@ -0,0 +1,24 @@
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
#include <ctype.h>
#include <sys/time.h>
#include <immintrin.h>
#include <emmintrin.h>
#include <omp.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cmath>

Binary file not shown.

View File

@ -1,44 +1,25 @@
#include "headers.h"
#include <jni.h>
#include <assert.h>
#include <stdio.h>
#include "org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h"
#include <vector>
#include <cstdio>
#include <string>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <iostream>
#include <fstream>
#include <sstream>
#include <immintrin.h>
#include <emmintrin.h>
#include <omp.h>
using namespace std;
#define ENABLE_ASSERTIONS 1
//#define DEBUG 1
#define DO_PROFILING 1
#define DEBUG 1
//#define DEBUG0_1 1
//#define DEBUG3 1
#include "template.h"
#include "utils.h"
#include "define-float.h"
#include "avx_function_prototypes.h"
#include "define-double.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
#include "avx_function_prototypes.h"
#define MM 0
#define GapM 1
#define MX 2
#define XX 3
#define MY 4
#define YY 5
using namespace std;
class LoadTimeInitializer
{
@ -46,6 +27,54 @@ class LoadTimeInitializer
LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
m_sumNumReads = 0;
m_sumSquareNumReads = 0;
m_sumNumHaplotypes = 0;
m_sumSquareNumHaplotypes = 0;
m_sumNumTestcases = 0;
m_sumSquareNumTestcases = 0;
m_num_invocations = 0;
if(is_avx_supported())
{
cout << "Using AVX accelerated implementation of PairHMM\n";
g_compute_full_prob_float = compute_full_prob_avxs<float>;
g_compute_full_prob_double = compute_full_prob_avxd<double>;
}
else
{
cout << "Using un-vectorized C++ implementation of PairHMM\n";
g_compute_full_prob_float = compute_full_prob<float>;
g_compute_full_prob_double = compute_full_prob<double>;
}
cout.flush();
//if(is_sse42_supported())
//{
//g_compute_full_prob_float = compute_full_prob_avxs<float>;
//g_compute_full_prob_double = compute_full_prob_avxd<double>;
//}
}
void print_profiling()
{
double mean_val;
cout <<"Invocations : "<<m_num_invocations<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\n";
mean_val = m_sumNumReads/m_num_invocations;
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumHaplotypes/m_num_invocations;
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumTestcases/m_num_invocations;
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\n";
cout.flush();
}
~LoadTimeInitializer()
{
#ifdef DO_PROFILING
print_profiling();
#endif
}
jfieldID m_readBasesFID;
jfieldID m_readQualsFID;
@ -53,18 +82,18 @@ class LoadTimeInitializer
jfieldID m_deletionGOPFID;
jfieldID m_overallGCPFID;
jfieldID m_haplotypeBasesFID;
//used to compute avg, variance of #testcases
double m_sumNumReads;
double m_sumSquareNumReads;
double m_sumNumHaplotypes;
double m_sumSquareNumHaplotypes;
double m_sumNumTestcases;
double m_sumSquareNumTestcases;
unsigned m_num_invocations;
};
LoadTimeInitializer g_load_time_initializer;
void debug_dump(string filename, string s, bool to_append, bool add_newline)
{
ofstream fptr;
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
fptr << s;
if(add_newline)
fptr << "\n";
fptr.close();
}
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities
(JNIEnv* env, jclass thisObject,
@ -327,8 +356,15 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&(tc_array[tc_idx]));
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
likelihoodDoubleArray[tc_idx] = result;
}
#ifdef DEBUG
@ -353,5 +389,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RELEASE_MODE);
haplotypeBasesArrayVector.clear();
tc_array.clear();
#ifdef DO_PROFILING
g_load_time_initializer.m_sumNumReads += numReads;
g_load_time_initializer.m_sumSquareNumReads += numReads*numReads;
g_load_time_initializer.m_sumNumHaplotypes += numHaplotypes;
g_load_time_initializer.m_sumSquareNumHaplotypes += numHaplotypes*numHaplotypes;
g_load_time_initializer.m_sumNumTestcases += numTestCases;
g_load_time_initializer.m_sumSquareNumTestcases += numTestCases*numTestCases;
++(g_load_time_initializer.m_num_invocations);
#endif
}

View File

@ -1,29 +1,15 @@
//#define DEBUG 1
//#define DEBUG0_1 1
//#define DEBUG3 1
#define MM 0
#define GapM 1
#define MX 2
#define XX 3
#define MY 4
#define YY 5
#include <cstdio>
#include <cstring>
#include <cstdlib>
#include <cmath>
#include <omp.h>
#include <emmintrin.h>
#include "headers.h"
#include "template.h"
#include "utils.h"
//#include "define-float.h"
//#include "shift_template.c"
//#include "pairhmm-template-kernel.cc"
#include "define-float.h"
#include "avx_function_prototypes.h"
#include "define-double.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
#include "avx_function_prototypes.h"
using namespace std;
class LoadTimeInitializer
@ -36,85 +22,6 @@ class LoadTimeInitializer
};
LoadTimeInitializer g_load_time_initializer;
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
{
int r, c;
int ROWS = tc->rslen + 1;
int COLS = tc->haplen + 1;
Context<NUMBER> ctx;
NUMBER M[MROWS][MCOLS];
NUMBER X[MROWS][MCOLS];
NUMBER Y[MROWS][MCOLS];
NUMBER p[MROWS][6];
p[0][MM] = ctx._(0.0);
p[0][GapM] = ctx._(0.0);
p[0][MX] = ctx._(0.0);
p[0][XX] = ctx._(0.0);
p[0][MY] = ctx._(0.0);
p[0][YY] = ctx._(0.0);
for (r = 1; r < ROWS; r++)
{
int _i = tc->i[r-1] & 127;
int _d = tc->d[r-1] & 127;
int _c = tc->c[r-1] & 127;
p[r][MM] = ctx._(1.0) - ctx.ph2pr[(_i + _d) & 127];
p[r][GapM] = ctx._(1.0) - ctx.ph2pr[_c];
p[r][MX] = ctx.ph2pr[_i];
p[r][XX] = ctx.ph2pr[_c];
p[r][MY] = ctx.ph2pr[_d];
p[r][YY] = ctx.ph2pr[_c];
//p[r][MY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
//p[r][YY] = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
}
for (c = 0; c < COLS; c++)
{
M[0][c] = ctx._(0.0);
X[0][c] = ctx._(0.0);
Y[0][c] = ctx.INITIAL_CONSTANT / (tc->haplen);
}
for (r = 1; r < ROWS; r++)
{
M[r][0] = ctx._(0.0);
X[r][0] = X[r-1][0] * p[r][XX];
Y[r][0] = ctx._(0.0);
}
NUMBER result = ctx._(0.0);
for (r = 1; r < ROWS; r++)
for (c = 1; c < COLS; c++)
{
char _rs = tc->rs[r-1];
char _hap = tc->hap[c-1];
int _q = tc->q[r-1] & 127;
NUMBER distm = ctx.ph2pr[_q];
if (_rs == _hap || _rs == 'N' || _hap == 'N')
distm = ctx._(1.0) - distm;
else
distm = distm/3;
M[r][c] = distm * (M[r-1][c-1] * p[r][MM] + X[r-1][c-1] * p[r][GapM] + Y[r-1][c-1] * p[r][GapM]);
X[r][c] = M[r-1][c] * p[r][MX] + X[r-1][c] * p[r][XX];
Y[r][c] = M[r][c-1] * p[r][MY] + Y[r][c-1] * p[r][YY];
}
for (c = 0; c < COLS; c++)
{
result += M[ROWS-1][c] + X[ROWS-1][c];
}
if (before_last_log != NULL)
*before_last_log = result;
return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
}
#define BATCH_SIZE 10000
#define RUN_HYBRID
@ -129,45 +36,55 @@ int main(int argc, char** argv)
if(argc >= 3 && string(argv[2]) == "1")
use_old_read_testcase = true;
testcase tc;
if(use_old_read_testcase)
if(true)
{
FILE* fptr = fopen(argv[1],"r");
while(!feof(fptr))
{
if(read_testcase(&tc, fptr) >= 0)
{
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
delete tc.rs;
delete tc.hap;
}
}
fclose(fptr);
g_compute_full_prob_double = GEN_INTRINSIC(compute_full_prob_avx, d)<double>;
g_compute_full_prob_float = GEN_INTRINSIC(compute_full_prob_avx, s)<float>;
}
else
{
g_compute_full_prob_double = compute_full_prob<double>;
g_compute_full_prob_float = compute_full_prob<float>;
}
std::ifstream ifptr;
FILE* fptr = 0;
if(use_old_read_testcase)
{
fptr = fopen(argv[1],"r");
assert(fptr);
}
else
{
std::ifstream ifptr;
std::vector<std::string> tokens;
ifptr.open(argv[1]);
assert(ifptr.is_open());
while(1)
{
tokens.clear();
if(read_mod_testcase(ifptr, &tc, false) < 0)
break;
//double result = 0;
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
cout << std::scientific << compute_full_prob<double>(&tc) << " "<<result<<"\n";
delete tc.rs;
delete tc.hap;
}
ifptr.close();
}
testcase tc;
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
if(break_value < 0)
break;
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
cout << std::scientific << baseline_result << " "<<result<<"\n";
delete tc.rs;
delete tc.hap;
}
if(use_old_read_testcase)
fclose(fptr);
else
ifptr.close();
return 0;
#if 0

View File

@ -34,14 +34,14 @@ void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, i
int mOffset = (col-1) % maskBitCnt ;
MASK_TYPE bitMask = ((MASK_TYPE)0x1) << (maskBitCnt-1-mOffset) ;
char hapChar = tc.hap[col-1] ;
char hapChar = ConvertChar::get(tc.hap[col-1]);
if (hapChar == AMBIG_CHAR) {
for (int ci=0; ci < NUM_DISTINCT_CHARS; ++ci)
maskArr[mIndex][ci] |= bitMask ;
}
maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ;
maskArr[mIndex][hapChar] |= bitMask ;
// bit corresponding to col 1 will be the MSB of the mask 0
// bit corresponding to col 2 will be the MSB-1 of the mask 0
// ...
@ -75,11 +75,11 @@ void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& c
for (int ei=0; ei < AVX_LENGTH/2; ++ei) {
SET_MASK_WORD(currMaskVecLow.masks[ei], maskArr[maskIndex][rsArr[ei]],
lastMaskShiftOut[ei], ei, maskBitCnt) ;
lastMaskShiftOut[ei], ei, maskBitCnt) ;
int ei2 = ei + AVX_LENGTH/2 ; // the second entry index
SET_MASK_WORD(currMaskVecHigh.masks[ei], maskArr[maskIndex][rsArr[ei2]],
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
}
}

View File

@ -1,4 +1,5 @@
#include <stdio.h>
#include "headers.h"
#include <immintrin.h>
#include <emmintrin.h>
#include <omp.h>
@ -7,11 +8,11 @@
#include "define-float.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
#include "avx_function_prototypes.h"
#include "define-double.h"
#include "shift_template.c"
#include "pairhmm-template-kernel.cc"
#include "avx_function_prototypes.h"
#define BATCH_SIZE 10000
//#define RUN_HYBRID

View File

@ -3,8 +3,8 @@ rm -f *.txt
export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
-I /data/simulated/sim1M_pairs_final.bam \
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
-stand_call_conf 50.0 \
-stand_emit_conf 10.0 \
@ -14,3 +14,8 @@ java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
#-I /data/simulated/sim1M_pairs_final.bam \
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
#-R /data/broad/samples/joint_variant_calling/broad_reference/ucsc.hg19.fasta \
#-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
#--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/dbsnp_138.hg19.vcf \

View File

@ -1,24 +1,14 @@
#ifndef TEMPLATES_H_
#define TEMPLATES_H_
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <math.h>
#include <stdint.h>
#include <assert.h>
#include <sys/time.h>
#include <immintrin.h>
#include <ctype.h>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <vector>
#include "headers.h"
#define MM 0
#define GapM 1
#define MX 2
#define XX 3
#define MY 4
#define YY 5
#define MROWS 500
#define MCOLS 1000
@ -130,6 +120,7 @@ struct Context<float>
};
typedef struct
{
int rslen, haplen;
@ -182,7 +173,8 @@ public:
return conversionTable[input] ;
}
} ;
};
#endif

View File

@ -1,7 +1,36 @@
#include "headers.h"
#include "template.h"
uint8_t ConvertChar::conversionTable[255];
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
using namespace std;
bool is_avx_supported()
{
int ecx = 0, edx = 0, ebx = 0;
__asm__("cpuid"
: "=b" (ebx),
"=c" (ecx),
"=d" (edx)
: "a" (1)
);
return ((ecx >> 28)&1) == 1;
}
bool is_sse42_supported()
{
int ecx = 0, edx = 0, ebx = 0;
__asm__("cpuid"
: "=b" (ebx),
"=c" (ecx),
"=d" (edx)
: "a" (1)
);
return ((ecx >> 20)&1) == 1;
}
int normalize(char c)
{
return ((int) (c - 33));
@ -184,3 +213,13 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
return tokens.size();
}
void debug_dump(string filename, string s, bool to_append, bool add_newline)
{
ofstream fptr;
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
fptr << s;
if(add_newline)
fptr << "\n";
fptr.close();
}

View File

@ -0,0 +1,13 @@
#ifndef PAIRHMM_UTIL_H
#define PAIRHMM_UTIL_H
#define MIN_ACCEPTED 1e-28f
bool is_avx_supported();
bool is_sse42_supported();
extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log);
extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log);
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
#endif

View File

@ -66,13 +66,13 @@ import java.util.Map;
*/
public class JNILoglessPairHMM extends LoglessPairHMM {
private static final boolean debug = false; //simulates ifdef
private static final boolean debug = true; //simulates ifdef
private static final boolean debug0_1 = false; //simulates ifdef
private static final boolean debug1 = false; //simulates ifdef
private static final boolean debug2 = false;
private static final boolean debug3 = false;
private int numComputeLikelihoodCalls = 0;
//Used to copy references to byteArrays to JNI from reads
protected class JNIReadDataHolderClass
{
@ -138,6 +138,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
jniInitialize(readMaxLength, haplotypeMaxLength);
}
//Real compute kernel
private native void jniComputeLikelihoods(int numReads, int numHaplotypes,
JNIReadDataHolderClass[] readDataArray, JNIHaplotypeDataHolderClass[] haplotypeDataArray,
@ -160,6 +161,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
}
int readListSize = reads.size();
int alleleHaplotypeMapSize = alleleHaplotypeMap.size();
int numTestcases = readListSize*alleleHaplotypeMapSize;
if(debug0_1)
System.out.println("Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize);
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
@ -275,6 +277,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
return likelihoodMap;
}
/**
* {@inheritDoc}
*/