1. Integrated Mohammad's SSE4.2 code, Mustafa's bug fix and code to fix the
SSE compilation warning. 2. Added code to dynamically select between AVX, SSE4.2 and normal C++ (in that order) 3. Created multiple files to compile with different compilation flags: avx_function_prototypes.cc is compiled with -xAVX while sse_function_instantiations.cc is compiled with -xSSE4.2 flag. 4. Added jniClose() and support in Java (HaplotypeCaller, PairHMMLikelihoodCalculationEngine) to call this function at the end of the program. 5. Removed debug code, kept assertions and profiling in C++ 6. Disabled OpenMP for now.
This commit is contained in:
parent
25aecb96e0
commit
7180c392af
|
|
@ -8,3 +8,6 @@ pairhmm-template-main
|
|||
*.swp
|
||||
checker
|
||||
reformat
|
||||
subdir_checkout.sh
|
||||
avx/
|
||||
sse/
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
OMPCFLAGS=-fopenmp
|
||||
#OMPCFLAGS=-fopenmp
|
||||
#OMPLDFLAGS=-lgomp
|
||||
|
||||
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
|
|
@ -20,7 +20,7 @@ DEPDIR=.deps
|
|||
DF=$(DEPDIR)/$(*).d
|
||||
|
||||
#Common across libJNI and sandbox
|
||||
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc
|
||||
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc
|
||||
#Part of libJNI
|
||||
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES)
|
||||
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
|
||||
|
|
@ -33,7 +33,7 @@ NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pa
|
|||
#Use -xAVX for these files
|
||||
AVX_SOURCES=avx_function_instantiations.cc
|
||||
#Use -xSSE4.2 for these files
|
||||
SSE_SOURCES=
|
||||
SSE_SOURCES=sse_function_instantiations.cc
|
||||
|
||||
NO_VECTOR_OBJECTS=$(NO_VECTOR_SOURCES:.cc=.o)
|
||||
AVX_OBJECTS=$(AVX_SOURCES:.cc=.o)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,11 @@
|
|||
#include "template.h"
|
||||
|
||||
#undef SIMD_TYPE
|
||||
#undef SIMD_TYPE_SSE
|
||||
|
||||
#define SIMD_TYPE avx
|
||||
#define SIMD_TYPE_AVX
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
|
|
|||
|
|
@ -1,19 +0,0 @@
|
|||
void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
|
||||
template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
|
||||
template<class NUMBER> void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
_256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm);
|
||||
void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
||||
|
|
@ -18,34 +18,34 @@
|
|||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef SET_VEC_ZERO
|
||||
#undef VEC_OR
|
||||
#undef VEC_ADD
|
||||
#undef VEC_SUB
|
||||
#undef VEC_MUL
|
||||
#undef VEC_DIV
|
||||
#undef VEC_BLEND
|
||||
#undef VEC_BLENDV
|
||||
#undef VEC_CAST_256_128
|
||||
#undef VEC_EXTRACT_128
|
||||
#undef VEC_EXTRACT_UNIT
|
||||
#undef VEC_SET1_VAL128
|
||||
#undef VEC_MOVE
|
||||
#undef VEC_CAST_128_256
|
||||
#undef VEC_INSERT_VAL
|
||||
#undef VEC_CVT_128_256
|
||||
#undef VEC_SET1_VAL
|
||||
#undef VEC_POPCVT_CHAR
|
||||
#undef VEC_LDPOPCVT_CHAR
|
||||
#undef VEC_CMP_EQ
|
||||
#undef VEC_SET_LSE
|
||||
#undef SHIFT_HAP
|
||||
#undef print256b
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef print256b(__v1)
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX
|
||||
#undef VEC_SHIFT_LEFT_1BIT
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef _256_INT_TYPE
|
||||
#endif
|
||||
|
||||
|
|
@ -83,7 +83,7 @@
|
|||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_pd(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm256_div_pd(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@
|
|||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_ps(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm256_div_ps(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
|
|
@ -133,7 +133,7 @@
|
|||
_mm256_set_ps(zero, zero, zero, zero, zero, zero, zero, __val);
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
_vector_shift_lasts(__v1, __val.f);
|
||||
_vector_shift_lastavxs(__v1, __val.f);
|
||||
|
||||
#define print256b(__v1) \
|
||||
print256bFP(__v1)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,128 @@
|
|||
#ifdef PRECISION
|
||||
#undef PRECISION
|
||||
#undef MAIN_TYPE
|
||||
#undef MAIN_TYPE_SIZE
|
||||
#undef UNION_TYPE
|
||||
#undef IF_128
|
||||
#undef IF_MAIN_TYPE
|
||||
#undef SHIFT_CONST1
|
||||
#undef SHIFT_CONST2
|
||||
#undef SHIFT_CONST3
|
||||
#undef _128_TYPE
|
||||
#undef _256_TYPE
|
||||
#undef AVX_LENGTH
|
||||
#undef MAVX_COUNT
|
||||
#undef HAP_TYPE
|
||||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_INSERT_UNIT(__v1,__ins,__im)
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef print256b(__v1)
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef _256_INT_TYPE
|
||||
|
||||
#endif
|
||||
|
||||
#define SSE
|
||||
#define PRECISION d
|
||||
|
||||
#define MAIN_TYPE double
|
||||
#define MAIN_TYPE_SIZE 64
|
||||
#define UNION_TYPE mix_D128
|
||||
#define IF_128 IF_128d
|
||||
#define IF_MAIN_TYPE IF_64
|
||||
#define SHIFT_CONST1 1
|
||||
#define SHIFT_CONST2 8
|
||||
#define SHIFT_CONST3 0
|
||||
#define _128_TYPE __m128d
|
||||
#define _256_TYPE __m128d
|
||||
#define _256_INT_TYPE __m128i
|
||||
#define AVX_LENGTH 2
|
||||
#define MAVX_COUNT (MROWS+3)/AVX_LENGTH
|
||||
#define HAP_TYPE __m128i
|
||||
#define MASK_TYPE uint64_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFFFFFFFFFFL
|
||||
#define MASK_VEC MaskVec_D128
|
||||
|
||||
#define VEC_EXTRACT_UNIT(__v1, __im) \
|
||||
_mm_extract_epi64(__v1, __im)
|
||||
|
||||
#define VEC_INSERT_UNIT(__v1,__ins,__im) \
|
||||
_mm_insert_epi64(__v1,__ins,__im)
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm_or_pd(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm_add_pd(__v1, __v2)
|
||||
|
||||
#define VEC_SUB(__v1, __v2) \
|
||||
_mm_sub_pd(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm_mul_pd(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm_div_pd(__v1, __v2)
|
||||
|
||||
#define VEC_CMP_EQ(__v1, __v2) \
|
||||
_mm_cmpeq_pd(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
_mm_blend_pd(__v1, __v2, __mask)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm_blendv_pd(__v1, __v2, __maskV)
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
__v1 = _mm_insert_epi32(_mm_slli_si128(__v1, 4), __val.i, 0)
|
||||
|
||||
#define VEC_CVT_128_256(__v1) \
|
||||
_mm_cvtepi32_pd(__v1)
|
||||
|
||||
#define VEC_SET1_VAL(__val) \
|
||||
_mm_set1_pd(__val)
|
||||
|
||||
#define VEC_POPCVT_CHAR(__ch) \
|
||||
_mm_cvtepi32_pd(_mm_set1_epi32(__ch))
|
||||
|
||||
#define VEC_SET_LSE(__val) \
|
||||
_mm_set_pd(zero, __val);
|
||||
|
||||
#define VEC_LDPOPCVT_CHAR(__addr) \
|
||||
_mm_cvtepi32_pd(_mm_loadu_si128((__m128i const *)__addr))
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm_castsi128_pd(_mm_set_epi64(__vsHigh, __vsLow))
|
||||
|
||||
#define VEC_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_si64(__vs, 1)
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,127 @@
|
|||
#ifdef PRECISION
|
||||
#undef PRECISION
|
||||
#undef MAIN_TYPE
|
||||
#undef MAIN_TYPE_SIZE
|
||||
#undef UNION_TYPE
|
||||
#undef IF_128
|
||||
#undef IF_MAIN_TYPE
|
||||
#undef SHIFT_CONST1
|
||||
#undef SHIFT_CONST2
|
||||
#undef SHIFT_CONST3
|
||||
#undef _128_TYPE
|
||||
#undef _256_TYPE
|
||||
#undef AVX_LENGTH
|
||||
#undef MAVX_COUNT
|
||||
#undef HAP_TYPE
|
||||
#undef MASK_TYPE
|
||||
#undef MASK_ALL_ONES
|
||||
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_INSERT_UNIT(__v1,__ins,__im)
|
||||
#undef SET_VEC_ZERO(__vec)
|
||||
#undef VEC_OR(__v1, __v2)
|
||||
#undef VEC_ADD(__v1, __v2)
|
||||
#undef VEC_SUB(__v1, __v2)
|
||||
#undef VEC_MUL(__v1, __v2)
|
||||
#undef VEC_DIV(__v1, __v2)
|
||||
#undef VEC_BLEND(__v1, __v2, __mask)
|
||||
#undef VEC_BLENDV(__v1, __v2, __maskV)
|
||||
#undef VEC_CAST_256_128(__v1)
|
||||
#undef VEC_EXTRACT_128(__v1, __im)
|
||||
#undef VEC_EXTRACT_UNIT(__v1, __im)
|
||||
#undef VEC_SET1_VAL128(__val)
|
||||
#undef VEC_MOVE(__v1, __val)
|
||||
#undef VEC_CAST_128_256(__v1)
|
||||
#undef VEC_INSERT_VAL(__v1, __val, __pos)
|
||||
#undef VEC_CVT_128_256(__v1)
|
||||
#undef VEC_SET1_VAL(__val)
|
||||
#undef VEC_POPCVT_CHAR(__ch)
|
||||
#undef VEC_LDPOPCVT_CHAR(__addr)
|
||||
#undef VEC_CMP_EQ(__v1, __v2)
|
||||
#undef VEC_SET_LSE(__val)
|
||||
#undef SHIFT_HAP(__v1, __val)
|
||||
#undef print256b(__v1)
|
||||
#undef MASK_VEC
|
||||
#undef VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst)
|
||||
#undef VEC_SHIFT_LEFT_1BIT(__vs)
|
||||
#undef MASK_ALL_ONES
|
||||
#undef COMPARE_VECS(__v1, __v2)
|
||||
#undef _256_INT_TYPE
|
||||
|
||||
#endif
|
||||
|
||||
#define SSE
|
||||
#define PRECISION s
|
||||
|
||||
#define MAIN_TYPE float
|
||||
#define MAIN_TYPE_SIZE 32
|
||||
#define UNION_TYPE mix_F128
|
||||
#define IF_128 IF_128f
|
||||
#define IF_MAIN_TYPE IF_32
|
||||
#define SHIFT_CONST1 3
|
||||
#define SHIFT_CONST2 4
|
||||
#define SHIFT_CONST3 0
|
||||
#define _128_TYPE __m128
|
||||
#define _256_TYPE __m128
|
||||
#define _256_INT_TYPE __m128i
|
||||
#define AVX_LENGTH 4
|
||||
#define MAVX_COUNT (MROWS+3)/AVX_LENGTH
|
||||
#define HAP_TYPE UNION_TYPE
|
||||
#define MASK_TYPE uint32_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFF
|
||||
#define MASK_VEC MaskVec_F128
|
||||
|
||||
#define VEC_EXTRACT_UNIT(__v1, __im) \
|
||||
_mm_extract_epi32(__v1, __im)
|
||||
|
||||
#define VEC_INSERT_UNIT(__v1,__ins,__im) \
|
||||
_mm_insert_epi32(__v1,__ins,__im)
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm_or_ps(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm_add_ps(__v1, __v2)
|
||||
|
||||
#define VEC_SUB(__v1, __v2) \
|
||||
_mm_sub_ps(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm_mul_ps(__v1, __v2)
|
||||
|
||||
#define VEC_DIV(__v1, __v2) \
|
||||
_mm_div_ps(__v1, __v2)
|
||||
|
||||
#define VEC_CMP_EQ(__v1, __v2) \
|
||||
_mm_cmpeq_ps(__v1, __v2)
|
||||
|
||||
#define VEC_BLEND(__v1, __v2, __mask) \
|
||||
_mm_blend_ps(__v1, __v2, __mask)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm_blendv_ps(__v1, __v2, __maskV)
|
||||
|
||||
#define SHIFT_HAP(__v1, __val) \
|
||||
_vector_shift_lastsses(__v1, __val.f)
|
||||
|
||||
#define VEC_CVT_128_256(__v1) \
|
||||
_mm_cvtepi32_ps(__v1.i)
|
||||
|
||||
#define VEC_SET1_VAL(__val) \
|
||||
_mm_set1_ps(__val)
|
||||
|
||||
#define VEC_POPCVT_CHAR(__ch) \
|
||||
_mm_cvtepi32_ps(_mm_set1_epi32(__ch))
|
||||
|
||||
#define VEC_SET_LSE(__val) \
|
||||
_mm_set_ps(zero, zero, zero, __val);
|
||||
|
||||
#define VEC_LDPOPCVT_CHAR(__addr) \
|
||||
_mm_cvtepi32_ps(_mm_loadu_si128((__m128i const *)__addr))
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm_cvtpi32x2_ps(__vsLow, __vsHigh)
|
||||
|
||||
#define VEC_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_pi32(__vs, 1)
|
||||
|
||||
|
|
@ -17,6 +17,7 @@
|
|||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
|
|
|||
|
|
@ -1,451 +0,0 @@
|
|||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
// /usr/intel/pkgs/icc/13.0.0e/bin/icc -o ed -O3 ed.cpp -xAVX -openmp -openmp-link static
|
||||
|
||||
|
||||
#include <iostream>
|
||||
|
||||
#include <malloc.h>
|
||||
#include <assert.h>
|
||||
#include <sys/time.h>
|
||||
#include <omp.h>
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <tmmintrin.h>
|
||||
#include <smmintrin.h>
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <map>
|
||||
#include <memory.h>
|
||||
#include "template.h"
|
||||
|
||||
using namespace std ;
|
||||
|
||||
#define CHECK_MASK_CORRECTNESS
|
||||
|
||||
template <class T>
|
||||
string getBinaryStr (T val, int numBitsToWrite) {
|
||||
|
||||
ostringstream oss ;
|
||||
uint64_t mask = ((T) 0x1) << (numBitsToWrite-1) ;
|
||||
for (int i=numBitsToWrite-1; i >= 0; --i) {
|
||||
oss << ((val & mask) >> i) ;
|
||||
mask >>= 1 ;
|
||||
}
|
||||
return oss.str() ;
|
||||
}
|
||||
|
||||
int normalize(char c)
|
||||
{
|
||||
return ((int) (c - 33));
|
||||
}
|
||||
|
||||
|
||||
uint8_t ConvertChar::conversionTable[255] ;
|
||||
|
||||
int read_testcase(testcase *tc, FILE* ifp)
|
||||
{
|
||||
char *q, *i, *d, *c, *line = NULL;
|
||||
int _q, _i, _d, _c;
|
||||
int x, size = 0;
|
||||
ssize_t read;
|
||||
|
||||
read = getline(&line, (size_t *) &size, ifp == 0 ? stdin : ifp);
|
||||
if (read == -1)
|
||||
return -1;
|
||||
|
||||
|
||||
tc->hap = (char *) malloc(size);
|
||||
tc->rs = (char *) malloc(size);
|
||||
q = (char *) malloc(size);
|
||||
i = (char *) malloc(size);
|
||||
d = (char *) malloc(size);
|
||||
c = (char *) malloc(size);
|
||||
|
||||
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
|
||||
return -1;
|
||||
|
||||
tc->haplen = strlen(tc->hap);
|
||||
tc->rslen = strlen(tc->rs);
|
||||
assert(tc->rslen < MROWS);
|
||||
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
||||
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
||||
|
||||
//tc->q = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->i = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->d = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->c = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
|
||||
for (x = 0; x < tc->rslen; x++)
|
||||
{
|
||||
_q = normalize(q[x]);
|
||||
_i = normalize(i[x]);
|
||||
_d = normalize(d[x]);
|
||||
_c = normalize(c[x]);
|
||||
tc->q[x] = (_q < 6) ? 6 : _q;
|
||||
tc->i[x] = _i;
|
||||
tc->d[x] = _d;
|
||||
tc->c[x] = _c;
|
||||
tc->irs[x] = tc->rs[x];
|
||||
}
|
||||
for (x = 0; x < tc->haplen; x++)
|
||||
tc->ihap[x] = tc->hap[x];
|
||||
|
||||
|
||||
free(q);
|
||||
free(i);
|
||||
free(d);
|
||||
free(c);
|
||||
free(line);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
#define ALIGN __attribute__ ((aligned(32)))
|
||||
|
||||
|
||||
typedef union ALIGN {
|
||||
//__m256i vi;
|
||||
__m128 vf ;
|
||||
__m128i vi ;
|
||||
__m128d vd;
|
||||
|
||||
uint8_t b[16];
|
||||
//uint16_t hw[8] ;
|
||||
uint32_t w[4] ;
|
||||
uint64_t dw[2] ;
|
||||
|
||||
float f[4] ;
|
||||
//double d[2] ;
|
||||
|
||||
} v128 ;
|
||||
|
||||
typedef union ALIGN {
|
||||
__m256i vi;
|
||||
__m256 vf ;
|
||||
__m256d vd;
|
||||
//__m128i vi128[2] ;
|
||||
|
||||
uint8_t b[32];
|
||||
//uint16_t hw[16] ;
|
||||
uint32_t w[8] ;
|
||||
uint64_t dw[4] ;
|
||||
|
||||
float f[8] ;
|
||||
double d[4] ;
|
||||
|
||||
} v256 ;
|
||||
|
||||
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
#define VEC_ENTRY_CNT 8
|
||||
#define VEC_LEN 256
|
||||
#define VTYPE vf
|
||||
#define VTYPEI vi
|
||||
#define VENTRY f
|
||||
#define VENTRYI w
|
||||
#define MTYPE uint32_t
|
||||
#define MASK_ALL_ONES 0xFFFFFFFF
|
||||
|
||||
|
||||
#define VECTOR v256
|
||||
#define VECTOR_SSE v128
|
||||
|
||||
#define SET_VEC_ZERO(__vec) \
|
||||
__vec= _mm256_setzero_ps()
|
||||
|
||||
#define SET_VEC_ONES(__vec) \
|
||||
__vec = _mm256_set1_epi32(0xFFFFFFFF)
|
||||
|
||||
#define VEC_OR(__v1, __v2) \
|
||||
_mm256_or_ps(__v1, __v2)
|
||||
|
||||
#define VEC_ADD(__v1, __v2) \
|
||||
_mm256_add_ps(__v1, __v2)
|
||||
|
||||
#define VEC_MUL(__v1, __v2) \
|
||||
_mm256_mul_ps(__v1, __v2)
|
||||
|
||||
#define VEC_BLENDV(__v1, __v2, __maskV) \
|
||||
_mm256_blendv_ps(__v1, __v2, __maskV)
|
||||
|
||||
#define VEC_SSE_TO_AVX(__vsLow, __vsHigh, __vdst) \
|
||||
__vdst = _mm256_castps128_ps256(__vsLow) ; \
|
||||
__vdst = _mm256_insertf128_ps(__vdst, __vsHigh, 1) ;
|
||||
|
||||
#define VECS_SHIFT_LEFT_1BIT(__vs) \
|
||||
__vs = _mm_slli_epi32(__vs, 1)
|
||||
|
||||
|
||||
#define VECS_SHIFT_RIGHT_WHOLE(__vs, __cnt) { \
|
||||
uint64_t __mask = ; \
|
||||
uint64_t __shiftWord = ((((uint64_t) 0x1) << __cnt) - 1 ) & __vs.dw[1] ; \
|
||||
__shiftWord <<= (64-cnt) ; \
|
||||
__vs = _mm_slri_epi64(__vs, __cnt) ; \
|
||||
__vs.dw[0] |= __shiftWord ; \
|
||||
}
|
||||
|
||||
|
||||
#define GET_MASK_WORD_OLD(__mask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MTYPE __nextShiftOut = (__mask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
__mask >>= __shiftBy ; \
|
||||
__mask |= __lastShiftOut ; \
|
||||
__lastShiftOut = __nextShiftOut ; \
|
||||
}
|
||||
|
||||
|
||||
#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MTYPE __bitMask = (((MTYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MTYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
__dstMask = (__srcMask >> __shiftBy) | __lastShiftOut ; \
|
||||
__lastShiftOut = __nextShiftOut ; \
|
||||
}
|
||||
|
||||
|
||||
void precompute_masks_avx(const testcase& tc, int COLS, int numMaskVecs,
|
||||
MTYPE (*maskArr)[NUM_DISTINCT_CHARS]) {
|
||||
|
||||
const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ;
|
||||
|
||||
for (int vi=0; vi < numMaskVecs; ++vi) {
|
||||
for (int rs=0; rs < NUM_DISTINCT_CHARS; ++rs) {
|
||||
maskArr[vi][rs] = 0 ;
|
||||
}
|
||||
maskArr[vi][AMBIG_CHAR] = MASK_ALL_ONES ;
|
||||
}
|
||||
|
||||
for (int col=1; col < COLS; ++col) {
|
||||
int mIndex = (col-1) / maskBitCnt ;
|
||||
int mOffset = (col-1) % maskBitCnt ;
|
||||
MTYPE bitMask = ((MTYPE)0x1) << (maskBitCnt-1-mOffset) ;
|
||||
|
||||
char hapChar = tc.hap[col-1] ;
|
||||
|
||||
if (hapChar == AMBIG_CHAR) {
|
||||
maskArr[mIndex][0] |= bitMask ;
|
||||
maskArr[mIndex][1] |= bitMask ;
|
||||
maskArr[mIndex][2] |= bitMask ;
|
||||
maskArr[mIndex][3] |= bitMask ;
|
||||
}
|
||||
|
||||
//cout << hapChar << " " << mIndex << " " << getBinaryStr<MTYPE>(bitMask, 32)
|
||||
// << endl ;
|
||||
//cout << getBinaryStr<MTYPE>(maskArr[0][hapChar],32) << endl ;
|
||||
//exit(0) ;
|
||||
|
||||
maskArr[mIndex][ConvertChar::get(hapChar)] |= bitMask ;
|
||||
|
||||
|
||||
// bit corresponding to col 1 will be the MSB of the mask 0
|
||||
// bit corresponding to col 2 will be the MSB-1 of the mask 0
|
||||
// ...
|
||||
// bit corresponding to col 32 will be the LSB of the mask 0
|
||||
// bit corresponding to col 33 will be the MSB of the mask 1
|
||||
// ...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
void test_mask_computations (testcase& tc, int tcID, bool printDebug=false) {
|
||||
|
||||
int ROWS = tc.rslen + 1 ;
|
||||
int COLS = tc.haplen + 1 ;
|
||||
|
||||
// only for testing
|
||||
VECTOR mismatchData, matchData ;
|
||||
//SET_VEC_ZERO(mismatchData.VTYPE) ;
|
||||
//SET_VEC_ONES(matchData.VTYPEI) ;
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT; ++ei) {
|
||||
matchData.VENTRY[ei] = 1.0 ;
|
||||
mismatchData.VENTRY[ei] = 0.0 ;
|
||||
}
|
||||
|
||||
const int maskBitCnt = VEC_LEN / VEC_ENTRY_CNT ;
|
||||
const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function
|
||||
|
||||
MTYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ;
|
||||
precompute_masks_avx(tc, COLS, numMaskVecs, maskArr) ;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug) {
|
||||
cout << "The first 32 hap chars are: " ;
|
||||
for (int i=0; i < 32; ++i) {
|
||||
cout << tc.hap[i] ;
|
||||
}
|
||||
cout << endl ;
|
||||
|
||||
cout << "Masks computed for A, C, T, G, N are: " << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][0], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][1], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][2], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][3], 32) << endl ;
|
||||
cout << getBinaryStr<MTYPE>(maskArr[0][4], 32) << endl ;
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
int beginRowIndex = 1 ;
|
||||
while (beginRowIndex < ROWS) {
|
||||
|
||||
int numRowsToProcess = min(VEC_ENTRY_CNT, ROWS - beginRowIndex) ;
|
||||
|
||||
char rsArr[VEC_ENTRY_CNT] ;
|
||||
for (int ri=0; ri < numRowsToProcess; ++ri) {
|
||||
rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ;
|
||||
}
|
||||
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
VECTOR_SSE currMaskVecLow ; // corresponding to entries 0-3
|
||||
VECTOR_SSE currMaskVecHigh ; // corresponding to entries 4-7
|
||||
|
||||
MTYPE lastMaskShiftOut[VEC_ENTRY_CNT] ;
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT; ++ei)
|
||||
lastMaskShiftOut[ei] = 0 ;
|
||||
|
||||
int col = 1 ;
|
||||
int diag = 1 ;
|
||||
for (int maskIndex=0; maskIndex < numMaskVecs; ++maskIndex) {
|
||||
// set up the mask vectors for the next maskBitCnt columns
|
||||
|
||||
// For AVX, maskBitCnt = 32 (so, the operation below is amortized over 32 cols)
|
||||
for (int ei=0; ei < VEC_ENTRY_CNT/2; ++ei) {
|
||||
SET_MASK_WORD(currMaskVecLow.VENTRYI[ei], maskArr[maskIndex][rsArr[ei]],
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
int ei2 = ei + VEC_ENTRY_CNT/2 ; // the second entry index
|
||||
SET_MASK_WORD(currMaskVecHigh.VENTRYI[ei], maskArr[maskIndex][rsArr[ei2]],
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
}
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug && maskIndex == 0) {
|
||||
cout << "The masks for entry 1: " << endl
|
||||
<< getBinaryStr<MTYPE>(maskArr[0][rsArr[1]], 32) << endl
|
||||
<< getBinaryStr<MTYPE>(currMaskVecLow.VENTRYI[1], 32) << endl ;
|
||||
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
// iterate over mask bit indices and columns
|
||||
for (int mbi=0; mbi < maskBitCnt && diag < COLS + ROWS -2; ++mbi, ++diag) {
|
||||
|
||||
VECTOR maskV ;
|
||||
VEC_SSE_TO_AVX(currMaskVecLow.VTYPE, currMaskVecHigh.VTYPE, maskV.VTYPE) ;
|
||||
|
||||
VECTOR testData ;
|
||||
testData.VTYPE = VEC_BLENDV(mismatchData.VTYPE, matchData.VTYPE,
|
||||
maskV.VTYPE) ;
|
||||
|
||||
VECS_SHIFT_LEFT_1BIT(currMaskVecLow.VTYPEI) ;
|
||||
VECS_SHIFT_LEFT_1BIT(currMaskVecHigh.VTYPEI) ;
|
||||
|
||||
#ifdef DEBUG
|
||||
if (printDebug && maskIndex == 0) {
|
||||
cout << "The mask for entry 1, mbi=" << mbi << ": "
|
||||
<< getBinaryStr<MTYPE>(maskV.VENTRYI[1], 32) << endl ;
|
||||
|
||||
}
|
||||
#endif // #ifdef DEBUG
|
||||
|
||||
#ifdef CHECK_MASK_CORRECTNESS
|
||||
|
||||
int firstRowIndex = (diag < COLS) ? 0 : (diag - COLS) ;
|
||||
int lastRowIndex = min(col-1, numRowsToProcess-1) ;
|
||||
|
||||
for (int ri=firstRowIndex; ri <= lastRowIndex; ++ri) {
|
||||
int currRow = beginRowIndex + ri ;
|
||||
int currCol = col - ri + firstRowIndex ;
|
||||
|
||||
char hapChar = tc.hap[currCol-1] ;
|
||||
char rsChar = tc.rs[currRow-1] ;
|
||||
|
||||
bool match = (hapChar == rsChar || hapChar == 'N' || rsChar == 'N') ;
|
||||
|
||||
if ((bool) testData.VENTRYI[ri] != match) {
|
||||
cout << "Error: Incorrect mask for tc " << tcID << ", diag = " << diag
|
||||
<< " (" << currRow << ", " << currCol << ")" << endl ;
|
||||
|
||||
cout << "The chars are: " << hapChar << " and " << rsChar << endl ;
|
||||
cout << "The selected value is: " << testData.VENTRYI[ri] << endl ;
|
||||
|
||||
exit(0) ;
|
||||
}
|
||||
}
|
||||
#endif // #ifdef CHECK_MASK_CORRECTNESS
|
||||
|
||||
if (diag < COLS)
|
||||
++col ;
|
||||
|
||||
} // mbi
|
||||
} // maskIndex
|
||||
|
||||
beginRowIndex += VEC_ENTRY_CNT ;
|
||||
} // end of stripe
|
||||
|
||||
//cout << "Finished validating entry " << endl ;
|
||||
}
|
||||
|
||||
#ifdef HMM_MASK_MAIN
|
||||
int main () {
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
|
||||
ConvertChar::init() ;
|
||||
|
||||
testcase* tcBatch = new testcase[BATCH_SIZE] ;
|
||||
|
||||
int numBatches = 0 ;
|
||||
int numRead = 0 ;
|
||||
|
||||
const int DEBUG_TC = -1 ;
|
||||
|
||||
FILE* ifp = stdin ;
|
||||
|
||||
do {
|
||||
numRead = 0 ;
|
||||
|
||||
while (numRead < BATCH_SIZE) {
|
||||
if (read_testcase(tcBatch+numRead, ifp) < 0)
|
||||
break ;
|
||||
|
||||
++numRead ;
|
||||
}
|
||||
|
||||
if (numRead == 0)
|
||||
break ;
|
||||
|
||||
for (int ti=0; ti < numRead; ++ti) {
|
||||
|
||||
int tcID = numBatches * BATCH_SIZE + ti ;
|
||||
test_mask_computations(tcBatch[ti], tcID, tcID == DEBUG_TC) ;
|
||||
}
|
||||
|
||||
++numBatches ;
|
||||
} while (numRead == BATCH_SIZE) ;
|
||||
|
||||
fclose(ifp) ;
|
||||
|
||||
delete[] tcBatch ;
|
||||
|
||||
return 0 ;
|
||||
}
|
||||
#endif
|
||||
Binary file not shown.
|
|
@ -5,20 +5,13 @@
|
|||
|
||||
#define ENABLE_ASSERTIONS 1
|
||||
#define DO_PROFILING 1
|
||||
#define DEBUG 1
|
||||
//#define DEBUG 1
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
|
||||
|
||||
#include "define-float.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
class LoadTimeInitializer
|
||||
|
|
@ -33,32 +26,18 @@ class LoadTimeInitializer
|
|||
m_sumSquareNumHaplotypes = 0;
|
||||
m_sumNumTestcases = 0;
|
||||
m_sumSquareNumTestcases = 0;
|
||||
m_maxNumTestcases = 0;
|
||||
m_num_invocations = 0;
|
||||
m_filename_to_fptr.clear();
|
||||
|
||||
if(is_avx_supported())
|
||||
{
|
||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
||||
g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Using un-vectorized C++ implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob<float>;
|
||||
g_compute_full_prob_double = compute_full_prob<double>;
|
||||
}
|
||||
initialize_function_pointers();
|
||||
cout.flush();
|
||||
//if(is_sse42_supported())
|
||||
//{
|
||||
//g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
||||
//g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
||||
//}
|
||||
}
|
||||
void print_profiling()
|
||||
{
|
||||
double mean_val;
|
||||
cout <<"Invocations : "<<m_num_invocations<<"\n";
|
||||
cout << "term\tsum\tsumSq\tmean\tvar\n";
|
||||
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
|
||||
mean_val = m_sumNumReads/m_num_invocations;
|
||||
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
|
|
@ -67,14 +46,39 @@ class LoadTimeInitializer
|
|||
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
mean_val = m_sumNumTestcases/m_num_invocations;
|
||||
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\t"<<m_maxNumTestcases<<"\n";
|
||||
cout.flush();
|
||||
}
|
||||
~LoadTimeInitializer()
|
||||
|
||||
void debug_dump(string filename, string s, bool to_append, bool add_newline=true)
|
||||
{
|
||||
#ifdef DO_PROFILING
|
||||
print_profiling();
|
||||
#endif
|
||||
map<string, ofstream*>::iterator mI = m_filename_to_fptr.find(filename);
|
||||
ofstream* fptr = 0;
|
||||
if(mI == m_filename_to_fptr.end())
|
||||
{
|
||||
m_filename_to_fptr[filename] = new ofstream();
|
||||
fptr = m_filename_to_fptr[filename];
|
||||
fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
|
||||
assert(fptr->is_open());
|
||||
}
|
||||
else
|
||||
fptr = (*mI).second;
|
||||
//ofstream fptr;
|
||||
//fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
(*fptr) << s;
|
||||
if(add_newline)
|
||||
(*fptr) << "\n";
|
||||
//fptr.close();
|
||||
}
|
||||
void debug_close()
|
||||
{
|
||||
for(map<string,ofstream*>::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end();
|
||||
mB != mE;mB++)
|
||||
{
|
||||
(*mB).second->close();
|
||||
delete (*mB).second;
|
||||
}
|
||||
m_filename_to_fptr.clear();
|
||||
}
|
||||
jfieldID m_readBasesFID;
|
||||
jfieldID m_readQualsFID;
|
||||
|
|
@ -89,8 +93,10 @@ class LoadTimeInitializer
|
|||
double m_sumSquareNumHaplotypes;
|
||||
double m_sumNumTestcases;
|
||||
double m_sumSquareNumTestcases;
|
||||
unsigned m_maxNumTestcases;
|
||||
unsigned m_num_invocations;
|
||||
|
||||
private:
|
||||
map<string, ofstream*> m_filename_to_fptr;
|
||||
};
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
|
@ -174,10 +180,10 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadL
|
|||
tc.c[i] = (int)overallGCPArray[i];
|
||||
}
|
||||
|
||||
double result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc);
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
double result = log10(result_avxd) - log10(ldexp(1.0, 1020));
|
||||
#ifdef DEBUG
|
||||
debug_dump("return_values_jni.txt",to_string(result),true);
|
||||
g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(result),true);
|
||||
#endif
|
||||
|
||||
|
||||
|
|
@ -255,7 +261,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
haplotypeBasesArrayVector[j] = make_pair(haplotypeBases, haplotypeBasesArray);
|
||||
#ifdef DEBUG3
|
||||
for(unsigned k=0;k<env->GetArrayLength(haplotypeBases);++k)
|
||||
debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
|
||||
g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
@ -310,11 +316,11 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
#ifdef DEBUG3
|
||||
for(unsigned j=0;j<readLength;++j)
|
||||
{
|
||||
debug_dump("reads_jni.txt",to_string((int)readBasesArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)readQualsArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)insertionGOPArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)deletionGOPArray[j]),true);
|
||||
debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
|
||||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)readBasesArray[j]),true);
|
||||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)readQualsArray[j]),true);
|
||||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)insertionGOPArray[j]),true);
|
||||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)deletionGOPArray[j]),true);
|
||||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
@ -370,7 +376,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
#ifdef DEBUG
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
|
||||
g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
|
||||
}
|
||||
#endif
|
||||
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0);
|
||||
|
|
@ -396,7 +402,23 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
g_load_time_initializer.m_sumSquareNumHaplotypes += numHaplotypes*numHaplotypes;
|
||||
g_load_time_initializer.m_sumNumTestcases += numTestCases;
|
||||
g_load_time_initializer.m_sumSquareNumTestcases += numTestCases*numTestCases;
|
||||
g_load_time_initializer.m_maxNumTestcases = numTestCases > g_load_time_initializer.m_maxNumTestcases ? numTestCases
|
||||
: g_load_time_initializer.m_maxNumTestcases;
|
||||
++(g_load_time_initializer.m_num_invocations);
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
g_load_time_initializer.debug_close();
|
||||
#endif
|
||||
}
|
||||
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose
|
||||
(JNIEnv* env, jobject thisObject)
|
||||
{
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.print_profiling();
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
g_load_time_initializer.debug_close();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -70,6 +70,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods
|
||||
(JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);
|
||||
|
||||
/*
|
||||
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
|
||||
* Method: jniClose
|
||||
* Signature: ()V
|
||||
*/
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose
|
||||
(JNIEnv *, jobject);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -5,12 +5,6 @@
|
|||
#include "template.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include "define-float.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
using namespace std;
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
|
|
@ -36,16 +30,7 @@ int main(int argc, char** argv)
|
|||
if(argc >= 3 && string(argv[2]) == "1")
|
||||
use_old_read_testcase = true;
|
||||
|
||||
if(true)
|
||||
{
|
||||
g_compute_full_prob_double = GEN_INTRINSIC(compute_full_prob_avx, d)<double>;
|
||||
g_compute_full_prob_float = GEN_INTRINSIC(compute_full_prob_avx, s)<float>;
|
||||
}
|
||||
else
|
||||
{
|
||||
g_compute_full_prob_double = compute_full_prob<double>;
|
||||
g_compute_full_prob_float = compute_full_prob<float>;
|
||||
}
|
||||
initialize_function_pointers();
|
||||
|
||||
std::ifstream ifptr;
|
||||
FILE* fptr = 0;
|
||||
|
|
@ -86,70 +71,5 @@ int main(int argc, char** argv)
|
|||
else
|
||||
ifptr.close();
|
||||
return 0;
|
||||
|
||||
#if 0
|
||||
float result[BATCH_SIZE], result_avxf;
|
||||
double result_avxd;
|
||||
struct timeval start, end;
|
||||
long long aggregateTimeRead = 0L;
|
||||
long long aggregateTimeCompute = 0L;
|
||||
long long aggregateTimeWrite = 0L;
|
||||
|
||||
bool noMoreData = false;
|
||||
int count =0;
|
||||
while (!noMoreData)
|
||||
{
|
||||
int read_count = BATCH_SIZE;
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<BATCH_SIZE;b++)
|
||||
if (read_testcase(&tc[b], NULL)==-1)
|
||||
{
|
||||
read_count = b;
|
||||
noMoreData = true;
|
||||
break;
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeRead += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<read_count;b++)
|
||||
{
|
||||
result_avxf = compute_full_prob<float>(&tc[b]);
|
||||
|
||||
#ifdef RUN_HYBRID
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
count++;
|
||||
result_avxd = compute_full_prob<double>(&tc[b]);
|
||||
result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f));
|
||||
}
|
||||
else
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
#ifndef RUN_HYBRID
|
||||
result[b] = log10f(result_avxf) - log10f(ldexpf(1.f, 120.f));
|
||||
#endif
|
||||
|
||||
}
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeCompute += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
gettimeofday(&start, NULL);
|
||||
for (int b=0;b<read_count;b++)
|
||||
printf("%E\n", result[b]);
|
||||
gettimeofday(&end, NULL);
|
||||
aggregateTimeWrite += ((end.tv_sec * 1000000 + end.tv_usec) - (start.tv_sec * 1000000 + start.tv_usec));
|
||||
|
||||
}
|
||||
|
||||
printf("AVX Read Time: %ld\n", aggregateTimeRead);
|
||||
printf("AVX Compute Time: %ld\n", aggregateTimeCompute);
|
||||
printf("AVX Write Time: %ld\n", aggregateTimeWrite);
|
||||
printf("AVX Total Time: %ld\n", aggregateTimeRead + aggregateTimeCompute + aggregateTimeWrite);
|
||||
printf("# Double called: %d\n", count);
|
||||
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,6 +4,10 @@
|
|||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
//#define DEBUG
|
||||
#define MUSTAFA
|
||||
#define KARTHIK
|
||||
|
||||
/*
|
||||
template <class T>
|
||||
string getBinaryStr (T val, int numBitsToWrite) {
|
||||
|
|
@ -17,8 +21,9 @@ string getBinaryStr (T val, int numBitsToWrite) {
|
|||
return oss.str() ;
|
||||
}
|
||||
*/
|
||||
#ifdef MUSTAFA
|
||||
|
||||
void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) {
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]) {
|
||||
|
||||
const int maskBitCnt = MAIN_TYPE_SIZE ;
|
||||
|
||||
|
|
@ -52,7 +57,7 @@ void GEN_INTRINSIC(precompute_masks_, PRECISION)(const testcase& tc, int COLS, i
|
|||
|
||||
}
|
||||
|
||||
void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) {
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess) {
|
||||
|
||||
for (int ri=0; ri < numRowsToProcess; ++ri) {
|
||||
rsArr[ri] = ConvertChar::get(tc.rs[ri+beginRowIndex-1]) ;
|
||||
|
|
@ -63,6 +68,7 @@ void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsA
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
#define SET_MASK_WORD(__dstMask, __srcMask, __lastShiftOut, __shiftBy, __maskBitCnt){ \
|
||||
MASK_TYPE __bitMask = (((MASK_TYPE)0x1) << __shiftBy) - 1 ; \
|
||||
MASK_TYPE __nextShiftOut = (__srcMask & __bitMask) << (__maskBitCnt - __shiftBy) ; \
|
||||
|
|
@ -71,33 +77,43 @@ void GEN_INTRINSIC(init_masks_for_row_, PRECISION)(const testcase& tc, char* rsA
|
|||
}
|
||||
|
||||
|
||||
void GEN_INTRINSIC(update_masks_for_cols_, PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt) {
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_, SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt) {
|
||||
|
||||
for (int ei=0; ei < AVX_LENGTH/2; ++ei) {
|
||||
SET_MASK_WORD(currMaskVecLow.masks[ei], maskArr[maskIndex][rsArr[ei]],
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
lastMaskShiftOut[ei], ei, maskBitCnt) ;
|
||||
|
||||
int ei2 = ei + AVX_LENGTH/2 ; // the second entry index
|
||||
SET_MASK_WORD(currMaskVecHigh.masks[ei], maskArr[maskIndex][rsArr[ei2]],
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
lastMaskShiftOut[ei2], ei2, maskBitCnt) ;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
//void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen, const _256_TYPE& distmSel, int firstRowIndex, int lastRowIndex) {
|
||||
|
||||
inline void GEN_INTRINSIC(computeDistVec, PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen) {
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec, SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen) {
|
||||
//#define computeDistVec() {
|
||||
|
||||
_256_TYPE maskV ;
|
||||
VEC_SSE_TO_AVX(currMaskVecLow.vecf, currMaskVecHigh.vecf, maskV) ;
|
||||
|
||||
#ifdef DEBUGG
|
||||
long long *temp1 = (long long *)(&maskV);
|
||||
double *temp2 = (double *)(&distm);
|
||||
double *temp3 = (double *)(&_1_distm);
|
||||
printf("***\n%lx\n%lx\n%f\n%f\n%f\n%f\n***\n", temp1[0], temp1[1], temp2[0], temp2[1], temp3[0], temp3[1]);
|
||||
#endif
|
||||
|
||||
distmChosen = VEC_BLENDV(distm, _1_distm, maskV) ;
|
||||
|
||||
/*COMPARE_VECS(distmChosen, distmSel, firstRowIndex, lastRowIndex) ;*/
|
||||
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecLow.vec) ;
|
||||
VEC_SHIFT_LEFT_1BIT(currMaskVecHigh.vec) ;
|
||||
|
||||
_mm_empty();
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -120,8 +136,9 @@ struct HmmData {
|
|||
|
||||
} ;
|
||||
*/
|
||||
#endif // MUSTAFA
|
||||
|
||||
template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D)
|
||||
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors, SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D)
|
||||
{
|
||||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
|
|
@ -157,18 +174,14 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
|
|||
*(ptr_p_GAPM+r-1) = ctx._(1.0) - ctx.ph2pr[_c];
|
||||
*(ptr_p_MX+r-1) = ctx.ph2pr[_i];
|
||||
*(ptr_p_XX+r-1) = ctx.ph2pr[_c];
|
||||
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
|
||||
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
|
||||
#ifdef DEBUG3
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MM+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_GAPM+r-1)),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MX+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_XX+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_MY+r-1) ),true);
|
||||
debug_dump("transitions_jni.txt",to_string(*(ptr_p_YY+r-1) ),true);
|
||||
#endif
|
||||
//*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
//*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
#ifdef KARTHIK
|
||||
*(ptr_p_MY+r-1) = ctx.ph2pr[_d];
|
||||
*(ptr_p_YY+r-1) = ctx.ph2pr[_c];
|
||||
#else
|
||||
*(ptr_p_MY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_d];
|
||||
*(ptr_p_YY+r-1) = (r == ROWS - 1) ? ctx._(1.0) : ctx.ph2pr[_c];
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
NUMBER *ptr_distm1D = (NUMBER *)distm1D;
|
||||
|
|
@ -176,14 +189,11 @@ template<class NUMBER> void GEN_INTRINSIC(initializeVectors, PRECISION)(int ROWS
|
|||
{
|
||||
int _q = tc->q[r-1] & 127;
|
||||
ptr_distm1D[r-1] = ctx.ph2pr[_q];
|
||||
#ifdef DEBUG3
|
||||
debug_dump("priors_jni.txt",to_string(ptr_distm1D[r-1]),true);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(
|
||||
template<class NUMBER> inline void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION, SIMD_TYPE), PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
|
|
@ -200,19 +210,18 @@ template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)
|
|||
NUMBER zero = ctx._(0.0);
|
||||
NUMBER init_Y = ctx.INITIAL_CONSTANT / (tc->haplen);
|
||||
UNION_TYPE packed1; packed1.d = VEC_SET1_VAL(1.0);
|
||||
#define TRISTATE_CORRECTION_FACTOR 3.0
|
||||
UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(TRISTATE_CORRECTION_FACTOR);
|
||||
|
||||
UNION_TYPE packed3; packed3.d = VEC_SET1_VAL(3.0);
|
||||
/* compare rs and N */
|
||||
//rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH));
|
||||
//rsN.d = VEC_CMP_EQ(N_packed256, rs);
|
||||
|
||||
#ifndef MUSTAFA
|
||||
rs = VEC_LDPOPCVT_CHAR((tc->irs+i*AVX_LENGTH));
|
||||
rsN.d = VEC_CMP_EQ(N_packed256, rs);
|
||||
#endif
|
||||
distm = distm1D[i];
|
||||
_1_distm = VEC_SUB(packed1.d, distm);
|
||||
#ifndef DO_NOT_USE_TRISTATE_CORRECTION
|
||||
distm = VEC_DIV(distm, packed3.d);
|
||||
#endif
|
||||
|
||||
_1_distm = VEC_SUB(packed1.d, distm);
|
||||
|
||||
#ifdef KARTHIK
|
||||
distm = VEC_DIV(distm, packed3.d);
|
||||
#endif
|
||||
/* initialize M_t_2, M_t_1, X_t_2, X_t_1, Y_t_2, Y_t_1 */
|
||||
M_t_2.d = VEC_SET1_VAL(zero);
|
||||
X_t_2.d = VEC_SET1_VAL(zero);
|
||||
|
|
@ -234,7 +243,7 @@ template<class NUMBER> inline void GEN_INTRINSIC(stripINITIALIZATION, PRECISION)
|
|||
|
||||
|
||||
|
||||
inline _256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
inline _256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM, SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm)
|
||||
{
|
||||
UNION_TYPE hapN, rshap;
|
||||
|
|
@ -263,12 +272,21 @@ inline _256_TYPE GEN_INTRINSIC(computeDISTM, PRECISION)(int d, int COLS, testcas
|
|||
}
|
||||
|
||||
|
||||
inline void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY, SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel)
|
||||
{
|
||||
/* Compute M_t <= distm * (p_MM*M_t_2 + p_GAPM*X_t_2 + p_GAPM*Y_t_2) */
|
||||
M_t.d = VEC_MUL(VEC_ADD(VEC_ADD(VEC_MUL(M_t_2.d, pMM), VEC_MUL(X_t_2.d, pGAPM)), VEC_MUL(Y_t_2.d, pGAPM)), distmSel);
|
||||
|
||||
#ifdef DEBUG
|
||||
double *temp1 = (double *)(&pGAPM);
|
||||
double *temp2 = (double *)(&pMM);
|
||||
double *temp3 = (double *)(&distmSel);
|
||||
printf("%f\n%f\n%f\n%f\n%f\n%f\n", temp1[0], temp1[1], temp2[0], temp2[1], temp3[0], temp3[1]);
|
||||
//printf("%f\n%f\n%f\n%f\n", X_t_2.f[0], X_t_2.f[1], Y_t_2.f[0], Y_t_2.f[1]);
|
||||
printf("%f\n%f\n----------------------------------------------------------------------------\n", M_t.f[0], M_t.f[1]);
|
||||
#endif
|
||||
M_t_y = M_t;
|
||||
|
||||
/* Compute X_t */
|
||||
|
|
@ -278,7 +296,7 @@ inline void GEN_INTRINSIC(computeMXY, PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_
|
|||
Y_t.d = VEC_ADD(VEC_MUL(M_t_1_y.d, pMY) , VEC_MUL(Y_t_1.d, pYY));
|
||||
}
|
||||
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (testcase *tc, NUMBER *before_last_log = NULL)
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL)
|
||||
{
|
||||
_256_TYPE p_MM [MAVX_COUNT], p_GAPM [MAVX_COUNT], p_MX [MAVX_COUNT];
|
||||
_256_TYPE p_XX [MAVX_COUNT], p_MY [MAVX_COUNT], p_YY [MAVX_COUNT];
|
||||
|
|
@ -306,54 +324,51 @@ template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t
|
|||
int remainingRows = (ROWS-1) % AVX_LENGTH;
|
||||
int strip_cnt = ((ROWS-1) / AVX_LENGTH) + (remainingRows!=0);
|
||||
|
||||
#ifdef MUSTAFA
|
||||
const int maskBitCnt = MAIN_TYPE_SIZE ;
|
||||
const int numMaskVecs = (COLS+ROWS+maskBitCnt-1)/maskBitCnt ; // ceil function
|
||||
|
||||
MASK_TYPE maskArr[numMaskVecs][NUM_DISTINCT_CHARS] ;
|
||||
GEN_INTRINSIC(precompute_masks_, PRECISION)(*tc, COLS, numMaskVecs, maskArr) ;
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(*tc, COLS, numMaskVecs, maskArr) ;
|
||||
|
||||
char rsArr[AVX_LENGTH] ;
|
||||
MASK_TYPE lastMaskShiftOut[AVX_LENGTH] ;
|
||||
|
||||
GEN_INTRINSIC(initializeVectors, PRECISION)<NUMBER>(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY,
|
||||
#endif
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)<NUMBER>(ROWS, COLS, shiftOutM, shiftOutX, shiftOutY,
|
||||
ctx, tc, p_MM, p_GAPM, p_MX, p_XX, p_MY, p_YY, distm1D);
|
||||
|
||||
//for (int __ii=0; __ii < 10; ++__ii)
|
||||
for (int i=0;i<strip_cnt-1;i++)
|
||||
{
|
||||
//STRIP_INITIALIZATION
|
||||
GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
|
||||
GEN_INTRINSIC(init_masks_for_row_, PRECISION)(*tc, rsArr, lastMaskShiftOut,
|
||||
i*AVX_LENGTH+1, AVX_LENGTH) ;
|
||||
#ifdef MUSTAFA
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, AVX_LENGTH) ;
|
||||
#endif
|
||||
// Since there are no shift intrinsics in AVX, keep the masks in 2 SSE vectors
|
||||
MASK_VEC currMaskVecLow ; // corresponding to lower half
|
||||
MASK_VEC currMaskVecHigh ; // corresponding to upper half
|
||||
|
||||
for (int d=1;d<COLS+AVX_LENGTH;d++)
|
||||
{
|
||||
#ifdef MUSTAFA
|
||||
if (d % MAIN_TYPE_SIZE == 1)
|
||||
GEN_INTRINSIC(update_masks_for_cols_, PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
#else
|
||||
distmChosen = GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
#endif
|
||||
int ShiftIdx = d+AVX_LENGTH;
|
||||
//distmSel = GEN_INTRINSIC(computeDISTM, PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
|
||||
//int firstRowIndex = (d < COLS) ? 0 : (d-COLS+1) ;
|
||||
//int lastRowIndex = std::min(d-1, AVX_LENGTH-1) ;
|
||||
//GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen, distmSel, firstRowIndex, lastRowIndex) ;
|
||||
|
||||
GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
|
||||
|
||||
GEN_INTRINSIC(computeMXY, PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(M_t, shiftOutM[ShiftIdx], shiftOutM[d]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift, SIMD_TYPE), PRECISION)(M_t, shiftOutM[ShiftIdx], shiftOutM[d]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(X_t, shiftOutX[ShiftIdx], shiftOutX[d]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift, SIMD_TYPE), PRECISION)(X_t, shiftOutX[ShiftIdx], shiftOutX[d]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift, PRECISION)(Y_t_1, shiftOutY[ShiftIdx], shiftOutY[d]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift, SIMD_TYPE), PRECISION)(Y_t_1, shiftOutY[ShiftIdx], shiftOutY[d]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
|
|
@ -364,14 +379,13 @@ template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t
|
|||
int i = strip_cnt-1;
|
||||
{
|
||||
//STRIP_INITIALIZATION
|
||||
GEN_INTRINSIC(stripINITIALIZATION, PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(i, ctx, tc, pGAPM, pMM, pMX, pXX, pMY, pYY, rs.d, rsN, distm, _1_distm, distm1D, N_packed256, p_MM , p_GAPM ,
|
||||
p_MX, p_XX , p_MY, p_YY, M_t_2, X_t_2, M_t_1, X_t_1, Y_t_2, Y_t_1, M_t_1_y, shiftOutX, shiftOutM);
|
||||
|
||||
if (remainingRows==0) remainingRows=AVX_LENGTH;
|
||||
|
||||
GEN_INTRINSIC(init_masks_for_row_, PRECISION)(*tc, rsArr, lastMaskShiftOut,
|
||||
i*AVX_LENGTH+1, remainingRows) ;
|
||||
|
||||
#ifdef MUSTAFA
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(*tc, rsArr, lastMaskShiftOut, i*AVX_LENGTH+1, remainingRows) ;
|
||||
#endif
|
||||
_256_TYPE sumM, sumX;
|
||||
sumM = VEC_SET1_VAL(zero);
|
||||
sumX = VEC_SET1_VAL(zero);
|
||||
|
|
@ -382,24 +396,25 @@ template<class NUMBER> NUMBER GEN_INTRINSIC(compute_full_prob_avx, PRECISION) (t
|
|||
|
||||
for (int d=1;d<COLS+remainingRows-1;d++)
|
||||
{
|
||||
|
||||
#ifdef MUSTAFA
|
||||
if (d % MAIN_TYPE_SIZE == 1)
|
||||
GEN_INTRINSIC(update_masks_for_cols_, PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_, SIMD_TYPE),PRECISION)((d-1)/MAIN_TYPE_SIZE, currMaskVecLow, currMaskVecHigh, maskArr, rsArr, lastMaskShiftOut, maskBitCnt) ;
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec, SIMD_TYPE), PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
#else
|
||||
distmChosen = GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
#endif
|
||||
int ShiftIdx = d+AVX_LENGTH;
|
||||
//distmSel = GEN_INTRINSIC(computeDISTM, PRECISION)(d, COLS, tc, hap, rs.d, rsN, N_packed256, distm, _1_distm);
|
||||
GEN_INTRINSIC(computeDistVec, PRECISION) (currMaskVecLow, currMaskVecHigh, distm, _1_distm, distmChosen) ;
|
||||
|
||||
GEN_INTRINSIC(computeMXY, PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(computeMXY, SIMD_TYPE), PRECISION)(M_t, X_t, Y_t, M_t_y, M_t_2, X_t_2, Y_t_2, M_t_1, X_t_1, M_t_1_y, Y_t_1,
|
||||
pMM, pGAPM, pMX, pXX, pMY, pYY, distmChosen);
|
||||
|
||||
sumM = VEC_ADD(sumM, M_t.d);
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(M_t, shiftOutM[ShiftIdx]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION)(M_t, shiftOutM[ShiftIdx]);
|
||||
|
||||
sumX = VEC_ADD(sumX, X_t.d);
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(X_t, shiftOutX[ShiftIdx]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION)(X_t, shiftOutX[ShiftIdx]);
|
||||
|
||||
GEN_INTRINSIC(_vector_shift_last, PRECISION)(Y_t_1, shiftOutY[ShiftIdx]);
|
||||
GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION)(Y_t_1, shiftOutY[ShiftIdx]);
|
||||
|
||||
M_t_2 = M_t_1; M_t_1 = M_t; X_t_2 = X_t_1; X_t_1 = X_t;
|
||||
Y_t_2 = Y_t_1; Y_t_1 = Y_t; M_t_1_y = M_t_y;
|
||||
|
|
|
|||
|
|
@ -1,23 +1,12 @@
|
|||
#include <stdio.h>
|
||||
#include "headers.h"
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include <omp.h>
|
||||
|
||||
#include "template.h"
|
||||
#include "vector_defs.h"
|
||||
|
||||
#include "define-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "avx_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "avx_function_prototypes.h"
|
||||
#define BATCH_SIZE 10
|
||||
#define RUN_HYBRID
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
//#define RUN_HYBRID
|
||||
|
||||
//uint8_t ConvertChar::conversionTable[255] ;
|
||||
int thread_level_parallelism_enabled = false ;
|
||||
|
||||
double getCurrClk() {
|
||||
|
|
@ -26,11 +15,45 @@ double getCurrClk() {
|
|||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
void print128b_F(__m128 x)
|
||||
{
|
||||
float *p = (float *)(&x);
|
||||
for (int i=3;i>=0;i--)
|
||||
printf("%f ", *(p+i));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
void print128b_D(__m128d x)
|
||||
{
|
||||
double *p = (double *)(&x);
|
||||
for (int i=1;i>=0;i--)
|
||||
printf("%f ", *(p+i));
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
int main()
|
||||
{
|
||||
|
||||
testcase tc[BATCH_SIZE];
|
||||
/*
|
||||
IF_128f x;
|
||||
x.f = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
|
||||
IF_32 shiftIn, shiftOut;
|
||||
shiftIn.f = 5.0f;
|
||||
print128b_F(x.f);
|
||||
GEN_INTRINSIC(_vector_shift, s)(x, shiftIn, shiftOut);
|
||||
print128b_F(x.f);
|
||||
|
||||
IF_128d y;
|
||||
y.f = _mm_set_pd(10.0, 11.0);
|
||||
IF_64 shiftInd, shiftOutd;
|
||||
shiftInd.f = 12.0;
|
||||
print128b_D(y.f);
|
||||
GEN_INTRINSIC(_vector_shift, d)(y, shiftInd, shiftOutd);
|
||||
print128b_D(y.f);
|
||||
|
||||
exit(0);
|
||||
*/
|
||||
|
||||
testcase tc[BATCH_SIZE];
|
||||
float result[BATCH_SIZE], result_avxf;
|
||||
double result_avxd;
|
||||
//struct timeval start, end;
|
||||
|
|
@ -41,12 +64,12 @@ int main()
|
|||
|
||||
// Need to call it once to initialize the static array
|
||||
ConvertChar::init() ;
|
||||
|
||||
|
||||
|
||||
char* ompEnvVar = getenv("OMP_NUM_THREADS") ;
|
||||
if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) {
|
||||
thread_level_parallelism_enabled = true ;
|
||||
}
|
||||
// char* ompEnvVar = getenv("OMP_NUM_THREADS") ;
|
||||
// if (ompEnvVar != NULL && ompEnvVar != "" && ompEnvVar != "1" ) {
|
||||
// thread_level_parallelism_enabled = true ;
|
||||
// }
|
||||
|
||||
bool noMoreData = false;
|
||||
int count =0;
|
||||
|
|
@ -56,7 +79,7 @@ int main()
|
|||
|
||||
lastClk = getCurrClk() ;
|
||||
for (int b=0;b<BATCH_SIZE;b++)
|
||||
if (read_testcase(&tc[b], NULL)==-1)
|
||||
if (read_testcase(&tc[b])==-1)
|
||||
{
|
||||
read_count = b;
|
||||
noMoreData = true;
|
||||
|
|
@ -69,16 +92,17 @@ int main()
|
|||
//gettimeofday(&start, NULL);
|
||||
lastClk = getCurrClk() ;
|
||||
|
||||
#pragma omp parallel for schedule(dynamic) if(thread_level_parallelism_enabled)
|
||||
//#pragma omp parallel for schedule(dynamic) if(thread_level_parallelism_enabled)
|
||||
for (int b=0;b<read_count;b++)
|
||||
{
|
||||
result_avxf = GEN_INTRINSIC(compute_full_prob_avx, s)<float>(&tc[b]);
|
||||
result_avxf = GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_, SIMD_TYPE), s)<float>(&tc[b]);
|
||||
|
||||
#ifdef RUN_HYBRID
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
//printf("**************** RUNNING DOUBLE ******************\n");
|
||||
count++;
|
||||
result_avxd = GEN_INTRINSIC(compute_full_prob_avx, d)<double>(&tc[b]);
|
||||
result_avxd = GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_, SIMD_TYPE), d)<double>(&tc[b]);
|
||||
result[b] = log10(result_avxd) - log10(ldexp(1.0, 1020.f));
|
||||
}
|
||||
else
|
||||
|
|
|
|||
|
|
@ -2,18 +2,21 @@
|
|||
rm -f *.txt
|
||||
export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
|
||||
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
|
||||
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar ${GSA_ROOT_DIR}/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
-I /data/broad/samples/joint_variant_calling/NA12878_high_coverage_alignment/NA12878.mapped.ILLUMINA.bwa.CEU.high_coverage_pcr_free.20130906.bam \
|
||||
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
||||
-stand_call_conf 50.0 \
|
||||
-stand_emit_conf 10.0 \
|
||||
--pair_hmm_implementation JNI_LOGLESS_CACHING \
|
||||
-XL unmapped \
|
||||
-o output.raw.snps.indels.vcf
|
||||
|
||||
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
|
||||
#-I /data/simulated/sim1M_pairs_final.bam \
|
||||
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
#-I /data/broad/samples/joint_variant_calling/NA12878_high_coverage_alignment/NA12878.mapped.ILLUMINA.bwa.CEU.high_coverage_pcr_free.20130906.bam \
|
||||
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly18.fasta \
|
||||
#-R /data/broad/samples/joint_variant_calling/broad_reference/Homo_sapiens_assembly19.fasta \
|
||||
#-R /data/broad/samples/joint_variant_calling/broad_reference/ucsc.hg19.fasta \
|
||||
#-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||
|
|
|
|||
|
|
@ -1,9 +1,8 @@
|
|||
#ifdef PRECISION
|
||||
|
||||
#include "template.h"
|
||||
#ifdef SIMD_TYPE_AVX
|
||||
|
||||
|
||||
inline void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
|
||||
{
|
||||
|
||||
IF_128 xlow , xhigh;
|
||||
|
|
@ -33,7 +32,8 @@ inline void GEN_INTRINSIC(_vector_shift, PRECISION) (UNION_TYPE &x, MAIN_TYPE sh
|
|||
x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
|
||||
}
|
||||
|
||||
inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
|
||||
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
|
||||
{
|
||||
|
||||
IF_128 xlow , xhigh;
|
||||
|
|
@ -44,10 +44,6 @@ inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TY
|
|||
/* extract xlow[3] */
|
||||
IF_128 shiftOutL128;
|
||||
shiftOutL128.i = _mm_srli_si128(xlow.i, SHIFT_CONST1);
|
||||
/* extract xhigh[3] */
|
||||
// IF_MAIN_TYPE shiftOutH;
|
||||
// shiftOutH.i = VEC_EXTRACT_UNIT(xhigh.i, SHIFT_CONST2);
|
||||
// shiftOut = shiftOutH.f;
|
||||
/* shift xlow */
|
||||
xlow.i = _mm_slli_si128 (xlow.i, SHIFT_CONST3);
|
||||
/* shift xhigh */
|
||||
|
|
@ -63,6 +59,32 @@ inline void GEN_INTRINSIC(_vector_shift_last, PRECISION) (UNION_TYPE &x, MAIN_TY
|
|||
x.d = VEC_INSERT_VAL(x.d, xhigh.f, 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef SIMD_TYPE_SSE
|
||||
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut)
|
||||
{
|
||||
IF_MAIN_TYPE tempIn, tempOut;
|
||||
tempIn.f = shiftIn;
|
||||
/* extratc H */
|
||||
tempOut.i = VEC_EXTRACT_UNIT(x.i, SHIFT_CONST1);
|
||||
shiftOut = tempOut.f;
|
||||
/* shift */
|
||||
x.i = _mm_slli_si128(x.i, SHIFT_CONST2);
|
||||
/* insert L */
|
||||
x.i = VEC_INSERT_UNIT(x.i , tempIn.i, SHIFT_CONST3);
|
||||
}
|
||||
|
||||
inline void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last, SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn)
|
||||
{
|
||||
IF_MAIN_TYPE temp; temp.f = shiftIn;
|
||||
/* shift */
|
||||
x.i = _mm_slli_si128(x.i, SHIFT_CONST2);
|
||||
/* insert L */
|
||||
x.i = VEC_INSERT_UNIT(x.i , temp.i, SHIFT_CONST3);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,18 @@
|
|||
#include "template.h"
|
||||
|
||||
#undef SIMD_TYPE
|
||||
#undef SIMD_TYPE_AVX
|
||||
|
||||
#define SIMD_TYPE sse
|
||||
#define SIMD_TYPE_SSE
|
||||
|
||||
#include "define-sse-float.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
#include "define-sse-double.h"
|
||||
#include "shift_template.c"
|
||||
#include "pairhmm-template-kernel.cc"
|
||||
|
||||
template double compute_full_prob_ssed<double>(testcase* tc, double* nextlog);
|
||||
template float compute_full_prob_sses<float>(testcase* tc, float* nextlog);
|
||||
|
|
@ -25,12 +25,25 @@ typedef union __attribute__((aligned(32))) {
|
|||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_F ALIGNED;
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m128 ALIGNED d;
|
||||
ALIGNED __m64 ALIGNED s[2];
|
||||
ALIGNED float ALIGNED f[4];
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
} ALIGNED mix_F128 ALIGNED;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m128i vec ;
|
||||
__m128 vecf ;
|
||||
uint32_t masks[4] ;
|
||||
} MaskVec_F ;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m64 vec ;
|
||||
__m64 vecf ;
|
||||
uint32_t masks[2] ;
|
||||
} MaskVec_F128 ;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
|
|
@ -50,12 +63,25 @@ typedef union __attribute__((aligned(32))) {
|
|||
ALIGNED __m256i ALIGNED i;
|
||||
} ALIGNED mix_D ALIGNED;
|
||||
|
||||
typedef union __attribute__((aligned(32))) {
|
||||
ALIGNED __m128d ALIGNED d;
|
||||
ALIGNED __m64 ALIGNED s[2];
|
||||
ALIGNED double ALIGNED f[2];
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
} ALIGNED mix_D128 ALIGNED;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m128i vec ;
|
||||
__m128d vecf ;
|
||||
uint64_t masks[2] ;
|
||||
} MaskVec_D ;
|
||||
|
||||
typedef union ALIGNED {
|
||||
__m64 vec ;
|
||||
__m64 vecf ;
|
||||
uint64_t masks[1] ;
|
||||
} MaskVec_D128 ;
|
||||
|
||||
typedef union ALIGNED
|
||||
{
|
||||
ALIGNED __m128i ALIGNED i;
|
||||
|
|
@ -120,7 +146,6 @@ struct Context<float>
|
|||
};
|
||||
|
||||
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int rslen, haplen;
|
||||
|
|
@ -131,24 +156,11 @@ typedef struct
|
|||
int *irs;
|
||||
} testcase;
|
||||
|
||||
|
||||
template<class T>
|
||||
std::string to_string(T obj)
|
||||
{
|
||||
std::stringstream ss;
|
||||
std::string ret_string;
|
||||
ss.clear();
|
||||
ss << std::scientific << obj;
|
||||
ss >> ret_string;
|
||||
ss.clear();
|
||||
return ret_string;
|
||||
}
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
|
||||
int normalize(char c);
|
||||
int read_testcase(testcase *tc, FILE* ifp);
|
||||
int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false);
|
||||
int read_testcase(testcase *tc, FILE* ifp=0);
|
||||
|
||||
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
#define NUM_DISTINCT_CHARS 5
|
||||
#define AMBIG_CHAR 4
|
||||
|
||||
|
|
@ -176,7 +188,6 @@ public:
|
|||
};
|
||||
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +1,7 @@
|
|||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "vector_defs.h"
|
||||
|
||||
uint8_t ConvertChar::conversionTable[255];
|
||||
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
||||
|
|
@ -31,6 +33,30 @@ bool is_sse42_supported()
|
|||
return ((ecx >> 20)&1) == 1;
|
||||
}
|
||||
|
||||
void initialize_function_pointers()
|
||||
{
|
||||
//if(is_avx_supported())
|
||||
if(false)
|
||||
{
|
||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
||||
g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
||||
}
|
||||
else
|
||||
if(is_sse42_supported())
|
||||
{
|
||||
cout << "Using SSE4.2 accelerated implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob_sses<float>;
|
||||
g_compute_full_prob_double = compute_full_prob_ssed<double>;
|
||||
}
|
||||
else
|
||||
{
|
||||
cout << "Using un-vectorized C++ implementation of PairHMM\n";
|
||||
g_compute_full_prob_float = compute_full_prob<float>;
|
||||
g_compute_full_prob_double = compute_full_prob<double>;
|
||||
}
|
||||
}
|
||||
|
||||
int normalize(char c)
|
||||
{
|
||||
return ((int) (c - 33));
|
||||
|
|
@ -214,12 +240,3 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|||
return tokens.size();
|
||||
}
|
||||
|
||||
void debug_dump(string filename, string s, bool to_append, bool add_newline)
|
||||
{
|
||||
ofstream fptr;
|
||||
fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
|
||||
fptr << s;
|
||||
if(add_newline)
|
||||
fptr << "\n";
|
||||
fptr.close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,22 @@
|
|||
#ifndef PAIRHMM_UTIL_H
|
||||
#define PAIRHMM_UTIL_H
|
||||
|
||||
#define MIN_ACCEPTED 1e-28f
|
||||
|
||||
template<class T>
|
||||
std::string to_string(T obj)
|
||||
{
|
||||
std::stringstream ss;
|
||||
std::string ret_string;
|
||||
ss.clear();
|
||||
ss << std::scientific << obj;
|
||||
ss >> ret_string;
|
||||
ss.clear();
|
||||
return ret_string;
|
||||
}
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
|
||||
int read_mod_testcase(std::ifstream& fptr, testcase* tc, bool reformat=false);
|
||||
|
||||
bool is_avx_supported();
|
||||
bool is_sse42_supported();
|
||||
extern float (*g_compute_full_prob_float)(testcase *tc, float *before_last_log);
|
||||
|
|
@ -9,5 +24,5 @@ extern double (*g_compute_full_prob_double)(testcase *tc, double* before_last_lo
|
|||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline);
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
|
||||
void initialize_function_pointers();
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,26 @@
|
|||
#undef SIMD_TYPE
|
||||
#undef SIMD_TYPE_AVX
|
||||
#undef SIMD_TYPE_SSE
|
||||
|
||||
#define SIMD_TYPE avx
|
||||
#define SIMD_TYPE_AVX
|
||||
|
||||
#include "define-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#undef SIMD_TYPE
|
||||
#undef SIMD_TYPE_AVX
|
||||
|
||||
#define SIMD_TYPE sse
|
||||
#define SIMD_TYPE_SSE
|
||||
|
||||
#include "define-sse-float.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
#include "define-sse-double.h"
|
||||
#include "vector_function_prototypes.h"
|
||||
|
||||
|
||||
|
|
@ -0,0 +1,19 @@
|
|||
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn, MAIN_TYPE &shiftOut);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(_vector_shift_last,SIMD_TYPE), PRECISION) (UNION_TYPE &x, MAIN_TYPE shiftIn);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(precompute_masks_,SIMD_TYPE), PRECISION)(const testcase& tc, int COLS, int numMaskVecs, MASK_TYPE (*maskArr)[NUM_DISTINCT_CHARS]);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(init_masks_for_row_,SIMD_TYPE), PRECISION)(const testcase& tc, char* rsArr, MASK_TYPE* lastMaskShiftOut, int beginRowIndex, int numRowsToProcess);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(update_masks_for_cols_,SIMD_TYPE), PRECISION)(int maskIndex, MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, MASK_TYPE (*maskArr) [NUM_DISTINCT_CHARS], char* rsArr, MASK_TYPE* lastMaskShiftOut, MASK_TYPE maskBitCnt);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(computeDistVec,SIMD_TYPE), PRECISION) (MASK_VEC& currMaskVecLow, MASK_VEC& currMaskVecHigh, _256_TYPE& distm, _256_TYPE& _1_distm, _256_TYPE& distmChosen);
|
||||
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(initializeVectors,SIMD_TYPE), PRECISION)(int ROWS, int COLS, NUMBER* shiftOutM, NUMBER *shiftOutX, NUMBER *shiftOutY, Context<NUMBER> ctx, testcase *tc, _256_TYPE *p_MM, _256_TYPE *p_GAPM, _256_TYPE *p_MX, _256_TYPE *p_XX, _256_TYPE *p_MY, _256_TYPE *p_YY, _256_TYPE *distm1D);
|
||||
template<class NUMBER> void GEN_INTRINSIC(GEN_INTRINSIC(stripINITIALIZATION,SIMD_TYPE), PRECISION)(
|
||||
int stripIdx, Context<NUMBER> ctx, testcase *tc, _256_TYPE &pGAPM, _256_TYPE &pMM, _256_TYPE &pMX, _256_TYPE &pXX, _256_TYPE &pMY, _256_TYPE &pYY,
|
||||
_256_TYPE &rs, UNION_TYPE &rsN, _256_TYPE &distm, _256_TYPE &_1_distm, _256_TYPE *distm1D, _256_TYPE N_packed256, _256_TYPE *p_MM , _256_TYPE *p_GAPM ,
|
||||
_256_TYPE *p_MX, _256_TYPE *p_XX , _256_TYPE *p_MY, _256_TYPE *p_YY, UNION_TYPE &M_t_2, UNION_TYPE &X_t_2, UNION_TYPE &M_t_1, UNION_TYPE &X_t_1,
|
||||
UNION_TYPE &Y_t_2, UNION_TYPE &Y_t_1, UNION_TYPE &M_t_1_y, NUMBER* shiftOutX, NUMBER* shiftOutM);
|
||||
_256_TYPE GEN_INTRINSIC(GEN_INTRINSIC(computeDISTM,SIMD_TYPE), PRECISION)(int d, int COLS, testcase * tc, HAP_TYPE &hap, _256_TYPE rs, UNION_TYPE rsN, _256_TYPE N_packed256,
|
||||
_256_TYPE distm, _256_TYPE _1_distm);
|
||||
void GEN_INTRINSIC(GEN_INTRINSIC(computeMXY,SIMD_TYPE), PRECISION)(UNION_TYPE &M_t, UNION_TYPE &X_t, UNION_TYPE &Y_t, UNION_TYPE &M_t_y,
|
||||
UNION_TYPE M_t_2, UNION_TYPE X_t_2, UNION_TYPE Y_t_2, UNION_TYPE M_t_1, UNION_TYPE X_t_1, UNION_TYPE M_t_1_y, UNION_TYPE Y_t_1,
|
||||
_256_TYPE pMM, _256_TYPE pGAPM, _256_TYPE pMX, _256_TYPE pXX, _256_TYPE pMY, _256_TYPE pYY, _256_TYPE distmSel);
|
||||
template<class NUMBER> NUMBER GEN_INTRINSIC(GEN_INTRINSIC(compute_full_prob_,SIMD_TYPE), PRECISION) (testcase *tc, NUMBER *before_last_log = NULL);
|
||||
|
||||
|
|
@ -1086,7 +1086,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
referenceConfidenceModel.close();
|
||||
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
||||
//TODO (open & close) at the walker, not the engine.
|
||||
//likelihoodCalculationEngine.close();
|
||||
likelihoodCalculationEngine.close();
|
||||
logger.info("Ran local assembly on " + result + " active regions");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -89,4 +89,6 @@ public interface LikelihoodCalculationEngine {
|
|||
*/
|
||||
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods(AssemblyResultSet assemblyResultSet,
|
||||
Map<String, List<GATKSAMRecord>> perSampleReadList);
|
||||
|
||||
public void close();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -165,8 +165,10 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
if ( likelihoodsStream != null ) likelihoodsStream.close();
|
||||
pairHMMThreadLocal.get().close();
|
||||
}
|
||||
|
||||
private void writeDebugLikelihoods(final GATKSAMRecord processedRead, final Haplotype haplotype, final double log10l){
|
||||
|
|
|
|||
|
|
@ -66,7 +66,8 @@ import java.util.Map;
|
|||
*/
|
||||
public class JNILoglessPairHMM extends LoglessPairHMM {
|
||||
|
||||
private static final boolean debug = true; //simulates ifdef
|
||||
private static final boolean debug = false; //simulates ifdef
|
||||
private static final boolean verify = debug || false; //simulates ifdef
|
||||
private static final boolean debug0_1 = false; //simulates ifdef
|
||||
private static final boolean debug1 = false; //simulates ifdef
|
||||
private static final boolean debug2 = false;
|
||||
|
|
@ -90,6 +91,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
}
|
||||
|
||||
private native void jniGlobalInit(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass);
|
||||
private native void jniClose();
|
||||
|
||||
private static boolean isJNILoglessPairHMMLibraryLoaded = false;
|
||||
|
||||
|
|
@ -104,6 +106,13 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close()
|
||||
{
|
||||
jniClose();
|
||||
debugClose();
|
||||
}
|
||||
|
||||
//Used to test parts of the compute kernel separately
|
||||
private native void jniInitialize(final int readMaxLength, final int haplotypeMaxLength);
|
||||
private native static void jniInitializeProbabilities(final double[][] transition, final byte[] insertionGOP, final byte[] deletionGOP, final byte[] overallGCP);
|
||||
|
|
@ -126,7 +135,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
@Override
|
||||
public void initialize(final int readMaxLength, final int haplotypeMaxLength)
|
||||
{
|
||||
if(debug)
|
||||
if(verify)
|
||||
super.initialize(readMaxLength, haplotypeMaxLength);
|
||||
if(debug3)
|
||||
{
|
||||
|
|
@ -150,14 +159,14 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
public PerReadAlleleLikelihoodMap computeLikelihoods(final List<GATKSAMRecord> reads, final Map<Allele, Haplotype> alleleHaplotypeMap, final Map<GATKSAMRecord, byte[]> GCPArrayMap)
|
||||
{
|
||||
// (re)initialize the pairHMM only if necessary
|
||||
final int readMaxLength = debug ? findMaxReadLength(reads) : 0;
|
||||
final int haplotypeMaxLength = debug ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0;
|
||||
if(debug)
|
||||
final int readMaxLength = verify ? findMaxReadLength(reads) : 0;
|
||||
final int haplotypeMaxLength = verify ? findMaxHaplotypeLength(alleleHaplotypeMap) : 0;
|
||||
if(verify)
|
||||
{
|
||||
if (!initialized || readMaxLength > maxReadLength || haplotypeMaxLength > maxHaplotypeLength)
|
||||
{ initialize(readMaxLength, haplotypeMaxLength); }
|
||||
if ( ! initialized )
|
||||
throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug mode");
|
||||
throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
|
||||
}
|
||||
int readListSize = reads.size();
|
||||
int alleleHaplotypeMapSize = alleleHaplotypeMap.size();
|
||||
|
|
@ -221,7 +230,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
|
||||
++idx;
|
||||
}
|
||||
if(debug)
|
||||
if(verify)
|
||||
{
|
||||
//to compare values
|
||||
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
|
|
@ -273,6 +282,7 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
}
|
||||
++numComputeLikelihoodCalls;
|
||||
//if(numComputeLikelihoodCalls == 5)
|
||||
//jniClose();
|
||||
//System.exit(0);
|
||||
return likelihoodMap;
|
||||
}
|
||||
|
|
@ -299,11 +309,11 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
//}
|
||||
//System.out.println("#### END STACK TRACE ####");
|
||||
//
|
||||
if(debug1)
|
||||
jniSubComputeReadLikelihoodGivenHaplotypeLog10(readBases.length, haplotypeBases.length,
|
||||
readBases, haplotypeBases, readQuals,
|
||||
insertionGOP, deletionGOP, overallGCP,
|
||||
hapStartIndex);
|
||||
if(debug1)
|
||||
jniSubComputeReadLikelihoodGivenHaplotypeLog10(readBases.length, haplotypeBases.length,
|
||||
readBases, haplotypeBases, readQuals,
|
||||
insertionGOP, deletionGOP, overallGCP,
|
||||
hapStartIndex);
|
||||
|
||||
boolean doInitialization = (previousHaplotypeBases == null || previousHaplotypeBases.length != haplotypeBases.length);
|
||||
if (doInitialization) {
|
||||
|
|
@ -358,8 +368,8 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
|
|||
++numCalls;
|
||||
//if(numCalls == 100)
|
||||
//{
|
||||
//debugClose();
|
||||
//System.exit(0);
|
||||
//debugClose();
|
||||
//System.exit(0);
|
||||
//}
|
||||
return Math.log10(finalSumProbabilities) - INITIAL_CONDITION_LOG10;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -280,4 +280,7 @@ public abstract class PairHMM {
|
|||
|
||||
return Math.min(haplotype1.length, haplotype2.length);
|
||||
}
|
||||
|
||||
//Called at the end of all HC calls
|
||||
public void close() { ; }
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue