2014-03-04 01:04:00 +08:00
|
|
|
/*Copyright (c) 2012 The Broad Institute
|
|
|
|
|
|
|
|
|
|
*Permission is hereby granted, free of charge, to any person
|
|
|
|
|
*obtaining a copy of this software and associated documentation
|
|
|
|
|
*files (the "Software"), to deal in the Software without
|
|
|
|
|
*restriction, including without limitation the rights to use,
|
|
|
|
|
*copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
|
|
|
*copies of the Software, and to permit persons to whom the
|
|
|
|
|
*Software is furnished to do so, subject to the following
|
|
|
|
|
*conditions:
|
|
|
|
|
|
|
|
|
|
*The above copyright notice and this permission notice shall be
|
|
|
|
|
*included in all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
*THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
*EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
|
|
|
*OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
*NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
|
|
|
*HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
|
|
|
*WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
|
|
|
*FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
|
|
|
|
|
*THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
|
2014-01-19 03:07:23 +08:00
|
|
|
#include "headers.h"
|
2014-01-21 00:03:42 +08:00
|
|
|
#include "utils.h"
|
2014-02-07 06:35:32 +08:00
|
|
|
#include "LoadTimeInitializer.h"
|
2014-03-01 08:08:28 +08:00
|
|
|
using namespace std;
|
2014-01-19 03:07:23 +08:00
|
|
|
|
2014-03-01 08:08:28 +08:00
|
|
|
//static members from ConvertChar
|
2014-01-17 11:53:50 +08:00
|
|
|
uint8_t ConvertChar::conversionTable[255];
|
2014-03-01 08:08:28 +08:00
|
|
|
//Global function pointers in utils.h
|
2014-01-19 03:07:23 +08:00
|
|
|
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
|
|
|
|
double (*g_compute_full_prob_double)(testcase *tc, double* before_last_log) = 0;
|
2014-03-01 08:08:28 +08:00
|
|
|
//Static members in ContextBase
|
2014-03-18 02:42:19 +08:00
|
|
|
template<>
|
2014-03-01 08:08:28 +08:00
|
|
|
bool ContextBase<double>::staticMembersInitializedFlag = false;
|
2014-03-18 02:42:19 +08:00
|
|
|
template<>
|
|
|
|
|
double ContextBase<double>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { };
|
|
|
|
|
template<>
|
|
|
|
|
double ContextBase<double>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { };
|
|
|
|
|
template<>
|
2014-03-01 08:08:28 +08:00
|
|
|
bool ContextBase<float>::staticMembersInitializedFlag = false;
|
2014-03-18 02:42:19 +08:00
|
|
|
template<>
|
|
|
|
|
float ContextBase<float>::jacobianLogTable[JACOBIAN_LOG_TABLE_SIZE] = { };
|
|
|
|
|
template<>
|
|
|
|
|
float ContextBase<float>::matchToMatchProb[((MAX_QUAL + 1) * (MAX_QUAL + 2)) >> 1] = { };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool search_file_for_string(string filename, string search_string)
|
|
|
|
|
{
|
|
|
|
|
ifstream fptr;
|
|
|
|
|
fptr.open(filename.c_str(),ios::in);
|
|
|
|
|
if(fptr.is_open())
|
|
|
|
|
{
|
|
|
|
|
string buffer;
|
|
|
|
|
buffer.clear();
|
|
|
|
|
buffer.resize(4096);
|
|
|
|
|
bool retvalue = false;
|
|
|
|
|
while(!fptr.eof())
|
|
|
|
|
{
|
|
|
|
|
fptr.getline(&(buffer[0]), 4096);
|
|
|
|
|
if(buffer.find(search_string) != string::npos) //found string
|
|
|
|
|
{
|
|
|
|
|
retvalue = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
buffer.clear();
|
|
|
|
|
fptr.close();
|
|
|
|
|
return retvalue;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
return false;
|
|
|
|
|
}
|
2014-01-19 03:07:23 +08:00
|
|
|
|
2014-03-18 02:42:19 +08:00
|
|
|
bool is_cpuid_ecx_bit_set(int eax, int bitidx)
|
|
|
|
|
{
|
|
|
|
|
int ecx = 0, edx = 0, ebx = 0;
|
|
|
|
|
__asm__ ("cpuid"
|
|
|
|
|
:"=b" (ebx),
|
|
|
|
|
"=c" (ecx),
|
|
|
|
|
"=d" (edx)
|
|
|
|
|
:"a" (eax)
|
|
|
|
|
);
|
|
|
|
|
return (((ecx >> bitidx)&1) == 1);
|
|
|
|
|
}
|
2014-01-17 11:53:50 +08:00
|
|
|
|
2014-01-19 03:07:23 +08:00
|
|
|
bool is_avx_supported()
|
|
|
|
|
{
|
2014-03-18 02:42:19 +08:00
|
|
|
#ifdef __INTEL_COMPILER
|
|
|
|
|
bool use_avx = _may_i_use_cpu_feature(_FEATURE_AVX);
|
|
|
|
|
if(use_avx)
|
|
|
|
|
return true;
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
//check if core supports AVX, but kernel does not and print info message
|
|
|
|
|
if(!is_cpuid_ecx_bit_set(1, 28)) //core does not support AVX
|
|
|
|
|
return false;
|
|
|
|
|
//else fall through to end of function
|
|
|
|
|
}
|
|
|
|
|
#else
|
|
|
|
|
if(!__builtin_cpu_supports("avx")) //core does not support AVX
|
|
|
|
|
return false;
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
//core supports AVX, check if kernel supports
|
|
|
|
|
if(search_file_for_string("/proc/cpuinfo","avx"))
|
|
|
|
|
return true;
|
|
|
|
|
//else fall through to end of function
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
#endif //__INTEL_COMPILER
|
|
|
|
|
clog << "INFO: Your CPU supports AVX vector instructions, but your kernel does not. Try upgrading to a kernel that supports AVX.\n";
|
|
|
|
|
clog << "INFO: Your program will run correctly, but slower than the AVX version\n";
|
|
|
|
|
return false;
|
2014-01-19 03:07:23 +08:00
|
|
|
}
|
|
|
|
|
|
2014-02-08 07:19:55 +08:00
|
|
|
bool is_sse41_supported()
|
|
|
|
|
{
|
2014-03-18 02:42:19 +08:00
|
|
|
#ifdef __INTEL_COMPILER
|
2014-03-07 00:49:52 +08:00
|
|
|
return (_may_i_use_cpu_feature(_FEATURE_SSE4_1) > 0);
|
2014-03-18 02:42:19 +08:00
|
|
|
#else
|
|
|
|
|
return __builtin_cpu_supports("sse4.1");
|
|
|
|
|
#endif
|
|
|
|
|
//return is_cpuid_ecx_bit_set(1, 19);
|
2014-02-08 07:19:55 +08:00
|
|
|
}
|
|
|
|
|
|
2014-01-19 03:07:23 +08:00
|
|
|
bool is_sse42_supported()
|
|
|
|
|
{
|
2014-03-18 02:42:19 +08:00
|
|
|
#ifdef __INTEL_COMPILER
|
2014-03-07 00:49:52 +08:00
|
|
|
return (_may_i_use_cpu_feature(_FEATURE_SSE4_2) > 0);
|
2014-03-18 02:42:19 +08:00
|
|
|
#else
|
|
|
|
|
return __builtin_cpu_supports("sse4.2");
|
|
|
|
|
#endif
|
|
|
|
|
//return is_cpuid_ecx_bit_set(1, 20);
|
2014-01-19 03:07:23 +08:00
|
|
|
}
|
|
|
|
|
|
2014-01-25 08:29:35 +08:00
|
|
|
uint64_t get_machine_capabilities()
|
2014-01-21 00:03:42 +08:00
|
|
|
{
|
2014-01-25 08:29:35 +08:00
|
|
|
uint64_t machine_mask = 0ull;
|
2014-01-21 00:51:53 +08:00
|
|
|
if(is_avx_supported())
|
2014-01-25 08:29:35 +08:00
|
|
|
machine_mask |= (1 << AVX_CUSTOM_IDX);
|
|
|
|
|
if(is_sse42_supported())
|
|
|
|
|
machine_mask |= (1 << SSE42_CUSTOM_IDX);
|
2014-02-08 07:19:55 +08:00
|
|
|
if(is_sse41_supported())
|
|
|
|
|
machine_mask |= (1 << SSE41_CUSTOM_IDX);
|
2014-01-25 08:29:35 +08:00
|
|
|
return machine_mask;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void initialize_function_pointers(uint64_t mask)
|
|
|
|
|
{
|
1. Whew, finally debugged the source of performance issues with PairHMM
JNI. See copied text from email below.
2. This commit contains all the code used in profiling, detecting FP
exceptions, dumping intermediate results. All flagged off using ifdefs,
but it's there.
--------------Text from email
As we discussed before, it's the denormal numbers that are causing the
slowdown - the core executes some microcode uops (called FP assists)
when denormal numbers are detected for FP operations (even un-vectorized
code).
The C++ compiler by default enables flush to zero (FTZ) - when set, the
hardware simply converts denormal numbers to 0. The Java binary
(executable provided by Oracle, not the native library) seems to be
compiled without FTZ (sensible choice, they want to be conservative).
Hence, the JNI invocation sees a large slowdown. Disabling FTZ in C++
slows down the C++ sandbox performance to the JNI version (fortunately,
the reverse also holds :)).
Not sure how to show the overhead for these FP assists easily - measured
a couple of counters.
FP_ASSISTS:ANY - shows number of uops executed as part of the FP
assists. When FTZ is enabled, this is 0 (both C++ and JNI), when FTZ is
disabled this value is around 203540557 (both C++ and JNI)
IDQ:MS_UOPS_CYCLES - shows the number of cycles the decoder was issuing
uops when the microcode sequencing engine was busy. When FTZ is enabled,
this is around 1.77M cycles (both C++ and JNI), when FTZ is disabled
this value is around 4.31B cycles (both C++ and JNI). This number is
still small with respect to total cycles (~40B), but it only reflects
the cycles in the decode stage. The total overhead of the microcode
assist ops could be larger.
As suggested by Mustafa, I compared intermediate values (matrices M,X,Y)
and final output of compute_full_prob. The values produced by C++ and
Java are identical to the last bit (as long as both use FTZ or no-FTZ).
Comparing the outputs of compute_full_prob for the cases no-FTZ and FTZ,
there are differences for very small values (denormal numbers).
Examples:
Diff values 1.952970E-33 1.952967E-33
Diff values 1.135071E-32 1.135070E-32
Diff values 1.135071E-32 1.135070E-32
Diff values 1.135071E-32 1.135070E-32
For this test case (low coverage NA12878), all these values would be
recomputed using the double precision version. Enabling FTZ should be
fine.
-------------------End text from email
2014-02-06 09:09:57 +08:00
|
|
|
//mask = 0ull;
|
2014-02-26 13:44:20 +08:00
|
|
|
//mask = (1 << SSE41_CUSTOM_IDX);
|
2014-01-25 08:29:35 +08:00
|
|
|
if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
|
2014-01-21 00:03:42 +08:00
|
|
|
{
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << "Using AVX accelerated implementation of PairHMM\n";
|
2014-01-21 00:03:42 +08:00
|
|
|
g_compute_full_prob_float = compute_full_prob_avxs<float>;
|
|
|
|
|
g_compute_full_prob_double = compute_full_prob_avxd<double>;
|
|
|
|
|
}
|
|
|
|
|
else
|
2014-02-10 10:05:35 +08:00
|
|
|
if(is_sse41_supported() && (mask & ((1<< SSE41_CUSTOM_IDX) | (1<<SSE42_CUSTOM_IDX))))
|
2014-01-21 00:03:42 +08:00
|
|
|
{
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << "Using SSE4.1 accelerated implementation of PairHMM\n";
|
2014-01-21 00:03:42 +08:00
|
|
|
g_compute_full_prob_float = compute_full_prob_sses<float>;
|
|
|
|
|
g_compute_full_prob_double = compute_full_prob_ssed<double>;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << "Using un-vectorized C++ implementation of PairHMM\n";
|
2014-01-21 00:03:42 +08:00
|
|
|
g_compute_full_prob_float = compute_full_prob<float>;
|
|
|
|
|
g_compute_full_prob_double = compute_full_prob<double>;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-01-17 11:53:50 +08:00
|
|
|
int normalize(char c)
|
|
|
|
|
{
|
|
|
|
|
return ((int) (c - 33));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int read_testcase(testcase *tc, FILE* ifp)
|
|
|
|
|
{
|
|
|
|
|
char *q, *i, *d, *c, *line = NULL;
|
|
|
|
|
int _q, _i, _d, _c;
|
|
|
|
|
int x, size = 0;
|
|
|
|
|
ssize_t read;
|
|
|
|
|
|
2014-02-07 01:32:56 +08:00
|
|
|
|
|
|
|
|
read = getline(&line, (size_t *) &size, ifp == 0 ? stdin : ifp);
|
2014-01-17 11:53:50 +08:00
|
|
|
if (read == -1)
|
2014-02-07 01:32:56 +08:00
|
|
|
{
|
|
|
|
|
free(line);
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
2014-01-17 11:53:50 +08:00
|
|
|
|
|
|
|
|
|
|
|
|
|
tc->hap = (char *) malloc(size);
|
|
|
|
|
tc->rs = (char *) malloc(size);
|
|
|
|
|
q = (char *) malloc(size);
|
|
|
|
|
i = (char *) malloc(size);
|
|
|
|
|
d = (char *) malloc(size);
|
|
|
|
|
c = (char *) malloc(size);
|
|
|
|
|
|
|
|
|
|
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
|
|
|
|
|
return -1;
|
|
|
|
|
|
2014-01-30 04:10:29 +08:00
|
|
|
|
2014-01-17 11:53:50 +08:00
|
|
|
tc->haplen = strlen(tc->hap);
|
|
|
|
|
tc->rslen = strlen(tc->rs);
|
2014-03-18 02:42:19 +08:00
|
|
|
assert(strlen(q) == (size_t)tc->rslen);
|
|
|
|
|
assert(strlen(i) == (size_t)tc->rslen);
|
|
|
|
|
assert(strlen(d) == (size_t)tc->rslen);
|
|
|
|
|
assert(strlen(c) == (size_t)tc->rslen);
|
|
|
|
|
|
|
|
|
|
g_load_time_initializer.update_stat(READ_LENGTH_IDX, tc->rslen);
|
|
|
|
|
g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, tc->haplen);
|
|
|
|
|
g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, tc->haplen*tc->rslen);
|
2014-01-27 03:36:06 +08:00
|
|
|
//assert(tc->rslen < MROWS);
|
2014-02-05 08:27:29 +08:00
|
|
|
//tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
|
|
|
|
//tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
2014-01-17 11:53:50 +08:00
|
|
|
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->q = (char *) malloc(sizeof(char) * tc->rslen);
|
|
|
|
|
tc->i = (char *) malloc(sizeof(char) * tc->rslen);
|
|
|
|
|
tc->d = (char *) malloc(sizeof(char) * tc->rslen);
|
|
|
|
|
tc->c = (char *) malloc(sizeof(char) * tc->rslen);
|
2014-01-17 11:53:50 +08:00
|
|
|
|
|
|
|
|
for (x = 0; x < tc->rslen; x++)
|
|
|
|
|
{
|
|
|
|
|
_q = normalize(q[x]);
|
|
|
|
|
_i = normalize(i[x]);
|
|
|
|
|
_d = normalize(d[x]);
|
|
|
|
|
_c = normalize(c[x]);
|
2014-01-30 04:10:29 +08:00
|
|
|
tc->q[x] = (_q < 6) ? 6 : _q;
|
|
|
|
|
//tc->q[x] = _q;
|
2014-01-17 11:53:50 +08:00
|
|
|
tc->i[x] = _i;
|
|
|
|
|
tc->d[x] = _d;
|
|
|
|
|
tc->c[x] = _c;
|
2014-02-05 08:27:29 +08:00
|
|
|
//tc->irs[x] = tc->rs[x];
|
2014-01-17 11:53:50 +08:00
|
|
|
}
|
2014-02-05 08:27:29 +08:00
|
|
|
//for (x = 0; x < tc->haplen; x++)
|
|
|
|
|
//tc->ihap[x] = tc->hap[x];
|
2014-01-30 04:10:29 +08:00
|
|
|
|
2014-01-17 11:53:50 +08:00
|
|
|
free(q);
|
|
|
|
|
free(i);
|
|
|
|
|
free(d);
|
|
|
|
|
free(c);
|
|
|
|
|
free(line);
|
|
|
|
|
|
2014-01-30 04:10:29 +08:00
|
|
|
|
|
|
|
|
|
2014-01-17 11:53:50 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
unsigned MAX_LINE_LENGTH = 65536;
|
|
|
|
|
int convToInt(std::string s)
|
|
|
|
|
{
|
|
|
|
|
int i;
|
|
|
|
|
std::istringstream strin(s);
|
|
|
|
|
strin >> i;
|
|
|
|
|
return i;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void tokenize(std::ifstream& fptr, std::vector<std::string>& tokens)
|
|
|
|
|
{
|
|
|
|
|
int i = 0;
|
|
|
|
|
std::string tmp;
|
|
|
|
|
std::vector<std::string> myVec;
|
|
|
|
|
vector<char> line;
|
|
|
|
|
line.clear();
|
|
|
|
|
line.resize(MAX_LINE_LENGTH);
|
|
|
|
|
vector<char> tmpline;
|
|
|
|
|
tmpline.clear();
|
|
|
|
|
tmpline.resize(MAX_LINE_LENGTH);
|
|
|
|
|
myVec.clear();
|
|
|
|
|
|
|
|
|
|
while(!fptr.eof())
|
|
|
|
|
{
|
|
|
|
|
i = 0;
|
|
|
|
|
bool still_read_line = true;
|
|
|
|
|
unsigned line_position = 0;
|
|
|
|
|
while(still_read_line)
|
|
|
|
|
{
|
|
|
|
|
fptr.getline(&(tmpline[0]), MAX_LINE_LENGTH);
|
|
|
|
|
if(line_position + MAX_LINE_LENGTH > line.size())
|
|
|
|
|
line.resize(2*line.size());
|
|
|
|
|
for(unsigned i=0;i<MAX_LINE_LENGTH && tmpline[i] != '\0';++i,++line_position)
|
|
|
|
|
line[line_position] = tmpline[i];
|
|
|
|
|
if(fptr.eof() || !fptr.fail())
|
|
|
|
|
{
|
|
|
|
|
still_read_line = false;
|
|
|
|
|
line[line_position++] = '\0';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
std::istringstream kap(&(line[0]));
|
|
|
|
|
|
|
|
|
|
while(!kap.eof())
|
|
|
|
|
{
|
|
|
|
|
kap >> std::skipws >> tmp;
|
|
|
|
|
if(tmp != "")
|
|
|
|
|
{
|
|
|
|
|
myVec.push_back(tmp);
|
|
|
|
|
++i;
|
2014-12-21 01:32:06 +08:00
|
|
|
//std::cerr <<tmp <<"#";
|
2014-01-17 11:53:50 +08:00
|
|
|
}
|
|
|
|
|
tmp = "";
|
|
|
|
|
}
|
2014-12-21 01:32:06 +08:00
|
|
|
//std::cerr << "\n";
|
2014-01-17 11:53:50 +08:00
|
|
|
if(myVec.size() > 0)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
tokens.clear();
|
2014-12-21 01:32:06 +08:00
|
|
|
//std::cerr << "Why "<<myVec.size()<<"\n";
|
2014-01-17 11:53:50 +08:00
|
|
|
tokens.resize(myVec.size());
|
|
|
|
|
for(i=0;i<(int)myVec.size();++i)
|
|
|
|
|
tokens[i] = myVec[i];
|
|
|
|
|
line.clear();
|
|
|
|
|
tmpline.clear();
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|
|
|
|
{
|
|
|
|
|
static bool first_call = true;
|
|
|
|
|
vector<string> tokens;
|
|
|
|
|
tokens.clear();
|
|
|
|
|
tokenize(fptr, tokens);
|
|
|
|
|
if(tokens.size() == 0)
|
|
|
|
|
return -1;
|
|
|
|
|
tc->hap = new char[tokens[0].size()+2];
|
|
|
|
|
tc->haplen = tokens[0].size();
|
|
|
|
|
memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
|
|
|
|
|
tc->rs = new char[tokens[1].size()+2];
|
|
|
|
|
tc->rslen = tokens[1].size();
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->q = new char[tc->rslen];
|
|
|
|
|
tc->i = new char[tc->rslen];
|
|
|
|
|
tc->d = new char[tc->rslen];
|
|
|
|
|
tc->c = new char[tc->rslen];
|
2014-12-21 01:32:06 +08:00
|
|
|
//cerr << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
|
2014-01-17 11:53:50 +08:00
|
|
|
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
|
2014-03-18 02:42:19 +08:00
|
|
|
assert(tokens.size() == (size_t)(2 + 4*(tc->rslen)));
|
2014-01-27 03:36:06 +08:00
|
|
|
//assert(tc->rslen < MROWS);
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-23 14:57:32 +08:00
|
|
|
tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);
|
2014-01-17 11:53:50 +08:00
|
|
|
|
|
|
|
|
if(reformat)
|
|
|
|
|
{
|
|
|
|
|
ofstream ofptr;
|
|
|
|
|
ofptr.open("reformat/debug_dump.txt",first_call ? ios::out : ios::app);
|
|
|
|
|
assert(ofptr.is_open());
|
|
|
|
|
ofptr << tokens[0] << " ";
|
|
|
|
|
ofptr << tokens[1] << " ";
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-17 11:53:50 +08:00
|
|
|
ofptr << ((char)(tc->q[j]+33));
|
|
|
|
|
ofptr << " ";
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-17 11:53:50 +08:00
|
|
|
ofptr << ((char)(tc->i[j]+33));
|
|
|
|
|
ofptr << " ";
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-17 11:53:50 +08:00
|
|
|
ofptr << ((char)(tc->d[j]+33));
|
|
|
|
|
ofptr << " ";
|
2014-03-18 02:42:19 +08:00
|
|
|
for(int j=0;j<tc->rslen;++j)
|
2014-01-17 11:53:50 +08:00
|
|
|
ofptr << ((char)(tc->c[j]+33));
|
|
|
|
|
ofptr << " 0 false\n";
|
|
|
|
|
|
|
|
|
|
ofptr.close();
|
|
|
|
|
first_call = false;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return tokens.size();
|
|
|
|
|
}
|
2014-01-19 03:07:23 +08:00
|
|
|
|
2014-01-23 02:52:41 +08:00
|
|
|
double getCurrClk() {
|
|
|
|
|
struct timeval tv ;
|
|
|
|
|
gettimeofday(&tv, NULL);
|
|
|
|
|
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
|
|
|
|
}
|
2014-01-23 14:57:32 +08:00
|
|
|
|
2014-02-26 13:44:20 +08:00
|
|
|
inline unsigned long long rdtsc(void)
|
|
|
|
|
{
|
|
|
|
|
unsigned hi, lo;
|
|
|
|
|
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
|
|
|
|
|
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-07 06:35:32 +08:00
|
|
|
void get_time(struct timespec* store_struct)
|
2014-01-23 14:57:32 +08:00
|
|
|
{
|
2014-02-07 06:35:32 +08:00
|
|
|
clock_gettime(CLOCK_REALTIME, store_struct);
|
2014-01-23 14:57:32 +08:00
|
|
|
}
|
2014-01-31 04:08:06 +08:00
|
|
|
|
|
|
|
|
uint64_t diff_time(struct timespec& prev_time)
|
|
|
|
|
{
|
|
|
|
|
struct timespec curr_time;
|
|
|
|
|
clock_gettime(CLOCK_REALTIME, &curr_time);
|
|
|
|
|
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
|
|
|
|
|
}
|
|
|
|
|
|
2014-02-27 02:53:51 +08:00
|
|
|
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
#include "papi.h"
|
|
|
|
|
#define NUM_PAPI_COUNTERS 4
|
|
|
|
|
#endif
|
|
|
|
|
|
2014-02-10 10:05:35 +08:00
|
|
|
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check)
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
|
|
|
|
FILE* fptr = 0;
|
2014-02-07 03:01:33 +08:00
|
|
|
ifstream ifptr;
|
2014-02-05 08:27:29 +08:00
|
|
|
if(use_old_read_testcase)
|
|
|
|
|
{
|
|
|
|
|
fptr = fopen(filename,"r");
|
|
|
|
|
assert(fptr);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
ifptr.open(filename);
|
|
|
|
|
assert(ifptr.is_open());
|
|
|
|
|
}
|
2014-03-18 02:42:19 +08:00
|
|
|
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
|
|
|
|
ofstream times_fptr;
|
|
|
|
|
times_fptr.open("native_timed_intervals.csv",ios::out);
|
|
|
|
|
#endif
|
2014-02-05 08:27:29 +08:00
|
|
|
vector<testcase> tc_vector;
|
|
|
|
|
tc_vector.clear();
|
2014-02-07 03:01:33 +08:00
|
|
|
testcase tc;
|
|
|
|
|
uint64_t vector_compute_time = 0;
|
|
|
|
|
uint64_t baseline_compute_time = 0;
|
|
|
|
|
uint64_t num_double_calls = 0;
|
2014-02-26 13:44:20 +08:00
|
|
|
unsigned num_testcases = 0;
|
2014-02-10 10:05:35 +08:00
|
|
|
bool all_ok = do_check ? true : false;
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
uint32_t all_mask = (0);
|
|
|
|
|
uint32_t no_usr_mask = (1 << 16); //bit 16 user mode, bit 17 kernel mode
|
|
|
|
|
uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode
|
|
|
|
|
PAPI_num_counters();
|
|
|
|
|
int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
2014-03-18 02:42:19 +08:00
|
|
|
char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "l1_pending_miss", "lfb_hit", "l2_hit" };
|
2014-02-26 13:44:20 +08:00
|
|
|
assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK);
|
2014-03-18 02:42:19 +08:00
|
|
|
assert(PAPI_event_name_to_code("L1D_PEND_MISS:OCCURRENCES", &(events[1])) == PAPI_OK);
|
|
|
|
|
assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:HIT_LFB", &(events[2])) == PAPI_OK);
|
|
|
|
|
assert(PAPI_event_name_to_code("MEM_LOAD_UOPS_RETIRED:L2_HIT", &(events[3])) == PAPI_OK);
|
2014-02-26 13:44:20 +08:00
|
|
|
long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
|
|
|
|
long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
|
|
|
|
#endif
|
2014-02-05 08:27:29 +08:00
|
|
|
while(1)
|
|
|
|
|
{
|
2014-02-07 03:01:33 +08:00
|
|
|
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
2014-02-05 08:27:29 +08:00
|
|
|
if(break_value >= 0)
|
2014-02-07 03:01:33 +08:00
|
|
|
tc_vector.push_back(tc);
|
|
|
|
|
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
2014-02-07 03:01:33 +08:00
|
|
|
vector<double> results_vec;
|
|
|
|
|
vector<double> baseline_results_vec;
|
|
|
|
|
results_vec.clear();
|
|
|
|
|
baseline_results_vec.clear();
|
2014-02-05 08:27:29 +08:00
|
|
|
results_vec.resize(tc_vector.size());
|
2014-02-07 03:01:33 +08:00
|
|
|
baseline_results_vec.resize(tc_vector.size());
|
2014-03-18 02:42:19 +08:00
|
|
|
g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, tc_vector.size());
|
|
|
|
|
g_load_time_initializer.update_stat(NUM_READS_IDX, tc_vector.size());
|
|
|
|
|
g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, tc_vector.size());
|
2014-02-07 06:35:32 +08:00
|
|
|
struct timespec start_time;
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK);
|
|
|
|
|
#endif
|
2014-02-07 06:35:32 +08:00
|
|
|
get_time(&start_time);
|
2014-02-05 08:27:29 +08:00
|
|
|
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
2014-02-27 02:53:51 +08:00
|
|
|
#ifdef DO_REPEAT_PROFILING
|
2014-02-26 13:44:20 +08:00
|
|
|
for(unsigned z=0;z<10;++z)
|
|
|
|
|
#endif
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
2014-02-26 13:44:20 +08:00
|
|
|
for(unsigned i=0;i<tc_vector.size();++i)
|
|
|
|
|
{
|
|
|
|
|
testcase& tc = tc_vector[i];
|
|
|
|
|
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
|
|
|
|
double result = 0;
|
|
|
|
|
if (result_avxf < MIN_ACCEPTED) {
|
|
|
|
|
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
|
|
|
|
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
|
|
|
|
++num_double_calls;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
2014-02-07 06:35:32 +08:00
|
|
|
#ifdef DUMP_COMPUTE_VALUES
|
2014-02-26 13:44:20 +08:00
|
|
|
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
2014-02-07 06:35:32 +08:00
|
|
|
#endif
|
2014-02-26 13:44:20 +08:00
|
|
|
results_vec[i] = result;
|
|
|
|
|
}
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK);
|
|
|
|
|
#endif
|
2014-03-18 02:42:19 +08:00
|
|
|
uint64_t curr_interval = diff_time(start_time);
|
|
|
|
|
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
|
|
|
|
times_fptr << curr_interval << "\n";
|
|
|
|
|
#endif
|
|
|
|
|
vector_compute_time += curr_interval;
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
for(unsigned k=0;k<NUM_PAPI_COUNTERS;++k)
|
|
|
|
|
accum_values[k] += values[k];
|
|
|
|
|
#endif
|
|
|
|
|
num_testcases += tc_vector.size();
|
2014-02-10 10:05:35 +08:00
|
|
|
if(do_check)
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
2014-02-10 10:05:35 +08:00
|
|
|
get_time(&start_time);
|
|
|
|
|
#pragma omp parallel for schedule(dynamic,chunk_size)
|
|
|
|
|
for(unsigned i=0;i<tc_vector.size();++i)
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
2014-02-10 10:05:35 +08:00
|
|
|
testcase& tc = tc_vector[i];
|
|
|
|
|
double baseline_result = compute_full_prob<double>(&tc);
|
|
|
|
|
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
|
|
|
|
|
baseline_results_vec[i] = baseline_result;
|
|
|
|
|
}
|
|
|
|
|
baseline_compute_time += diff_time(start_time);
|
|
|
|
|
for(unsigned i=0;i<tc_vector.size();++i)
|
|
|
|
|
{
|
|
|
|
|
double baseline_result = baseline_results_vec[i];
|
|
|
|
|
double abs_error = fabs(baseline_result-results_vec[i]);
|
|
|
|
|
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
|
|
|
|
|
if(abs_error > 1e-5 && rel_error > 1e-5)
|
|
|
|
|
{
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
|
2014-02-10 10:05:35 +08:00
|
|
|
all_ok = false;
|
|
|
|
|
}
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|
|
|
|
|
}
|
2014-02-07 03:01:33 +08:00
|
|
|
for(unsigned i=0;i<tc_vector.size();++i)
|
2014-02-05 08:27:29 +08:00
|
|
|
{
|
2014-03-07 04:23:08 +08:00
|
|
|
delete[] tc_vector[i].rs;
|
|
|
|
|
delete[] tc_vector[i].hap;
|
|
|
|
|
delete[] tc_vector[i].q;
|
|
|
|
|
delete[] tc_vector[i].i;
|
|
|
|
|
delete[] tc_vector[i].d;
|
|
|
|
|
delete[] tc_vector[i].c;
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|
|
|
|
|
results_vec.clear();
|
2014-02-07 03:01:33 +08:00
|
|
|
tc_vector.clear();
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|
2014-02-07 01:32:56 +08:00
|
|
|
if(break_value < 0)
|
|
|
|
|
break;
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|
2014-02-07 06:35:32 +08:00
|
|
|
#ifdef DUMP_COMPUTE_VALUES
|
|
|
|
|
g_load_time_initializer.debug_close();
|
|
|
|
|
#endif
|
2014-02-05 08:27:29 +08:00
|
|
|
if(all_ok)
|
2014-02-07 03:01:33 +08:00
|
|
|
{
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << "All output values within acceptable error\n";
|
|
|
|
|
cerr << "Baseline double precision compute time "<<baseline_compute_time*1e-9<<"\n";
|
2014-02-07 03:01:33 +08:00
|
|
|
}
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << "Num testcase "<<num_testcases<< " num double invocations "<<num_double_calls<<"\n";
|
|
|
|
|
cerr << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
|
2014-02-26 13:44:20 +08:00
|
|
|
#ifdef USE_PAPI
|
|
|
|
|
for(unsigned i=0;i<NUM_PAPI_COUNTERS;++i)
|
2014-12-21 01:32:06 +08:00
|
|
|
cerr << eventnames[i] << " : "<<accum_values[i]<<"\n";
|
2014-02-26 13:44:20 +08:00
|
|
|
#endif
|
2014-03-18 02:42:19 +08:00
|
|
|
#ifdef PRINT_PER_INTERVAL_TIMINGS
|
|
|
|
|
times_fptr.close();
|
|
|
|
|
#endif
|
2014-02-05 08:27:29 +08:00
|
|
|
if(use_old_read_testcase)
|
|
|
|
|
fclose(fptr);
|
|
|
|
|
else
|
|
|
|
|
ifptr.close();
|
2014-03-18 02:42:19 +08:00
|
|
|
g_load_time_initializer.print_profiling();
|
2014-02-05 08:27:29 +08:00
|
|
|
}
|