Temporary commit containing debug profiling code - commented out

This commit is contained in:
Karthik Gururaj 2014-01-29 12:10:29 -08:00
parent 0c63d6264f
commit 5c7427e48c
6 changed files with 142 additions and 23 deletions

View File

@ -86,3 +86,25 @@ void LoadTimeInitializer::debug_close()
m_filename_to_fptr.clear(); m_filename_to_fptr.clear();
} }
void LoadTimeInitializer::dump_sandbox(unsigned haplotypeLength, unsigned readLength, char* haplotypeBasesArray, testcase& tc)
{
ofstream& dumpFptr = m_sandbox_fptr;
for(unsigned k=0;k<haplotypeLength;++k)
dumpFptr<<(char)(haplotypeBasesArray[k]);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.rs[k]);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.q[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.i[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.d[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.c[k]+33);
dumpFptr<<"\n";
}

View File

@ -2,6 +2,7 @@
#define LOAD_TIME_INITIALIZER_H #define LOAD_TIME_INITIALIZER_H
#include "headers.h" #include "headers.h"
#include <jni.h> #include <jni.h>
#include "template.h"
class LoadTimeInitializer class LoadTimeInitializer
{ {
public: public:
@ -10,6 +11,10 @@ class LoadTimeInitializer
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
void debug_close(); void debug_close();
void dump_sandbox(unsigned haplotypeLength, unsigned readLength, char* haplotypeBasesArray, testcase& tc);
void open_sandbox() { m_sandbox_fptr.open("sandbox.txt", std::ios::app); }
void close_sandbox() { m_sandbox_fptr.close(); }
jfieldID m_readBasesFID; jfieldID m_readBasesFID;
jfieldID m_readQualsFID; jfieldID m_readQualsFID;
jfieldID m_insertionGOPFID; jfieldID m_insertionGOPFID;
@ -37,6 +42,7 @@ class LoadTimeInitializer
uint64_t m_bytes_copied; uint64_t m_bytes_copied;
private: private:
std::map<std::string, std::ofstream*> m_filename_to_fptr; std::map<std::string, std::ofstream*> m_filename_to_fptr;
std::ofstream m_sandbox_fptr;
}; };
extern LoadTimeInitializer g_load_time_initializer; extern LoadTimeInitializer g_load_time_initializer;

View File

@ -7,6 +7,7 @@
//#define DEBUG 1 //#define DEBUG 1
//#define DEBUG0_1 1 //#define DEBUG0_1 1
//#define DEBUG3 1 //#define DEBUG3 1
/*#define DUMP_TO_SANDBOX 1*/
#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1 #define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1

View File

@ -5,6 +5,16 @@
#include "utils.h" #include "utils.h"
#include "LoadTimeInitializer.h" #include "LoadTimeInitializer.h"
char* all_ptrs[] = {
"TCAAACCGAAATAAAGGCCAGTATATCCATATCCTTCCCATAAATGTTGATGGAAGAATTATTTGGAAGCCATATAGAATGAAATGACTCTATACACAAATTAAAACACAAAAACGTACTCAAAATAGTCCAGAGACTACAACTTCAAATGCAAAACTATAAATAATCTAAAAGAAAACCTAAGAGACATTC",
"GTCCAGAGACTACAACTTCAAATGCAAAACTATAAATAATCTAACAGAAAACCTAAGAGACATTC",
">D?@BAEEEEDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE@EEEEEEEEEEDEEEEEE?",
"IIIIIIIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIN",
"IIIIIIIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIN",
"+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
};
char all_arrays[6][16384];
using namespace std; using namespace std;
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType
@ -46,6 +56,17 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
initialize_function_pointers((uint64_t)mask); initialize_function_pointers((uint64_t)mask);
cout.flush(); cout.flush();
} }
#if 0
for(unsigned i=0;i<6;++i)
{
unsigned length = strlen(all_ptrs[i]);
for(unsigned j=0;j<16384;++j)
all_arrays[i][j] = all_ptrs[i][j%length];
}
for(unsigned i=2;i<6;++i)
for(unsigned j=0;j<16384;++j)
all_arrays[i][j] = all_arrays[i][j]-33;
#endif
} }
//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region, //Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region,
@ -127,6 +148,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
readBasesArrayVector.resize(numReads); readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING #ifdef DO_PROFILING
start_time = get_time(); start_time = get_time();
#endif
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.open_sandbox();
#endif #endif
for(unsigned i=0;i<numReads;++i) for(unsigned i=0;i<numReads;++i)
{ {
@ -193,6 +217,18 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
tc_array[tc_idx].i = (char*)insertionGOPArray; tc_array[tc_idx].i = (char*)insertionGOPArray;
tc_array[tc_idx].d = (char*)deletionGOPArray; tc_array[tc_idx].d = (char*)deletionGOPArray;
tc_array[tc_idx].c = (char*)overallGCPArray; tc_array[tc_idx].c = (char*)overallGCPArray;
#if 0
tc_array[tc_idx].hap = (char*)all_arrays[0];
tc_array[tc_idx].rs = (char*)all_arrays[1];
tc_array[tc_idx].q = (char*)all_arrays[2];
tc_array[tc_idx].i = (char*)all_arrays[3];
tc_array[tc_idx].d = (char*)all_arrays[4];
tc_array[tc_idx].c = (char*)all_arrays[5];
#endif
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.dump_sandbox(haplotypeLength, readLength, (char*)haplotypeBasesArray, tc_array[tc_idx]);
#endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_sumProductReadLengthHaplotypeLength += (readLength*haplotypeLength); g_load_time_initializer.m_sumProductReadLengthHaplotypeLength += (readLength*haplotypeLength);
g_load_time_initializer.m_sumSquareProductReadLengthHaplotypeLength += ((readLength*haplotypeLength)*(readLength*haplotypeLength)); g_load_time_initializer.m_sumSquareProductReadLengthHaplotypeLength += ((readLength*haplotypeLength)*(readLength*haplotypeLength));
@ -212,6 +248,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_sumReadLengths += readLength; g_load_time_initializer.m_sumReadLengths += readLength;
#endif #endif
} }
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.close_sandbox();
#endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += get_time(); g_load_time_initializer.m_data_transfer_time += get_time();
#endif #endif
@ -225,7 +264,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0); g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
start_time = get_time(); start_time = get_time();
#endif #endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse) #pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx) for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{ {
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0); float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
@ -239,7 +278,16 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
} }
else else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f))); result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
#if 0
double result = 0;
testcase& tc = tc_array[tc_idx];
for(unsigned k=0;k<tc.rslen;++k)
result += tc.rs[k] + tc.q[k] + tc.i[k] + tc.d[k] + tc.c[k];
result /= tc.rslen;
for(unsigned k=0;k<tc.haplen;++k)
result += tc.hap[k];
result = -fabs(result) - 1;
#endif
likelihoodDoubleArray[tc_idx] = result; likelihoodDoubleArray[tc_idx] = result;
} }
#ifdef DO_PROFILING #ifdef DO_PROFILING

View File

@ -9,7 +9,7 @@
using namespace std; using namespace std;
#define BATCH_SIZE 10000 #define BATCH_SIZE 5
#define RUN_HYBRID #define RUN_HYBRID
@ -43,24 +43,35 @@ int main(int argc, char** argv)
vector<testcase> tc_vector; vector<testcase> tc_vector;
tc_vector.clear(); tc_vector.clear();
testcase tc; tc_vector.resize(BATCH_SIZE+4);
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
vector<double> baseline_results;
baseline_results.clear();
baseline_results.resize(tc_vector.size());
bool all_ok = true;
uint64_t total_time = 0; uint64_t total_time = 0;
uint64_t baseline_time = 0;
unsigned total_count = 0;
unsigned num_testcases = 0;
//unsigned curr_batch_size = rand()%BATCH_SIZE + 4; //min batch size
unsigned curr_batch_size = BATCH_SIZE;
while(1) while(1)
{ {
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true); int break_value = use_old_read_testcase ? read_testcase(&(tc_vector[num_testcases]), fptr) :
read_mod_testcase(ifptr,&(tc_vector[num_testcases]),true);
if(break_value >= 0) if(break_value >= 0)
tc_vector.push_back(tc); ++num_testcases;
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0)) if(num_testcases == curr_batch_size || (break_value < 0 && num_testcases > 0))
{ {
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
get_time(); get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12) #pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<num_testcases;++i)
{ {
testcase& tc = tc_vector[i]; testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0); float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0; double result = 0;
if (result_avxf < MIN_ACCEPTED) { if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0); double result_avxd = g_compute_full_prob_double(&tc, 0);
@ -68,20 +79,34 @@ int main(int argc, char** argv)
} }
else else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f))); result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result; results_vec[i] = result;
} }
total_time += get_time(); total_time += get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) #pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<num_testcases;++i)
{ {
testcase& tc = tc_vector[i]; testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc); float result_avxf = compute_full_prob<float>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0)); double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = compute_full_prob<double>(&tc);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
baseline_results[i] = result;
}
baseline_time += get_time();
for(unsigned i=0;i<num_testcases;++i)
{
double baseline_result = baseline_results[i];
double abs_error = fabs(baseline_result-results_vec[i]); double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0; double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5) if(abs_error > 1e-5 && rel_error > 1e-5)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n"; {
cout << "Line "<<total_count+i<< " " << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
all_ok = false;
}
delete tc_vector[i].rs; delete tc_vector[i].rs;
delete tc_vector[i].hap; delete tc_vector[i].hap;
delete tc_vector[i].q; delete tc_vector[i].q;
@ -89,13 +114,22 @@ int main(int argc, char** argv)
delete tc_vector[i].d; delete tc_vector[i].d;
delete tc_vector[i].c; delete tc_vector[i].c;
} }
results_vec.clear(); total_count += num_testcases;
tc_vector.clear(); num_testcases = 0;
//curr_batch_size = rand()%BATCH_SIZE + 4; //min batch size
curr_batch_size = BATCH_SIZE;
} }
if(break_value < 0) if(break_value < 0)
break; break;
} }
cout << "Total time "<< ((double)total_time)/1e9 << "\n"; baseline_results.clear();
results_vec.clear();
tc_vector.clear();
if(all_ok)
cout << "All outputs acceptable\n";
cout << "Total vector time "<< ((double)total_time)/1e9 << " baseline time "<<baseline_time*1e-9<<"\n";
cout.flush();
fflush(stdout);
if(use_old_read_testcase) if(use_old_read_testcase)
fclose(fptr); fclose(fptr);
else else

View File

@ -94,8 +94,13 @@ int read_testcase(testcase *tc, FILE* ifp)
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6) if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
return -1; return -1;
tc->haplen = strlen(tc->hap); tc->haplen = strlen(tc->hap);
tc->rslen = strlen(tc->rs); tc->rslen = strlen(tc->rs);
assert(strlen(q) == tc->rslen);
assert(strlen(i) == tc->rslen);
assert(strlen(d) == tc->rslen);
assert(strlen(c) == tc->rslen);
//assert(tc->rslen < MROWS); //assert(tc->rslen < MROWS);
tc->ihap = (int *) malloc(tc->haplen*sizeof(int)); tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
tc->irs = (int *) malloc(tc->rslen*sizeof(int)); tc->irs = (int *) malloc(tc->rslen*sizeof(int));
@ -111,7 +116,8 @@ int read_testcase(testcase *tc, FILE* ifp)
_i = normalize(i[x]); _i = normalize(i[x]);
_d = normalize(d[x]); _d = normalize(d[x]);
_c = normalize(c[x]); _c = normalize(c[x]);
tc->q[x] = (_q < 6) ? 6 : _q; tc->q[x] = (_q < 6) ? 6 : _q;
//tc->q[x] = _q;
tc->i[x] = _i; tc->i[x] = _i;
tc->d[x] = _d; tc->d[x] = _d;
tc->c[x] = _c; tc->c[x] = _c;
@ -120,13 +126,15 @@ int read_testcase(testcase *tc, FILE* ifp)
for (x = 0; x < tc->haplen; x++) for (x = 0; x < tc->haplen; x++)
tc->ihap[x] = tc->hap[x]; tc->ihap[x] = tc->hap[x];
free(q); free(q);
free(i); free(i);
free(d); free(d);
free(c); free(c);
free(line); free(line);
return 0; return 0;
} }