Temporary commit containing debug profiling code - commented out

This commit is contained in:
Karthik Gururaj 2014-01-29 12:10:29 -08:00
parent 0c63d6264f
commit 5c7427e48c
6 changed files with 142 additions and 23 deletions

View File

@ -86,3 +86,25 @@ void LoadTimeInitializer::debug_close()
m_filename_to_fptr.clear();
}
void LoadTimeInitializer::dump_sandbox(unsigned haplotypeLength, unsigned readLength, char* haplotypeBasesArray, testcase& tc)
{
ofstream& dumpFptr = m_sandbox_fptr;
for(unsigned k=0;k<haplotypeLength;++k)
dumpFptr<<(char)(haplotypeBasesArray[k]);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.rs[k]);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.q[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.i[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.d[k]+33);
dumpFptr<<" ";
for(unsigned k=0;k<readLength;++k)
dumpFptr<<(char)(tc.c[k]+33);
dumpFptr<<"\n";
}

View File

@ -2,6 +2,7 @@
#define LOAD_TIME_INITIALIZER_H
#include "headers.h"
#include <jni.h>
#include "template.h"
class LoadTimeInitializer
{
public:
@ -10,6 +11,10 @@ class LoadTimeInitializer
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
void debug_close();
void dump_sandbox(unsigned haplotypeLength, unsigned readLength, char* haplotypeBasesArray, testcase& tc);
void open_sandbox() { m_sandbox_fptr.open("sandbox.txt", std::ios::app); }
void close_sandbox() { m_sandbox_fptr.close(); }
jfieldID m_readBasesFID;
jfieldID m_readQualsFID;
jfieldID m_insertionGOPFID;
@ -37,6 +42,7 @@ class LoadTimeInitializer
uint64_t m_bytes_copied;
private:
std::map<std::string, std::ofstream*> m_filename_to_fptr;
std::ofstream m_sandbox_fptr;
};
extern LoadTimeInitializer g_load_time_initializer;

View File

@ -7,6 +7,7 @@
//#define DEBUG 1
//#define DEBUG0_1 1
//#define DEBUG3 1
/*#define DUMP_TO_SANDBOX 1*/
#define DIRECT_ACCESS_TO_JAVA_HEAP_MEMORY 1

View File

@ -5,6 +5,16 @@
#include "utils.h"
#include "LoadTimeInitializer.h"
char* all_ptrs[] = {
"TCAAACCGAAATAAAGGCCAGTATATCCATATCCTTCCCATAAATGTTGATGGAAGAATTATTTGGAAGCCATATAGAATGAAATGACTCTATACACAAATTAAAACACAAAAACGTACTCAAAATAGTCCAGAGACTACAACTTCAAATGCAAAACTATAAATAATCTAAAAGAAAACCTAAGAGACATTC",
"GTCCAGAGACTACAACTTCAAATGCAAAACTATAAATAATCTAACAGAAAACCTAAGAGACATTC",
">D?@BAEEEEDEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE@EEEEEEEEEEDEEEEEE?",
"IIIIIIIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIN",
"IIIIIIIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIIIIIHHHHIIIIIIIIIIIIIIN",
"+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++"
};
char all_arrays[6][16384];
using namespace std;
JNIEXPORT jlong JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniGetMachineType
@ -46,6 +56,17 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
initialize_function_pointers((uint64_t)mask);
cout.flush();
}
#if 0
for(unsigned i=0;i<6;++i)
{
unsigned length = strlen(all_ptrs[i]);
for(unsigned j=0;j<16384;++j)
all_arrays[i][j] = all_ptrs[i][j%length];
}
for(unsigned i=2;i<6;++i)
for(unsigned j=0;j<16384;++j)
all_arrays[i][j] = all_arrays[i][j]-33;
#endif
}
//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region,
@ -127,6 +148,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING
start_time = get_time();
#endif
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.open_sandbox();
#endif
for(unsigned i=0;i<numReads;++i)
{
@ -193,6 +217,18 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
tc_array[tc_idx].i = (char*)insertionGOPArray;
tc_array[tc_idx].d = (char*)deletionGOPArray;
tc_array[tc_idx].c = (char*)overallGCPArray;
#if 0
tc_array[tc_idx].hap = (char*)all_arrays[0];
tc_array[tc_idx].rs = (char*)all_arrays[1];
tc_array[tc_idx].q = (char*)all_arrays[2];
tc_array[tc_idx].i = (char*)all_arrays[3];
tc_array[tc_idx].d = (char*)all_arrays[4];
tc_array[tc_idx].c = (char*)all_arrays[5];
#endif
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.dump_sandbox(haplotypeLength, readLength, (char*)haplotypeBasesArray, tc_array[tc_idx]);
#endif
#ifdef DO_PROFILING
g_load_time_initializer.m_sumProductReadLengthHaplotypeLength += (readLength*haplotypeLength);
g_load_time_initializer.m_sumSquareProductReadLengthHaplotypeLength += ((readLength*haplotypeLength)*(readLength*haplotypeLength));
@ -212,6 +248,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_sumReadLengths += readLength;
#endif
}
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.close_sandbox();
#endif
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += get_time();
#endif
@ -225,7 +264,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
start_time = get_time();
#endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
@ -239,7 +278,16 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
#if 0
double result = 0;
testcase& tc = tc_array[tc_idx];
for(unsigned k=0;k<tc.rslen;++k)
result += tc.rs[k] + tc.q[k] + tc.i[k] + tc.d[k] + tc.c[k];
result /= tc.rslen;
for(unsigned k=0;k<tc.haplen;++k)
result += tc.hap[k];
result = -fabs(result) - 1;
#endif
likelihoodDoubleArray[tc_idx] = result;
}
#ifdef DO_PROFILING

View File

@ -9,7 +9,7 @@
using namespace std;
#define BATCH_SIZE 10000
#define BATCH_SIZE 5
#define RUN_HYBRID
@ -43,24 +43,35 @@ int main(int argc, char** argv)
vector<testcase> tc_vector;
tc_vector.clear();
testcase tc;
tc_vector.resize(BATCH_SIZE+4);
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
vector<double> baseline_results;
baseline_results.clear();
baseline_results.resize(tc_vector.size());
bool all_ok = true;
uint64_t total_time = 0;
uint64_t baseline_time = 0;
unsigned total_count = 0;
unsigned num_testcases = 0;
//unsigned curr_batch_size = rand()%BATCH_SIZE + 4; //min batch size
unsigned curr_batch_size = BATCH_SIZE;
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
int break_value = use_old_read_testcase ? read_testcase(&(tc_vector[num_testcases]), fptr) :
read_mod_testcase(ifptr,&(tc_vector[num_testcases]),true);
if(break_value >= 0)
tc_vector.push_back(tc);
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
++num_testcases;
if(num_testcases == curr_batch_size || (break_value < 0 && num_testcases > 0))
{
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
for(unsigned i=0;i<num_testcases;++i)
{
testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0);
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
@ -68,20 +79,34 @@ int main(int argc, char** argv)
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result;
}
total_time += get_time();
#pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i)
for(unsigned i=0;i<num_testcases;++i)
{
testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
testcase& tc = tc_vector[i];
float result_avxf = compute_full_prob<float>(&tc);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = compute_full_prob<double>(&tc);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
baseline_results[i] = result;
}
baseline_time += get_time();
for(unsigned i=0;i<num_testcases;++i)
{
double baseline_result = baseline_results[i];
double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
{
cout << "Line "<<total_count+i<< " " << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
all_ok = false;
}
delete tc_vector[i].rs;
delete tc_vector[i].hap;
delete tc_vector[i].q;
@ -89,13 +114,22 @@ int main(int argc, char** argv)
delete tc_vector[i].d;
delete tc_vector[i].c;
}
results_vec.clear();
tc_vector.clear();
total_count += num_testcases;
num_testcases = 0;
//curr_batch_size = rand()%BATCH_SIZE + 4; //min batch size
curr_batch_size = BATCH_SIZE;
}
if(break_value < 0)
break;
}
cout << "Total time "<< ((double)total_time)/1e9 << "\n";
baseline_results.clear();
results_vec.clear();
tc_vector.clear();
if(all_ok)
cout << "All outputs acceptable\n";
cout << "Total vector time "<< ((double)total_time)/1e9 << " baseline time "<<baseline_time*1e-9<<"\n";
cout.flush();
fflush(stdout);
if(use_old_read_testcase)
fclose(fptr);
else

View File

@ -94,8 +94,13 @@ int read_testcase(testcase *tc, FILE* ifp)
if (sscanf(line, "%s %s %s %s %s %s\n", tc->hap, tc->rs, q, i, d, c) != 6)
return -1;
tc->haplen = strlen(tc->hap);
tc->rslen = strlen(tc->rs);
assert(strlen(q) == tc->rslen);
assert(strlen(i) == tc->rslen);
assert(strlen(d) == tc->rslen);
assert(strlen(c) == tc->rslen);
//assert(tc->rslen < MROWS);
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
@ -111,7 +116,8 @@ int read_testcase(testcase *tc, FILE* ifp)
_i = normalize(i[x]);
_d = normalize(d[x]);
_c = normalize(c[x]);
tc->q[x] = (_q < 6) ? 6 : _q;
tc->q[x] = (_q < 6) ? 6 : _q;
//tc->q[x] = _q;
tc->i[x] = _i;
tc->d[x] = _d;
tc->c[x] = _c;
@ -120,13 +126,15 @@ int read_testcase(testcase *tc, FILE* ifp)
for (x = 0; x < tc->haplen; x++)
tc->ihap[x] = tc->hap[x];
free(q);
free(i);
free(d);
free(c);
free(line);
return 0;
}