1. Split main JNI function into initializeTestcases, compute_testcases and releaseReads
2. FTZ enabled
3. Cleaner profiling code
This commit is contained in:
Karthik Gururaj 2014-02-06 14:35:32 -08:00
parent 166f91d698
commit b729fc0136
9 changed files with 211 additions and 150 deletions

View File

@ -1,6 +1,19 @@
#include "LoadTimeInitializer.h" #include "LoadTimeInitializer.h"
#include "utils.h" #include "utils.h"
using namespace std; using namespace std;
// Printable name for each profiling statistic, indexed by LoadTimeInitializerStatsEnum.
// print_profiling() emits entry i as the row label of statistic i, so the order here
// must stay in step with the enum declaration.
// NOTE(review): the elements are string literals; declaring the array as const char*
// would be more correct, but needs the matching extern declaration changed as well.
char* LoadTimeInitializerStatsNames[] =
{
"num_regions",
"num_reads",
"num_haplotypes",
"num_testcases",
"num_double_invocations",
"haplotype_length",
"readlength",
"product_read_length_haplotype_length",
// Placeholder at index TOTAL_NUMBER_STATS; loops bounded by i<TOTAL_NUMBER_STATS never reach it
"dummy"
};
LoadTimeInitializer g_load_time_initializer; LoadTimeInitializer g_load_time_initializer;
@ -16,25 +29,23 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
#else #else
cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n"; cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n";
#endif #endif
m_sumNumReads = 0; //Profiling: times for compute and transfer (either bytes copied or pointers copied)
m_sumSquareNumReads = 0;
m_sumNumHaplotypes = 0;
m_sumSquareNumHaplotypes = 0;
m_sumNumTestcases = 0;
m_sumNumDoubleTestcases = 0;
m_sumSquareNumTestcases = 0;
m_sumReadLengths = 0;
m_sumHaplotypeLengths = 0;
m_sumProductReadLengthHaplotypeLength = 0;
m_sumSquareProductReadLengthHaplotypeLength = 0;
m_maxNumTestcases = 0;
m_num_invocations = 0;
m_compute_time = 0; m_compute_time = 0;
m_data_transfer_time = 0; m_data_transfer_time = 0;
m_bytes_copied = 0; m_bytes_copied = 0;
//Initialize profiling counters
for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
{
m_sum_stats[i] = 0;
m_sum_square_stats[i] = 0;
m_max_stats[i] = 0;
m_min_stats[i] = 0xFFFFFFFFFFFFFFFFull;
}
//for debug dump
m_filename_to_fptr.clear(); m_filename_to_fptr.clear();
m_written_files_set.clear();
initialize_function_pointers(); initialize_function_pointers();
cout.flush(); cout.flush();
@ -42,25 +53,44 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
void LoadTimeInitializer::print_profiling() void LoadTimeInitializer::print_profiling()
{ {
double mean_val; double mean = 0;
cout << "Compute time "<<m_compute_time*1e-9<<"\n"; double variance = 0;
cout << "Data initialization time "<<m_data_transfer_time*1e-9<<"\n"; uint64_t denominator = 1;
cout <<"Invocations : "<<m_num_invocations<<"\n"; cout << "Time spent in compute_testcases "<<m_compute_time*1e-9<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n"; cout << "Time spent in data transfer (Java <--> C++) "<<m_data_transfer_time*1e-9<<"\n";
mean_val = m_sumNumReads/m_num_invocations;
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<< cout << "\nHC input stats\nstat_name,sum,sum_square,mean,variance,min,max\n";
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n"; for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
mean_val = m_sumNumHaplotypes/m_num_invocations; {
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<< cout << LoadTimeInitializerStatsNames[i];
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n"; cout << "," << m_sum_stats[i];
mean_val = m_sumNumTestcases/m_num_invocations; cout << "," << std::scientific << m_sum_square_stats[i];
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<< denominator = 1;
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\t"<<m_maxNumTestcases<<"\n"; switch(i)
mean_val = m_sumProductReadLengthHaplotypeLength/m_sumNumTestcases; {
cout <<"productReadLengthHaplotypeLength\t"<<m_sumProductReadLengthHaplotypeLength<<"\t"<<m_sumSquareProductReadLengthHaplotypeLength<<"\t" case NUM_READS_IDX:
<<mean_val<<"\t"<<(m_sumSquareProductReadLengthHaplotypeLength/m_sumNumTestcases)-mean_val*mean_val<<"\n"; case NUM_HAPLOTYPES_IDX:
cout <<"numDoubleTestcases\t"<<m_sumNumDoubleTestcases<<"\n"; case NUM_TESTCASES_IDX:
cout <<"numBytesCopied\t"<<m_bytes_copied<<"\n"; denominator = m_sum_stats[NUM_REGIONS_IDX];
break;
case HAPLOTYPE_LENGTH_IDX:
case READ_LENGTH_IDX:
case PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX:
denominator = m_sum_stats[NUM_TESTCASES_IDX];
break;
default:
denominator = 1;
break;
}
mean = ((double)m_sum_stats[i])/denominator;
cout << "," << std::scientific << mean;
variance = (m_sum_square_stats[i]/denominator) - (mean*mean); //E(X^2)-(E(X))^2
cout << "," << std::scientific << variance;
cout << "," << m_min_stats[i];
cout << "," << m_max_stats[i];
cout << "\n";
}
cout << "\n";
cout.flush(); cout.flush();
} }
@ -72,6 +102,12 @@ void LoadTimeInitializer::debug_dump(string filename, string s, bool to_append,
{ {
m_filename_to_fptr[filename] = new ofstream(); m_filename_to_fptr[filename] = new ofstream();
fptr = m_filename_to_fptr[filename]; fptr = m_filename_to_fptr[filename];
//File never seen before
if(m_written_files_set.find(filename) == m_written_files_set.end())
{
to_append = false;
m_written_files_set.insert(filename);
}
fptr->open(filename.c_str(), to_append ? ios::app : ios::out); fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
assert(fptr->is_open()); assert(fptr->is_open());
} }
@ -121,3 +157,12 @@ void LoadTimeInitializer::dump_sandbox(testcase& tc, unsigned tc_idx, unsigned n
dumpFptr << " "<< numReads << " "<<numHaplotypes; dumpFptr << " "<< numReads << " "<<numHaplotypes;
dumpFptr<<"\n"; dumpFptr<<"\n";
} }
// Record one observation of a profiling statistic.
//   stat_idx - which statistic to update (index into the m_*_stats arrays)
//   value    - the observed value
// Accumulates the running sum, the running sum of squares (kept as double, since
// value*value can exceed 64 bits) and the running min/max for that statistic.
//
// compute_testcases() calls this from inside an OpenMP parallel loop, so the
// read-modify-write of the shared counters is serialised with a named critical
// section. The name keeps it from contending with unrelated unnamed critical
// sections. Without OpenMP the pragma is ignored and the function runs as before.
void LoadTimeInitializer::update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value)
{
  const double value_as_double = static_cast<double>(value);
  #pragma omp critical (load_time_initializer_update_stat)
  {
    m_sum_stats[stat_idx] += value;
    m_sum_square_stats[stat_idx] += (value_as_double*value_as_double);
    m_max_stats[stat_idx] = std::max(m_max_stats[stat_idx], value);
    m_min_stats[stat_idx] = std::min(m_min_stats[stat_idx], value);
  }
}

View File

@ -3,6 +3,21 @@
#include "headers.h" #include "headers.h"
#include <jni.h> #include <jni.h>
#include "template.h" #include "template.h"
// Identifiers of the profiling statistics tracked by LoadTimeInitializer.
// Each value is an index into the per-statistic arrays (sum, sum of squares, min, max)
// and into LoadTimeInitializerStatsNames, so the numbering must stay contiguous from 0.
enum LoadTimeInitializerStatsEnum
{
// Incremented by 1 per jniComputeLikelihoods call
NUM_REGIONS_IDX=0,
// numReads of each jniComputeLikelihoods call
NUM_READS_IDX,
// numHaplotypes of each jniComputeLikelihoods call
NUM_HAPLOTYPES_IDX,
// numReads*numHaplotypes of each jniComputeLikelihoods call
NUM_TESTCASES_IDX,
// Incremented by 1 whenever a testcase is recomputed in double precision
NUM_DOUBLE_INVOCATIONS_IDX,
// Length of each haplotype bases array
HAPLOTYPE_LENGTH_IDX,
// Length of each read
READ_LENGTH_IDX,
// readLength*haplotypeLength of each testcase
PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX,
// Number of statistics above; used as the array size and loop bound, not a statistic itself
TOTAL_NUMBER_STATS
};
extern char* LoadTimeInitializerStatsNames[];
class LoadTimeInitializer class LoadTimeInitializer
{ {
public: public:
@ -21,20 +36,8 @@ class LoadTimeInitializer
jfieldID m_deletionGOPFID; jfieldID m_deletionGOPFID;
jfieldID m_overallGCPFID; jfieldID m_overallGCPFID;
jfieldID m_haplotypeBasesFID; jfieldID m_haplotypeBasesFID;
//used to compute avg, variance of #testcases //profiling - update stats
double m_sumNumReads; void update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value);
double m_sumSquareNumReads;
double m_sumNumHaplotypes;
double m_sumSquareNumHaplotypes;
double m_sumNumTestcases;
double m_sumSquareNumTestcases;
uint64_t m_sumNumDoubleTestcases;
uint64_t m_sumReadLengths;
uint64_t m_sumHaplotypeLengths;
uint64_t m_sumProductReadLengthHaplotypeLength;
double m_sumSquareProductReadLengthHaplotypeLength;
unsigned m_maxNumTestcases;
unsigned m_num_invocations;
//timing in nanoseconds //timing in nanoseconds
uint64_t m_compute_time; uint64_t m_compute_time;
uint64_t m_data_transfer_time; uint64_t m_data_transfer_time;
@ -42,7 +45,13 @@ class LoadTimeInitializer
uint64_t m_bytes_copied; uint64_t m_bytes_copied;
private: private:
std::map<std::string, std::ofstream*> m_filename_to_fptr; std::map<std::string, std::ofstream*> m_filename_to_fptr;
std::set<std::string> m_written_files_set;
std::ofstream m_sandbox_fptr; std::ofstream m_sandbox_fptr;
//used to compute various stats
uint64_t m_sum_stats[TOTAL_NUMBER_STATS];
double m_sum_square_stats[TOTAL_NUMBER_STATS];
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
}; };
extern LoadTimeInitializer g_load_time_initializer; extern LoadTimeInitializer g_load_time_initializer;

View File

@ -3,7 +3,7 @@
#include "utils.h" #include "utils.h"
template<class NUMBER> template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL) NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
{ {
int r, c; int r, c;
int ROWS = tc->rslen + 1; int ROWS = tc->rslen + 1;

View File

@ -21,6 +21,7 @@
#include <fstream> #include <fstream>
#include <vector> #include <vector>
#include <map> #include <map>
#include <set>
#include <cstdio> #include <cstdio>
#include <cstring> #include <cstring>
#include <cstdlib> #include <cstdlib>

View File

@ -5,8 +5,8 @@
/*#define ENABLE_ASSERTIONS 1*/ /*#define ENABLE_ASSERTIONS 1*/
#define DO_PROFILING 1 #define DO_PROFILING 1
/*#define DEBUG 1*/ /*#define DEBUG 1*/
//#define DEBUG0_1 1 /*#define DEBUG0_1 1*/
//#define DEBUG3 1 /*#define DEBUG3 1*/
/*#define DUMP_TO_SANDBOX 1*/ /*#define DUMP_TO_SANDBOX 1*/

View File

@ -92,45 +92,20 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true); g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
#endif #endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_sumHaplotypeLengths += haplotypeBasesLength; g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, haplotypeBasesLength);
g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0); g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0);
#endif #endif
} }
} }
//JNI function to invoke compute_full_prob_avx inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc (JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray,
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector, vector<testcase>& tc_array)
//likelihoodArray - array of doubles to return results back to Java. Memory allocated by Java prior to JNI call
//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
{ {
#ifdef DEBUG0_1 jboolean is_copy = JNI_FALSE;
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
#endif
double start_time = 0;
//haplotype vector from earlier store - note the reference to vector, not copying //haplotype vector from earlier store - note the reference to vector, not copying
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
jboolean is_copy = JNI_FALSE;
unsigned numTestCases = numReads*numHaplotypes;
//vector to store results
vector<testcase> tc_array;
tc_array.clear();
tc_array.resize(numTestCases);
unsigned tc_idx = 0; unsigned tc_idx = 0;
//Store arrays for release later
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
readBasesArrayVector.clear();
readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING
start_time = get_time();
#endif
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.open_sandbox();
#endif
for(unsigned i=0;i<numReads;++i) for(unsigned i=0;i<numReads;++i)
{ {
//Get bytearray fields from read //Get bytearray fields from read
@ -157,6 +132,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy); jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0); g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0);
g_load_time_initializer.update_stat(READ_LENGTH_IDX, readLength);
#endif #endif
#ifdef ENABLE_ASSERTIONS #ifdef ENABLE_ASSERTIONS
assert(readBasesArray && "readBasesArray not initialized in JNI"); assert(readBasesArray && "readBasesArray not initialized in JNI");
@ -183,7 +159,6 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true); g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
} }
#endif #endif
for(unsigned j=0;j<numHaplotypes;++j) for(unsigned j=0;j<numHaplotypes;++j)
{ {
jsize haplotypeLength = (jsize)g_haplotypeBasesLengths[j]; jsize haplotypeLength = (jsize)g_haplotypeBasesLengths[j];
@ -197,15 +172,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
tc_array[tc_idx].d = (char*)deletionGOPArray; tc_array[tc_idx].d = (char*)deletionGOPArray;
tc_array[tc_idx].c = (char*)overallGCPArray; tc_array[tc_idx].c = (char*)overallGCPArray;
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_sumProductReadLengthHaplotypeLength += (readLength*haplotypeLength); g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, ((uint64_t)readLength)*((uint64_t)haplotypeLength));
g_load_time_initializer.m_sumSquareProductReadLengthHaplotypeLength += ((readLength*haplotypeLength)*(readLength*haplotypeLength));
#endif #endif
#ifdef DUMP_TO_SANDBOX #ifdef DUMP_TO_SANDBOX
g_load_time_initializer.dump_sandbox(tc_array[tc_idx], tc_idx, numReads, numHaplotypes); g_load_time_initializer.dump_sandbox(tc_array[tc_idx], tc_idx, numReads, numHaplotypes);
#endif #endif
++tc_idx; ++tc_idx;
} }
//Release read arrays at end because they are used by compute_full_prob //Store the read array references and release them at the end because they are used by compute_full_prob
//Maintain order in which GET_BYTE_ARRAY_ELEMENTS called //Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
readBasesArrayVector[i].clear(); readBasesArrayVector[i].clear();
readBasesArrayVector[i].resize(5); readBasesArrayVector[i].resize(5);
@ -214,13 +188,79 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray); readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray); readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray); readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
#ifdef DO_PROFILING
g_load_time_initializer.m_sumReadLengths += readLength;
#endif
} }
}
//Do compute over vector of testcase structs
//tc_array - testcases to evaluate; entries [0, numTestCases) are read
//numTestCases - number of entries of tc_array to process
//likelihoodDoubleArray - output; entry tc_idx receives the log10 result of testcase tc_idx
//maxNumThreadsToUse - thread count passed to the OpenMP num_threads clause
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
unsigned maxNumThreadsToUse)
{
//Iterations are handed to threads dynamically in chunks of 10000 testcases
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
//Try single precision first
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
double result = 0;
//Float result below MIN_ACCEPTED: recompute the same testcase in double precision
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
//log10 of the result minus log10(2^1020) - presumably undoes a scaling constant applied inside the kernel; TODO confirm
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
#ifdef DO_PROFILING #ifdef DO_PROFILING
//This call runs on several OpenMP threads at once, so update_stat must tolerate concurrent callers
g_load_time_initializer.m_data_transfer_time += get_time(); g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
#endif
}
else
//Single precision path: log10f of the result minus log10f(2^120), widened to double
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
likelihoodDoubleArray[tc_idx] = result;
}
}
//Hand the read byte arrays back to the Java VM once the computation no longer needs them.
//env - JNI environment of the calling thread
//readBasesArrayVector - per read, the (Java array, native pointer) pairs obtained earlier;
//                       left empty on return
//Arrays are released last-acquired-first, i.e. in the reverse of the order in which they
//were fetched, using JNI_RO_RELEASE_MODE.
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays
  (JNIEnv* env, vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector)
{
  //Walk the reads from the last one back to the first
  for(size_t i=readBasesArrayVector.size();i-- > 0;)
  {
    //Within a read, release the arrays from the last one back to the first
    for(size_t j=readBasesArrayVector[i].size();j-- > 0;)
    {
      RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
    }
    readBasesArrayVector[i].clear();
  }
  readBasesArrayVector.clear();
}
//JNI function to invoke compute_full_prob_avx
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
//likelihoodArray - array of doubles to return results back to Java. Memory allocated by Java prior to JNI call
//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
{
#ifdef DEBUG0_1
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
#endif
jboolean is_copy = JNI_FALSE;
struct timespec start_time;
unsigned numTestCases = numReads*numHaplotypes;
//vector to store testcases
vector<testcase> tc_array;
tc_array.clear();
tc_array.resize(numTestCases);
//Store read arrays for release later
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
readBasesArrayVector.clear();
readBasesArrayVector.resize(numReads);
#ifdef DUMP_TO_SANDBOX
g_load_time_initializer.open_sandbox();
#endif
#ifdef DO_PROFILING
get_time(&start_time);
#endif
//Copy byte array references from Java memory into vector of testcase structs
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector(env,
numReads, numHaplotypes, readDataArray, readBasesArrayVector, tc_array);
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
#endif #endif
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy); jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
@ -230,65 +270,29 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
#endif #endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0); g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
struct timespec prev_time; get_time(&start_time);
clock_gettime(CLOCK_REALTIME, &prev_time);
#endif #endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse) compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse);
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
#ifdef DO_PROFILING #ifdef DO_PROFILING
++(g_load_time_initializer.m_sumNumDoubleTestcases); g_load_time_initializer.m_compute_time += diff_time(start_time);
#endif
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
likelihoodDoubleArray[tc_idx] = result;
}
#ifdef DO_PROFILING
g_load_time_initializer.m_compute_time += diff_time(prev_time);
#endif #endif
#ifdef DEBUG #ifdef DEBUG
for(tc_idx=0;tc_idx<numTestCases;++tc_idx) for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true); g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
}
#endif #endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
start_time = get_time(); get_time(&start_time);
#endif #endif
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory (if copy made)
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays(env, readBasesArrayVector);
//Release read arrays first
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
{
for(int j=readBasesArrayVector[i].size()-1;j>=0;--j)
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
readBasesArrayVector[i].clear();
}
readBasesArrayVector.clear();
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += get_time(); g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
g_load_time_initializer.update_stat(NUM_REGIONS_IDX, 1);
g_load_time_initializer.update_stat(NUM_READS_IDX, numReads);
g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, numHaplotypes);
g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, numTestCases);
#endif #endif
tc_array.clear(); tc_array.clear();
#ifdef DO_PROFILING
g_load_time_initializer.m_sumNumReads += numReads;
g_load_time_initializer.m_sumSquareNumReads += numReads*numReads;
g_load_time_initializer.m_sumNumHaplotypes += numHaplotypes;
g_load_time_initializer.m_sumSquareNumHaplotypes += numHaplotypes*numHaplotypes;
g_load_time_initializer.m_sumNumTestcases += numTestCases;
g_load_time_initializer.m_sumSquareNumTestcases += numTestCases*numTestCases;
g_load_time_initializer.m_maxNumTestcases = numTestCases > g_load_time_initializer.m_maxNumTestcases ? numTestCases
: g_load_time_initializer.m_maxNumTestcases;
++(g_load_time_initializer.m_num_invocations);
#endif
#ifdef DEBUG
g_load_time_initializer.debug_close();
#endif
#ifdef DUMP_TO_SANDBOX #ifdef DUMP_TO_SANDBOX
g_load_time_initializer.close_sandbox(); g_load_time_initializer.close_sandbox();
#endif #endif

View File

@ -14,8 +14,6 @@ int main(int argc, char** argv)
cerr << "Needs path to input file as argument\n"; cerr << "Needs path to input file as argument\n";
exit(0); exit(0);
} }
do_compute(argv[1]);
return 0;
bool use_old_read_testcase = false; bool use_old_read_testcase = false;
if(argc >= 3 && string(argv[2]) == "1") if(argc >= 3 && string(argv[2]) == "1")
use_old_read_testcase = true; use_old_read_testcase = true;

View File

@ -2,6 +2,7 @@
#include "template.h" #include "template.h"
#include "utils.h" #include "utils.h"
#include "vector_defs.h" #include "vector_defs.h"
#include "LoadTimeInitializer.h"
uint8_t ConvertChar::conversionTable[255]; uint8_t ConvertChar::conversionTable[255];
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0; float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
@ -271,15 +272,9 @@ double getCurrClk() {
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
} }
uint64_t get_time(struct timespec* store_struct) void get_time(struct timespec* store_struct)
{ {
static struct timespec start_time; clock_gettime(CLOCK_REALTIME, store_struct);
struct timespec curr_time;
struct timespec* ptr = (store_struct == 0) ? &curr_time : store_struct;
clock_gettime(CLOCK_REALTIME, ptr);
uint64_t diff_time = (ptr->tv_sec-start_time.tv_sec)*1000000000+(ptr->tv_nsec-start_time.tv_nsec);
start_time = *ptr;
return diff_time;
} }
uint64_t diff_time(struct timespec& prev_time) uint64_t diff_time(struct timespec& prev_time)
@ -289,6 +284,7 @@ uint64_t diff_time(struct timespec& prev_time)
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec)); return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
} }
//#define DUMP_COMPUTE_VALUES 1
#define CHECK_VALUES 1 #define CHECK_VALUES 1
#define BATCH_SIZE 10000 #define BATCH_SIZE 10000
#define RUN_HYBRID #define RUN_HYBRID
@ -329,7 +325,8 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
baseline_results_vec.clear(); baseline_results_vec.clear();
results_vec.resize(tc_vector.size()); results_vec.resize(tc_vector.size());
baseline_results_vec.resize(tc_vector.size()); baseline_results_vec.resize(tc_vector.size());
get_time(); struct timespec start_time;
get_time(&start_time);
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12) #pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<tc_vector.size();++i)
{ {
@ -343,10 +340,14 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
} }
else else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f))); result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
#ifdef DUMP_COMPUTE_VALUES
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
#endif
results_vec[i] = result; results_vec[i] = result;
} }
vector_compute_time += get_time(); vector_compute_time += diff_time(start_time);
#ifdef CHECK_VALUES #ifdef CHECK_VALUES
get_time(&start_time);
#pragma omp parallel for schedule(dynamic,chunk_size) #pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<tc_vector.size();++i)
{ {
@ -355,7 +356,7 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0)); baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
baseline_results_vec[i] = baseline_result; baseline_results_vec[i] = baseline_result;
} }
baseline_compute_time += get_time(); baseline_compute_time += diff_time(start_time);
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<tc_vector.size();++i)
{ {
double baseline_result = baseline_results_vec[i]; double baseline_result = baseline_results_vec[i];
@ -383,6 +384,9 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
if(break_value < 0) if(break_value < 0)
break; break;
} }
#ifdef DUMP_COMPUTE_VALUES
g_load_time_initializer.debug_close();
#endif
if(all_ok) if(all_ok)
{ {
cout << "All output values within acceptable error\n"; cout << "All output values within acceptable error\n";

View File

@ -26,7 +26,7 @@ void debug_dump(std::string filename, std::string s, bool to_append, bool add_ne
template<class NUMBER> template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
double getCurrClk(); double getCurrClk();
uint64_t get_time(struct timespec* x=0); void get_time(struct timespec* x);
uint64_t diff_time(struct timespec& prev_time); uint64_t diff_time(struct timespec& prev_time);
//bit 0 is sse4.2, bit 1 is AVX //bit 0 is sse4.2, bit 1 is AVX