1. Split main JNI function into initializeTestcases, compute_testcases
and releaseReads 2. FTZ enabled 3. Cleaner profiling code
This commit is contained in:
parent
166f91d698
commit
b729fc0136
|
|
@ -1,6 +1,19 @@
|
|||
#include "LoadTimeInitializer.h"
|
||||
#include "utils.h"
|
||||
using namespace std;
|
||||
//Printable names of the profiling stats, indexed by LoadTimeInitializerStatsEnum
//print_profiling() reads entries [0, TOTAL_NUMBER_STATS); "dummy" is an extra trailing entry
char* LoadTimeInitializerStatsNames[] =
{
  "num_regions", "num_reads", "num_haplotypes", "num_testcases",
  "num_double_invocations",
  "haplotype_length", "readlength", "product_read_length_haplotype_length",
  "dummy"
};
|
||||
|
||||
|
||||
LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
|
@ -16,25 +29,23 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
|||
#else
|
||||
cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n";
|
||||
#endif
|
||||
m_sumNumReads = 0;
|
||||
m_sumSquareNumReads = 0;
|
||||
m_sumNumHaplotypes = 0;
|
||||
m_sumSquareNumHaplotypes = 0;
|
||||
m_sumNumTestcases = 0;
|
||||
m_sumNumDoubleTestcases = 0;
|
||||
m_sumSquareNumTestcases = 0;
|
||||
m_sumReadLengths = 0;
|
||||
m_sumHaplotypeLengths = 0;
|
||||
m_sumProductReadLengthHaplotypeLength = 0;
|
||||
m_sumSquareProductReadLengthHaplotypeLength = 0;
|
||||
m_maxNumTestcases = 0;
|
||||
m_num_invocations = 0;
|
||||
|
||||
//Profiling: times for compute and transfer (either bytes copied or pointers copied)
|
||||
m_compute_time = 0;
|
||||
m_data_transfer_time = 0;
|
||||
m_bytes_copied = 0;
|
||||
|
||||
//Initialize profiling counters
|
||||
for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
|
||||
{
|
||||
m_sum_stats[i] = 0;
|
||||
m_sum_square_stats[i] = 0;
|
||||
m_max_stats[i] = 0;
|
||||
m_min_stats[i] = 0xFFFFFFFFFFFFFFFFull;
|
||||
}
|
||||
|
||||
//for debug dump
|
||||
m_filename_to_fptr.clear();
|
||||
m_written_files_set.clear();
|
||||
|
||||
initialize_function_pointers();
|
||||
cout.flush();
|
||||
|
|
@ -42,25 +53,44 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
|||
|
||||
void LoadTimeInitializer::print_profiling()
|
||||
{
|
||||
double mean_val;
|
||||
cout << "Compute time "<<m_compute_time*1e-9<<"\n";
|
||||
cout << "Data initialization time "<<m_data_transfer_time*1e-9<<"\n";
|
||||
cout <<"Invocations : "<<m_num_invocations<<"\n";
|
||||
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
|
||||
mean_val = m_sumNumReads/m_num_invocations;
|
||||
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
mean_val = m_sumNumHaplotypes/m_num_invocations;
|
||||
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
|
||||
mean_val = m_sumNumTestcases/m_num_invocations;
|
||||
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
|
||||
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\t"<<m_maxNumTestcases<<"\n";
|
||||
mean_val = m_sumProductReadLengthHaplotypeLength/m_sumNumTestcases;
|
||||
cout <<"productReadLengthHaplotypeLength\t"<<m_sumProductReadLengthHaplotypeLength<<"\t"<<m_sumSquareProductReadLengthHaplotypeLength<<"\t"
|
||||
<<mean_val<<"\t"<<(m_sumSquareProductReadLengthHaplotypeLength/m_sumNumTestcases)-mean_val*mean_val<<"\n";
|
||||
cout <<"numDoubleTestcases\t"<<m_sumNumDoubleTestcases<<"\n";
|
||||
cout <<"numBytesCopied\t"<<m_bytes_copied<<"\n";
|
||||
double mean = 0;
|
||||
double variance = 0;
|
||||
uint64_t denominator = 1;
|
||||
cout << "Time spent in compute_testcases "<<m_compute_time*1e-9<<"\n";
|
||||
cout << "Time spent in data transfer (Java <--> C++) "<<m_data_transfer_time*1e-9<<"\n";
|
||||
|
||||
cout << "\nHC input stats\nstat_name,sum,sum_square,mean,variance,min,max\n";
|
||||
for(unsigned i=0;i<TOTAL_NUMBER_STATS;++i)
|
||||
{
|
||||
cout << LoadTimeInitializerStatsNames[i];
|
||||
cout << "," << m_sum_stats[i];
|
||||
cout << "," << std::scientific << m_sum_square_stats[i];
|
||||
denominator = 1;
|
||||
switch(i)
|
||||
{
|
||||
case NUM_READS_IDX:
|
||||
case NUM_HAPLOTYPES_IDX:
|
||||
case NUM_TESTCASES_IDX:
|
||||
denominator = m_sum_stats[NUM_REGIONS_IDX];
|
||||
break;
|
||||
case HAPLOTYPE_LENGTH_IDX:
|
||||
case READ_LENGTH_IDX:
|
||||
case PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX:
|
||||
denominator = m_sum_stats[NUM_TESTCASES_IDX];
|
||||
break;
|
||||
default:
|
||||
denominator = 1;
|
||||
break;
|
||||
}
|
||||
mean = ((double)m_sum_stats[i])/denominator;
|
||||
cout << "," << std::scientific << mean;
|
||||
variance = (m_sum_square_stats[i]/denominator) - (mean*mean); //E(X^2)-(E(X))^2
|
||||
cout << "," << std::scientific << variance;
|
||||
cout << "," << m_min_stats[i];
|
||||
cout << "," << m_max_stats[i];
|
||||
cout << "\n";
|
||||
}
|
||||
cout << "\n";
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
|
|
@ -72,6 +102,12 @@ void LoadTimeInitializer::debug_dump(string filename, string s, bool to_append,
|
|||
{
|
||||
m_filename_to_fptr[filename] = new ofstream();
|
||||
fptr = m_filename_to_fptr[filename];
|
||||
//File never seen before
|
||||
if(m_written_files_set.find(filename) == m_written_files_set.end())
|
||||
{
|
||||
to_append = false;
|
||||
m_written_files_set.insert(filename);
|
||||
}
|
||||
fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
|
||||
assert(fptr->is_open());
|
||||
}
|
||||
|
|
@ -121,3 +157,12 @@ void LoadTimeInitializer::dump_sandbox(testcase& tc, unsigned tc_idx, unsigned n
|
|||
dumpFptr << " "<< numReads << " "<<numHaplotypes;
|
||||
dumpFptr<<"\n";
|
||||
}
|
||||
|
||||
//Fold one observation into the running profiling statistics
//stat_idx - which statistic to update (index into the m_*_stats arrays)
//value    - the observed value; added to the sum and sum of squares and compared against min/max
void LoadTimeInitializer::update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value)
{
  //Square in double precision - value*value may not fit in 64 bits
  const double value_as_double = static_cast<double>(value);
  //This function may be called by worker threads of an OpenMP parallel loop. The updates below are
  //read-modify-write sequences on shared counters, so they are serialized to avoid lost updates.
  //The pragma is ignored when the code is built without OpenMP
#pragma omp critical (load_time_initializer_update_stat)
  {
    m_sum_stats[stat_idx] += value;
    m_sum_square_stats[stat_idx] += (value_as_double*value_as_double);
    m_max_stats[stat_idx] = std::max(m_max_stats[stat_idx], value);
    m_min_stats[stat_idx] = std::min(m_min_stats[stat_idx], value);
  }
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,21 @@
|
|||
#include "headers.h"
|
||||
#include <jni.h>
|
||||
#include "template.h"
|
||||
|
||||
//Indices of the profiling statistics kept by LoadTimeInitializer
//The order must match the entries of LoadTimeInitializerStatsNames
enum LoadTimeInitializerStatsEnum
{
  NUM_REGIONS_IDX = 0,                        //updated with 1 per jniComputeLikelihoods call
  NUM_READS_IDX,                              //numReads per call
  NUM_HAPLOTYPES_IDX,                         //numHaplotypes per call
  NUM_TESTCASES_IDX,                          //numReads*numHaplotypes per call
  NUM_DOUBLE_INVOCATIONS_IDX,                 //updated with 1 per testcase recomputed in double precision
  HAPLOTYPE_LENGTH_IDX,                       //length of each haplotype
  READ_LENGTH_IDX,                            //length of each read
  PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX,   //read length * haplotype length per testcase
  TOTAL_NUMBER_STATS                          //number of stats - sizes the per-stat arrays
};
|
||||
extern char* LoadTimeInitializerStatsNames[];
|
||||
|
||||
class LoadTimeInitializer
|
||||
{
|
||||
public:
|
||||
|
|
@ -21,20 +36,8 @@ class LoadTimeInitializer
|
|||
jfieldID m_deletionGOPFID;
|
||||
jfieldID m_overallGCPFID;
|
||||
jfieldID m_haplotypeBasesFID;
|
||||
//used to compute avg, variance of #testcases
|
||||
double m_sumNumReads;
|
||||
double m_sumSquareNumReads;
|
||||
double m_sumNumHaplotypes;
|
||||
double m_sumSquareNumHaplotypes;
|
||||
double m_sumNumTestcases;
|
||||
double m_sumSquareNumTestcases;
|
||||
uint64_t m_sumNumDoubleTestcases;
|
||||
uint64_t m_sumReadLengths;
|
||||
uint64_t m_sumHaplotypeLengths;
|
||||
uint64_t m_sumProductReadLengthHaplotypeLength;
|
||||
double m_sumSquareProductReadLengthHaplotypeLength;
|
||||
unsigned m_maxNumTestcases;
|
||||
unsigned m_num_invocations;
|
||||
//profiling - update stats
|
||||
void update_stat(LoadTimeInitializerStatsEnum stat_idx, uint64_t value);
|
||||
//timing in nanoseconds
|
||||
uint64_t m_compute_time;
|
||||
uint64_t m_data_transfer_time;
|
||||
|
|
@ -42,7 +45,13 @@ class LoadTimeInitializer
|
|||
uint64_t m_bytes_copied;
|
||||
private:
|
||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||
std::set<std::string> m_written_files_set;
|
||||
std::ofstream m_sandbox_fptr;
|
||||
//used to compute various stats
|
||||
uint64_t m_sum_stats[TOTAL_NUMBER_STATS];
|
||||
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
||||
};
|
||||
extern LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#include "utils.h"
|
||||
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log = NULL)
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||
{
|
||||
int r, c;
|
||||
int ROWS = tc->rslen + 1;
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@
|
|||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include <cstdlib>
|
||||
|
|
|
|||
|
|
@ -5,8 +5,8 @@
|
|||
/*#define ENABLE_ASSERTIONS 1*/
|
||||
#define DO_PROFILING 1
|
||||
/*#define DEBUG 1*/
|
||||
//#define DEBUG0_1 1
|
||||
//#define DEBUG3 1
|
||||
/*#define DEBUG0_1 1*/
|
||||
/*#define DEBUG3 1*/
|
||||
/*#define DUMP_TO_SANDBOX 1*/
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -92,45 +92,20 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_sumHaplotypeLengths += haplotypeBasesLength;
|
||||
g_load_time_initializer.update_stat(HAPLOTYPE_LENGTH_IDX, haplotypeBasesLength);
|
||||
g_load_time_initializer.m_bytes_copied += (is_copy ? haplotypeBasesLength : 0);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//JNI function to invoke compute_full_prob_avx
|
||||
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
|
||||
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
|
||||
//likelihoodArray - array of doubles to return results back to Java. Memory allocated by Java prior to JNI call
|
||||
//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
|
||||
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
|
||||
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
|
||||
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector
|
||||
(JNIEnv* env, jint numReads, jint numHaplotypes, jobjectArray& readDataArray,
|
||||
vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector, vector<testcase>& tc_array)
|
||||
{
|
||||
#ifdef DEBUG0_1
|
||||
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
|
||||
#endif
|
||||
double start_time = 0;
|
||||
jboolean is_copy = JNI_FALSE;
|
||||
//haplotype vector from earlier store - note the reference to vector, not copying
|
||||
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
|
||||
jboolean is_copy = JNI_FALSE;
|
||||
|
||||
unsigned numTestCases = numReads*numHaplotypes;
|
||||
//vector to store results
|
||||
vector<testcase> tc_array;
|
||||
tc_array.clear();
|
||||
tc_array.resize(numTestCases);
|
||||
unsigned tc_idx = 0;
|
||||
//Store arrays for release later
|
||||
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
|
||||
readBasesArrayVector.clear();
|
||||
readBasesArrayVector.resize(numReads);
|
||||
#ifdef DO_PROFILING
|
||||
start_time = get_time();
|
||||
#endif
|
||||
#ifdef DUMP_TO_SANDBOX
|
||||
g_load_time_initializer.open_sandbox();
|
||||
#endif
|
||||
for(unsigned i=0;i<numReads;++i)
|
||||
{
|
||||
//Get bytearray fields from read
|
||||
|
|
@ -157,6 +132,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
jbyte* overallGCPArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(overallGCP, &is_copy);
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_bytes_copied += (is_copy ? readLength*5 : 0);
|
||||
g_load_time_initializer.update_stat(READ_LENGTH_IDX, readLength);
|
||||
#endif
|
||||
#ifdef ENABLE_ASSERTIONS
|
||||
assert(readBasesArray && "readBasesArray not initialized in JNI");
|
||||
|
|
@ -183,7 +159,6 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
g_load_time_initializer.debug_dump("reads_jni.txt",to_string((int)overallGCPArray[j]),true);
|
||||
}
|
||||
#endif
|
||||
|
||||
for(unsigned j=0;j<numHaplotypes;++j)
|
||||
{
|
||||
jsize haplotypeLength = (jsize)g_haplotypeBasesLengths[j];
|
||||
|
|
@ -197,15 +172,14 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
tc_array[tc_idx].d = (char*)deletionGOPArray;
|
||||
tc_array[tc_idx].c = (char*)overallGCPArray;
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_sumProductReadLengthHaplotypeLength += (readLength*haplotypeLength);
|
||||
g_load_time_initializer.m_sumSquareProductReadLengthHaplotypeLength += ((readLength*haplotypeLength)*(readLength*haplotypeLength));
|
||||
g_load_time_initializer.update_stat(PRODUCT_READ_LENGTH_HAPLOTYPE_LENGTH_IDX, ((uint64_t)readLength)*((uint64_t)haplotypeLength));
|
||||
#endif
|
||||
#ifdef DUMP_TO_SANDBOX
|
||||
g_load_time_initializer.dump_sandbox(tc_array[tc_idx], tc_idx, numReads, numHaplotypes);
|
||||
#endif
|
||||
++tc_idx;
|
||||
}
|
||||
//Release read arrays at end because they are used by compute_full_prob
|
||||
//Store the read array references and release them at the end because they are used by compute_full_prob
|
||||
//Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
|
||||
readBasesArrayVector[i].clear();
|
||||
readBasesArrayVector[i].resize(5);
|
||||
|
|
@ -214,13 +188,79 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
|
||||
readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
|
||||
readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_sumReadLengths += readLength;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
//Do compute over vector of testcase structs
|
||||
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
|
||||
unsigned maxNumThreadsToUse)
|
||||
{
|
||||
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_data_transfer_time += get_time();
|
||||
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
}
|
||||
|
||||
//Inform the Java VM that we no longer need access to the read arrays (and free memory)
|
||||
inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays
|
||||
(JNIEnv* env, vector<vector<pair<jbyteArray,jbyte*> > >& readBasesArrayVector)
|
||||
{
|
||||
//Release read arrays first
|
||||
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
|
||||
{
|
||||
for(int j=readBasesArrayVector[i].size()-1;j>=0;--j)
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
|
||||
readBasesArrayVector[i].clear();
|
||||
}
|
||||
readBasesArrayVector.clear();
|
||||
}
|
||||
|
||||
//JNI function to invoke compute_full_prob_avx
|
||||
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
|
||||
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
|
||||
//likelihoodArray - array of doubles to return results back to Java. Memory allocated by Java prior to JNI call
|
||||
//maxNumThreadsToUse - Max number of threads that OpenMP can use for the HMM computation
|
||||
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods
|
||||
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
|
||||
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
|
||||
{
|
||||
#ifdef DEBUG0_1
|
||||
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
|
||||
#endif
|
||||
jboolean is_copy = JNI_FALSE;
|
||||
struct timespec start_time;
|
||||
unsigned numTestCases = numReads*numHaplotypes;
|
||||
//vector to store testcases
|
||||
vector<testcase> tc_array;
|
||||
tc_array.clear();
|
||||
tc_array.resize(numTestCases);
|
||||
//Store read arrays for release later
|
||||
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
|
||||
readBasesArrayVector.clear();
|
||||
readBasesArrayVector.resize(numReads);
|
||||
#ifdef DUMP_TO_SANDBOX
|
||||
g_load_time_initializer.open_sandbox();
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
get_time(&start_time);
|
||||
#endif
|
||||
//Copy byte array references from Java memory into vector of testcase structs
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeTestcasesVector(env,
|
||||
numReads, numHaplotypes, readDataArray, readBasesArrayVector, tc_array);
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
|
||||
#endif
|
||||
|
||||
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
|
||||
|
|
@ -230,65 +270,29 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
|
||||
struct timespec prev_time;
|
||||
clock_gettime(CLOCK_REALTIME, &prev_time);
|
||||
get_time(&start_time);
|
||||
#endif
|
||||
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
compute_testcases(tc_array, numTestCases, likelihoodDoubleArray, maxNumThreadsToUse);
|
||||
#ifdef DO_PROFILING
|
||||
++(g_load_time_initializer.m_sumNumDoubleTestcases);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_compute_time += diff_time(prev_time);
|
||||
g_load_time_initializer.m_compute_time += diff_time(start_time);
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
|
||||
}
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
start_time = get_time();
|
||||
get_time(&start_time);
|
||||
#endif
|
||||
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory
|
||||
|
||||
//Release read arrays first
|
||||
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
|
||||
{
|
||||
for(int j=readBasesArrayVector[i].size()-1;j>=0;--j)
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
|
||||
readBasesArrayVector[i].clear();
|
||||
}
|
||||
readBasesArrayVector.clear();
|
||||
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory (if copy made)
|
||||
Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniReleaseReadArrays(env, readBasesArrayVector);
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_data_transfer_time += get_time();
|
||||
g_load_time_initializer.m_data_transfer_time += diff_time(start_time);
|
||||
g_load_time_initializer.update_stat(NUM_REGIONS_IDX, 1);
|
||||
g_load_time_initializer.update_stat(NUM_READS_IDX, numReads);
|
||||
g_load_time_initializer.update_stat(NUM_HAPLOTYPES_IDX, numHaplotypes);
|
||||
g_load_time_initializer.update_stat(NUM_TESTCASES_IDX, numTestCases);
|
||||
#endif
|
||||
tc_array.clear();
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_sumNumReads += numReads;
|
||||
g_load_time_initializer.m_sumSquareNumReads += numReads*numReads;
|
||||
g_load_time_initializer.m_sumNumHaplotypes += numHaplotypes;
|
||||
g_load_time_initializer.m_sumSquareNumHaplotypes += numHaplotypes*numHaplotypes;
|
||||
g_load_time_initializer.m_sumNumTestcases += numTestCases;
|
||||
g_load_time_initializer.m_sumSquareNumTestcases += numTestCases*numTestCases;
|
||||
g_load_time_initializer.m_maxNumTestcases = numTestCases > g_load_time_initializer.m_maxNumTestcases ? numTestCases
|
||||
: g_load_time_initializer.m_maxNumTestcases;
|
||||
++(g_load_time_initializer.m_num_invocations);
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
g_load_time_initializer.debug_close();
|
||||
#endif
|
||||
#ifdef DUMP_TO_SANDBOX
|
||||
g_load_time_initializer.close_sandbox();
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -14,8 +14,6 @@ int main(int argc, char** argv)
|
|||
cerr << "Needs path to input file as argument\n";
|
||||
exit(0);
|
||||
}
|
||||
do_compute(argv[1]);
|
||||
return 0;
|
||||
bool use_old_read_testcase = false;
|
||||
if(argc >= 3 && string(argv[2]) == "1")
|
||||
use_old_read_testcase = true;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "vector_defs.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
|
||||
uint8_t ConvertChar::conversionTable[255];
|
||||
float (*g_compute_full_prob_float)(testcase *tc, float* before_last_log) = 0;
|
||||
|
|
@ -271,15 +272,9 @@ double getCurrClk() {
|
|||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
//Store the current CLOCK_REALTIME timestamp into *store_struct
//store_struct - destination for the timestamp; a null pointer is accepted and ignored
//The stored timestamp is meant to be passed to diff_time() later to measure an interval
void get_time(struct timespec* store_struct)
{
  //Nothing to write into - do not hand a null pointer to clock_gettime
  if(store_struct == 0)
    return;
  clock_gettime(CLOCK_REALTIME, store_struct);
}
|
||||
|
||||
uint64_t diff_time(struct timespec& prev_time)
|
||||
|
|
@ -289,6 +284,7 @@ uint64_t diff_time(struct timespec& prev_time)
|
|||
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
|
||||
}
|
||||
|
||||
//#define DUMP_COMPUTE_VALUES 1
|
||||
#define CHECK_VALUES 1
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
|
|
@ -329,7 +325,8 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
|
|||
baseline_results_vec.clear();
|
||||
results_vec.resize(tc_vector.size());
|
||||
baseline_results_vec.resize(tc_vector.size());
|
||||
get_time();
|
||||
struct timespec start_time;
|
||||
get_time(&start_time);
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
|
|
@ -343,10 +340,14 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
|
|||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
#ifdef DUMP_COMPUTE_VALUES
|
||||
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
||||
#endif
|
||||
results_vec[i] = result;
|
||||
}
|
||||
vector_compute_time += get_time();
|
||||
vector_compute_time += diff_time(start_time);
|
||||
#ifdef CHECK_VALUES
|
||||
get_time(&start_time);
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
|
|
@ -355,7 +356,7 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
|
|||
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
|
||||
baseline_results_vec[i] = baseline_result;
|
||||
}
|
||||
baseline_compute_time += get_time();
|
||||
baseline_compute_time += diff_time(start_time);
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
double baseline_result = baseline_results_vec[i];
|
||||
|
|
@ -383,6 +384,9 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
|
|||
if(break_value < 0)
|
||||
break;
|
||||
}
|
||||
#ifdef DUMP_COMPUTE_VALUES
|
||||
g_load_time_initializer.debug_close();
|
||||
#endif
|
||||
if(all_ok)
|
||||
{
|
||||
cout << "All output values within acceptable error\n";
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ void debug_dump(std::string filename, std::string s, bool to_append, bool add_ne
|
|||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
double getCurrClk();
|
||||
uint64_t get_time(struct timespec* x=0);
|
||||
void get_time(struct timespec* x);
|
||||
uint64_t diff_time(struct timespec& prev_time);
|
||||
|
||||
//bit 0 is sse4.2, bit 1 is AVX
|
||||
|
|
|
|||
Loading…
Reference in New Issue