Now has PAPI values
This commit is contained in:
parent
e32e9e6af6
commit
15fe244e4b
|
|
@ -1043,7 +1043,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
|||
referenceConfidenceModel.close();
|
||||
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
||||
//TODO (open & close) at the walker, not the engine.
|
||||
likelihoodCalculationEngine.close();
|
||||
likelihoodCalculationEngine.close();
|
||||
logger.info("Ran local assembly on " + result + " active regions");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -75,8 +75,9 @@ import java.io.IOException;
|
|||
*/
|
||||
public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||
|
||||
private static final boolean dumpSandboxOnly = false; //simulates ifdef
|
||||
private static final boolean debug = false; //simulates ifdef
|
||||
private static final boolean verify = debug || true; //simulates ifdef
|
||||
private static final boolean verify = !dumpSandboxOnly && (debug || true); //simulates ifdef
|
||||
private static final boolean debug0_1 = false; //simulates ifdef
|
||||
private static final boolean debug1 = false; //simulates ifdef
|
||||
private static final boolean debug2 = false;
|
||||
|
|
@ -134,9 +135,11 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
|||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
|
||||
final int readMaxLength, final int haplotypeMaxLength ) {
|
||||
if(verify)
|
||||
{
|
||||
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
|
||||
jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||
haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -145,7 +148,8 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
|||
@Override
|
||||
public void finalizeRegion()
|
||||
{
|
||||
jniPairHMM.finalizeRegion();
|
||||
if(!dumpSandboxOnly)
|
||||
jniPairHMM.finalizeRegion();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -204,46 +208,61 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
|||
++idx;
|
||||
}
|
||||
}
|
||||
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
double[] likelihoodArray = jniPairHMM.getLikelihoodArray();
|
||||
//to compare values
|
||||
final PerReadAlleleLikelihoodMap likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
double[] likelihoodArray = null;
|
||||
PerReadAlleleLikelihoodMap likelihoodMap = null;
|
||||
if(verify)
|
||||
{
|
||||
//re-order values in likelihoodArray
|
||||
double[] tmpArray = new double[numHaplotypes];
|
||||
idx = 0;
|
||||
int idxInsideHaplotypeList = 0;
|
||||
int readIdx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
likelihoodArray = jniPairHMM.getLikelihoodArray();
|
||||
//to compare values
|
||||
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||
}
|
||||
else
|
||||
{
|
||||
likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||
likelihoodArray = new double[numTestcases];
|
||||
for(int i=0;i<numTestcases;++i)
|
||||
likelihoodArray[i] = -0.5;
|
||||
}
|
||||
if(verify || dumpSandboxOnly)
|
||||
{
|
||||
boolean toDump = dumpSandboxOnly ? true : false;
|
||||
if(verify)
|
||||
{
|
||||
for(int j=0;j<numHaplotypes;++j)
|
||||
tmpArray[j] = likelihoodArray[readIdx+j];
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
||||
//re-order values in likelihoodArray
|
||||
double[] tmpArray = new double[numHaplotypes];
|
||||
idx = 0;
|
||||
int idxInsideHaplotypeList = 0;
|
||||
int readIdx = 0;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
||||
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
|
||||
++idx;
|
||||
for(int j=0;j<numHaplotypes;++j)
|
||||
tmpArray[j] = likelihoodArray[readIdx+j];
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
||||
{
|
||||
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
||||
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
|
||||
++idx;
|
||||
}
|
||||
readIdx += numHaplotypes;
|
||||
}
|
||||
readIdx += numHaplotypes;
|
||||
}
|
||||
//for floating point values, no exact equality
|
||||
//check whether numbers are close in terms of abs_error or relative_error
|
||||
//For very large values, relative_error is relevant
|
||||
//For very small values, abs_error is relevant
|
||||
boolean toDump = false;
|
||||
for(int i=0;i<likelihoodArray.length;++i)
|
||||
{
|
||||
double abs_error = Math.abs(likelihoodArray[i] - mLikelihoodArray[i]);
|
||||
double relative_error = 0;
|
||||
if(mLikelihoodArray[i] == 0)
|
||||
relative_error = 0;
|
||||
else
|
||||
relative_error = Math.abs(abs_error/mLikelihoodArray[i]);
|
||||
if(abs_error > 1e-5 && relative_error > 1e-5)
|
||||
//for floating point values, no exact equality
|
||||
//check whether numbers are close in terms of abs_error or relative_error
|
||||
//For very large values, relative_error is relevant
|
||||
//For very small values, abs_error is relevant
|
||||
for(int i=0;i<likelihoodArray.length;++i)
|
||||
{
|
||||
toDump = true;
|
||||
break;
|
||||
double abs_error = Math.abs(likelihoodArray[i] - mLikelihoodArray[i]);
|
||||
double relative_error = 0;
|
||||
if(mLikelihoodArray[i] == 0)
|
||||
relative_error = 0;
|
||||
else
|
||||
relative_error = Math.abs(abs_error/mLikelihoodArray[i]);
|
||||
if(abs_error > 1e-6 && relative_error > 1e-6)
|
||||
{
|
||||
toDump = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
//if numbers are not close, then dump out the data that produced the inconsistency
|
||||
|
|
@ -251,24 +270,40 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
|||
{
|
||||
idx = 0;
|
||||
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
|
||||
boolean firstLine = true;
|
||||
for(GATKSAMRecord read : reads)
|
||||
{
|
||||
byte [] overallGCP = GCPArrayMap.get(read);
|
||||
byte[] tmpByteArray = new byte[read.getReadBases().length];
|
||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
|
||||
{
|
||||
byte[] haplotypeBases = currEntry.getValue().getBases();
|
||||
debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
|
||||
debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseQualities()[k]))+" ",true);
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseInsertionQualities()[k]))+" ",true);
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseInsertionQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseDeletionQualities()[k]))+" ",true);
|
||||
tmpByteArray[k] = (byte)((int)((read.getBaseDeletionQualities())[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||
for(int k=0;k<read.getReadBases().length;++k)
|
||||
debugDump("debug_dump.txt",String.format("%d",(int)(overallGCP[k]))+" ",true);
|
||||
debugDump("debug_dump.txt","\n",true);
|
||||
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
|
||||
tmpByteArray[k] = (byte)((int)(overallGCP[k]) + 33);
|
||||
debugDump("debug_dump.txt",new String(tmpByteArray),true);
|
||||
if(firstLine)
|
||||
{
|
||||
debugDump("debug_dump.txt",String.format(" %d %d\n",readListSize, numHaplotypes), true);
|
||||
firstLine = false;
|
||||
}
|
||||
else
|
||||
debugDump("debug_dump.txt","\n",true);
|
||||
if(verify)
|
||||
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
|
||||
else
|
||||
if(dumpSandboxOnly)
|
||||
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
|
||||
++idx;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,3 +12,5 @@ reformat
|
|||
subdir_checkout.sh
|
||||
avx/
|
||||
sse/
|
||||
triplicate.sh
|
||||
|
||||
|
|
|
|||
|
|
@ -47,7 +47,13 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
|||
m_filename_to_fptr.clear();
|
||||
m_written_files_set.clear();
|
||||
|
||||
//Common buffer - 8MB
|
||||
unsigned size = 1024*1024;
|
||||
m_buffer = new uint64_t[size];
|
||||
m_buffer_size = size*sizeof(uint64_t);
|
||||
|
||||
initialize_function_pointers();
|
||||
|
||||
cout.flush();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ class LoadTimeInitializer
|
|||
{
|
||||
public:
|
||||
LoadTimeInitializer(); //will be called when library is loaded
|
||||
~LoadTimeInitializer() { delete m_buffer; }
|
||||
void print_profiling();
|
||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||
void debug_close();
|
||||
|
|
@ -43,6 +44,8 @@ class LoadTimeInitializer
|
|||
uint64_t m_data_transfer_time;
|
||||
//bytes copied
|
||||
uint64_t m_bytes_copied;
|
||||
unsigned get_buffer_size() { return m_buffer_size; }
|
||||
char* get_buffer() { return (char*)m_buffer; }
|
||||
private:
|
||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||
std::set<std::string> m_written_files_set;
|
||||
|
|
@ -52,6 +55,8 @@ class LoadTimeInitializer
|
|||
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
||||
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
||||
unsigned m_buffer_size;
|
||||
uint64_t* m_buffer;
|
||||
};
|
||||
extern LoadTimeInitializer g_load_time_initializer;
|
||||
|
||||
|
|
|
|||
|
|
@ -73,7 +73,9 @@ JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
|
|||
(JNIEnv* env, jobject thisObject, jstring fileNameString)
|
||||
{
|
||||
const char* fileName = env->GetStringUTFChars(fileNameString, 0);
|
||||
do_compute((char*)fileName);
|
||||
char local_array[800];
|
||||
strncpy(local_array, fileName, 200);
|
||||
env->ReleaseStringUTFChars(fileNameString, fileName);
|
||||
do_compute(local_array, true, 10000, false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#include "headers.h"
|
||||
#include "template.h"
|
||||
#include "utils.h"
|
||||
#include "LoadTimeInitializer.h"
|
||||
|
||||
using namespace std;
|
||||
template<class NUMBER>
|
||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||
|
|
@ -10,12 +12,28 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
|||
int COLS = tc->haplen + 1;
|
||||
|
||||
Context<NUMBER> ctx;
|
||||
|
||||
//#define USE_STACK_ALLOCATION 1
|
||||
#ifdef USE_STACK_ALLOCATION
|
||||
NUMBER M[ROWS][COLS];
|
||||
NUMBER X[ROWS][COLS];
|
||||
NUMBER Y[ROWS][COLS];
|
||||
NUMBER p[ROWS][6];
|
||||
#else
|
||||
//allocate on heap in way that simulates a 2D array. Having a 2D array instead of
|
||||
//a straightforward array of pointers ensures that all data lies 'close' in memory, increasing
|
||||
//the chance of being stored together in the cache. Also, prefetchers can learn memory access
|
||||
//patterns for 2D arrays, not possible for array of pointers
|
||||
//bool locally_allocated = false;
|
||||
//NUMBER* common_buffer = 0;
|
||||
NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6);
|
||||
//if(true)
|
||||
//{
|
||||
//common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||
//locally_allocated = true;
|
||||
//}
|
||||
//else
|
||||
//common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer());
|
||||
//pointers to within the allocated buffer
|
||||
NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS];
|
||||
NUMBER* ptr = common_buffer;
|
||||
|
|
@ -25,14 +43,11 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
|||
for(;i<4*ROWS;++i, ptr+=6)
|
||||
common_pointer_buffer[i] = ptr;
|
||||
|
||||
//NUMBER M[ROWS][COLS];
|
||||
//NUMBER X[ROWS][COLS];
|
||||
//NUMBER Y[ROWS][COLS];
|
||||
//NUMBER p[ROWS][6];
|
||||
NUMBER** M = common_pointer_buffer;
|
||||
NUMBER** X = M + ROWS;
|
||||
NUMBER** Y = X + ROWS;
|
||||
NUMBER** p = Y + ROWS;
|
||||
#endif
|
||||
|
||||
|
||||
p[0][MM] = ctx._(0.0);
|
||||
|
|
@ -111,8 +126,11 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
|||
if (before_last_log != NULL)
|
||||
*before_last_log = result;
|
||||
|
||||
#ifndef USE_STACK_ALLOCATION
|
||||
delete common_pointer_buffer;
|
||||
//if(locally_allocated)
|
||||
delete common_buffer;
|
||||
#endif
|
||||
|
||||
return result;
|
||||
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||
|
|
|
|||
|
|
@ -191,25 +191,32 @@ inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_Vector
|
|||
}
|
||||
}
|
||||
|
||||
//#define DO_WARMUP
|
||||
//#define DO_REPEAT_PROFILING
|
||||
//Do compute over vector of testcase structs
|
||||
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
|
||||
unsigned maxNumThreadsToUse)
|
||||
{
|
||||
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
||||
#ifdef DO_REPEAT_PROFILING
|
||||
for(unsigned i=0;i<10;++i)
|
||||
#endif
|
||||
{
|
||||
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
{
|
||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
||||
#endif
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -227,6 +234,10 @@ inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_Vector
|
|||
readBasesArrayVector.clear();
|
||||
}
|
||||
|
||||
|
||||
#ifdef DO_WARMUP
|
||||
uint64_t g_sum = 0;
|
||||
#endif
|
||||
//JNI function to invoke compute_full_prob_avx
|
||||
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
|
||||
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
|
||||
|
|
@ -268,6 +279,24 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
|||
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
|
||||
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
||||
#endif
|
||||
#ifdef DO_WARMUP
|
||||
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
|
||||
for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i)
|
||||
{
|
||||
unsigned curr_size = env->GetArrayLength(haplotypeBasesArrayVector[i].first);
|
||||
for(unsigned j=0;j<curr_size;++j)
|
||||
g_sum += ((uint64_t)((haplotypeBasesArrayVector[i].second)[j]));
|
||||
}
|
||||
for(unsigned i=0;i<readBasesArrayVector.size();++i)
|
||||
{
|
||||
for(unsigned j=0;j<readBasesArrayVector[i].size();++j)
|
||||
{
|
||||
unsigned curr_size = env->GetArrayLength(readBasesArrayVector[i][j].first);
|
||||
for(unsigned k=0;k<curr_size;++k)
|
||||
g_sum += ((uint64_t)((readBasesArrayVector[i][j].second)[k]));
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
|
||||
get_time(&start_time);
|
||||
|
|
|
|||
|
|
@ -61,6 +61,7 @@ uint64_t get_machine_capabilities()
|
|||
void initialize_function_pointers(uint64_t mask)
|
||||
{
|
||||
//mask = 0ull;
|
||||
//mask = (1 << SSE41_CUSTOM_IDX);
|
||||
if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
|
||||
{
|
||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||
|
|
@ -286,6 +287,13 @@ double getCurrClk() {
|
|||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
inline unsigned long long rdtsc(void)
|
||||
{
|
||||
unsigned hi, lo;
|
||||
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
|
||||
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
|
||||
}
|
||||
|
||||
void get_time(struct timespec* store_struct)
|
||||
{
|
||||
clock_gettime(CLOCK_REALTIME, store_struct);
|
||||
|
|
@ -298,7 +306,13 @@ uint64_t diff_time(struct timespec& prev_time)
|
|||
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
|
||||
}
|
||||
|
||||
//#define DUMP_COMPUTE_VALUES 1
|
||||
#define DUMP_COMPUTE_VALUES 1
|
||||
//#define DO_REPEATS
|
||||
#ifdef USE_PAPI
|
||||
#include "papi.h"
|
||||
#define NUM_PAPI_COUNTERS 4
|
||||
#endif
|
||||
|
||||
#define BATCH_SIZE 10000
|
||||
#define RUN_HYBRID
|
||||
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check)
|
||||
|
|
@ -321,7 +335,22 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
uint64_t vector_compute_time = 0;
|
||||
uint64_t baseline_compute_time = 0;
|
||||
uint64_t num_double_calls = 0;
|
||||
unsigned num_testcases = 0;
|
||||
bool all_ok = do_check ? true : false;
|
||||
#ifdef USE_PAPI
|
||||
uint32_t all_mask = (0);
|
||||
uint32_t no_usr_mask = (1 << 16); //bit 16 user mode, bit 17 kernel mode
|
||||
uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode
|
||||
PAPI_num_counters();
|
||||
int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" };
|
||||
assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK);
|
||||
assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK);
|
||||
long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||
#endif
|
||||
while(1)
|
||||
{
|
||||
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
||||
|
|
@ -336,26 +365,42 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
results_vec.resize(tc_vector.size());
|
||||
baseline_results_vec.resize(tc_vector.size());
|
||||
struct timespec start_time;
|
||||
#ifdef USE_PAPI
|
||||
assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||
#endif
|
||||
get_time(&start_time);
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
++num_double_calls;
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
#ifdef DUMP_COMPUTE_VALUES
|
||||
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
||||
#ifdef DO_REPEATS
|
||||
for(unsigned z=0;z<10;++z)
|
||||
#endif
|
||||
results_vec[i] = result;
|
||||
{
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||
double result = 0;
|
||||
if (result_avxf < MIN_ACCEPTED) {
|
||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||
++num_double_calls;
|
||||
}
|
||||
else
|
||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||
#ifdef DUMP_COMPUTE_VALUES
|
||||
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
||||
#endif
|
||||
results_vec[i] = result;
|
||||
}
|
||||
}
|
||||
#ifdef USE_PAPI
|
||||
assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||
#endif
|
||||
vector_compute_time += diff_time(start_time);
|
||||
#ifdef USE_PAPI
|
||||
for(unsigned k=0;k<NUM_PAPI_COUNTERS;++k)
|
||||
accum_values[k] += values[k];
|
||||
#endif
|
||||
num_testcases += tc_vector.size();
|
||||
if(do_check)
|
||||
{
|
||||
get_time(&start_time);
|
||||
|
|
@ -401,10 +446,15 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
|||
if(all_ok)
|
||||
{
|
||||
cout << "All output values within acceptable error\n";
|
||||
cout << "Baseline compute time "<<baseline_compute_time*1e-9<<"\n";
|
||||
cout << "Baseline double precision compute time "<<baseline_compute_time*1e-9<<"\n";
|
||||
}
|
||||
cout << "Num double invocations "<<num_double_calls<<"\n";
|
||||
cout << "Num testcase "<<num_testcases<< " num double invocations "<<num_double_calls<<"\n";
|
||||
cout << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
|
||||
#ifdef USE_PAPI
|
||||
for(unsigned i=0;i<NUM_PAPI_COUNTERS;++i)
|
||||
cout << eventnames[i] << " : "<<accum_values[i]<<"\n";
|
||||
#endif
|
||||
|
||||
if(use_old_read_testcase)
|
||||
fclose(fptr);
|
||||
else
|
||||
|
|
|
|||
Loading…
Reference in New Issue