Now has PAPI values
This commit is contained in:
parent
e32e9e6af6
commit
15fe244e4b
|
|
@ -1043,7 +1043,7 @@ public class HaplotypeCaller extends ActiveRegionWalker<List<VariantContext>, In
|
||||||
referenceConfidenceModel.close();
|
referenceConfidenceModel.close();
|
||||||
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
//TODO remove the need to call close here for debugging, the likelihood output stream should be managed
|
||||||
//TODO (open & close) at the walker, not the engine.
|
//TODO (open & close) at the walker, not the engine.
|
||||||
likelihoodCalculationEngine.close();
|
likelihoodCalculationEngine.close();
|
||||||
logger.info("Ran local assembly on " + result + " active regions");
|
logger.info("Ran local assembly on " + result + " active regions");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -75,8 +75,9 @@ import java.io.IOException;
|
||||||
*/
|
*/
|
||||||
public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
|
|
||||||
|
private static final boolean dumpSandboxOnly = false; //simulates ifdef
|
||||||
private static final boolean debug = false; //simulates ifdef
|
private static final boolean debug = false; //simulates ifdef
|
||||||
private static final boolean verify = debug || true; //simulates ifdef
|
private static final boolean verify = !dumpSandboxOnly && (debug || true); //simulates ifdef
|
||||||
private static final boolean debug0_1 = false; //simulates ifdef
|
private static final boolean debug0_1 = false; //simulates ifdef
|
||||||
private static final boolean debug1 = false; //simulates ifdef
|
private static final boolean debug1 = false; //simulates ifdef
|
||||||
private static final boolean debug2 = false;
|
private static final boolean debug2 = false;
|
||||||
|
|
@ -134,9 +135,11 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
|
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList,
|
||||||
final int readMaxLength, final int haplotypeMaxLength ) {
|
final int readMaxLength, final int haplotypeMaxLength ) {
|
||||||
if(verify)
|
if(verify)
|
||||||
|
{
|
||||||
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||||
jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
jniPairHMM.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
|
||||||
haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
|
haplotypeToHaplotypeListIdxMap = jniPairHMM.getHaplotypeToHaplotypeListIdxMap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -145,7 +148,8 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
@Override
|
@Override
|
||||||
public void finalizeRegion()
|
public void finalizeRegion()
|
||||||
{
|
{
|
||||||
jniPairHMM.finalizeRegion();
|
if(!dumpSandboxOnly)
|
||||||
|
jniPairHMM.finalizeRegion();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
@ -204,46 +208,61 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
++idx;
|
++idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
double[] likelihoodArray = null;
|
||||||
double[] likelihoodArray = jniPairHMM.getLikelihoodArray();
|
PerReadAlleleLikelihoodMap likelihoodMap = null;
|
||||||
//to compare values
|
|
||||||
final PerReadAlleleLikelihoodMap likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
|
||||||
if(verify)
|
if(verify)
|
||||||
{
|
{
|
||||||
//re-order values in likelihoodArray
|
jniPairHMM.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||||
double[] tmpArray = new double[numHaplotypes];
|
likelihoodArray = jniPairHMM.getLikelihoodArray();
|
||||||
idx = 0;
|
//to compare values
|
||||||
int idxInsideHaplotypeList = 0;
|
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
|
||||||
int readIdx = 0;
|
}
|
||||||
for(GATKSAMRecord read : reads)
|
else
|
||||||
|
{
|
||||||
|
likelihoodMap = new PerReadAlleleLikelihoodMap();
|
||||||
|
likelihoodArray = new double[numTestcases];
|
||||||
|
for(int i=0;i<numTestcases;++i)
|
||||||
|
likelihoodArray[i] = -0.5;
|
||||||
|
}
|
||||||
|
if(verify || dumpSandboxOnly)
|
||||||
|
{
|
||||||
|
boolean toDump = dumpSandboxOnly ? true : false;
|
||||||
|
if(verify)
|
||||||
{
|
{
|
||||||
for(int j=0;j<numHaplotypes;++j)
|
//re-order values in likelihoodArray
|
||||||
tmpArray[j] = likelihoodArray[readIdx+j];
|
double[] tmpArray = new double[numHaplotypes];
|
||||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
idx = 0;
|
||||||
|
int idxInsideHaplotypeList = 0;
|
||||||
|
int readIdx = 0;
|
||||||
|
for(GATKSAMRecord read : reads)
|
||||||
{
|
{
|
||||||
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
for(int j=0;j<numHaplotypes;++j)
|
||||||
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
|
tmpArray[j] = likelihoodArray[readIdx+j];
|
||||||
++idx;
|
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
|
||||||
|
{
|
||||||
|
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
|
||||||
|
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
|
||||||
|
++idx;
|
||||||
|
}
|
||||||
|
readIdx += numHaplotypes;
|
||||||
}
|
}
|
||||||
readIdx += numHaplotypes;
|
//for floating point values, no exact equality
|
||||||
}
|
//check whether numbers are close in terms of abs_error or relative_error
|
||||||
//for floating point values, no exact equality
|
//For very large values, relative_error is relevant
|
||||||
//check whether numbers are close in terms of abs_error or relative_error
|
//For very small values, abs_error is relevant
|
||||||
//For very large values, relative_error is relevant
|
for(int i=0;i<likelihoodArray.length;++i)
|
||||||
//For very small values, abs_error is relevant
|
|
||||||
boolean toDump = false;
|
|
||||||
for(int i=0;i<likelihoodArray.length;++i)
|
|
||||||
{
|
|
||||||
double abs_error = Math.abs(likelihoodArray[i] - mLikelihoodArray[i]);
|
|
||||||
double relative_error = 0;
|
|
||||||
if(mLikelihoodArray[i] == 0)
|
|
||||||
relative_error = 0;
|
|
||||||
else
|
|
||||||
relative_error = Math.abs(abs_error/mLikelihoodArray[i]);
|
|
||||||
if(abs_error > 1e-5 && relative_error > 1e-5)
|
|
||||||
{
|
{
|
||||||
toDump = true;
|
double abs_error = Math.abs(likelihoodArray[i] - mLikelihoodArray[i]);
|
||||||
break;
|
double relative_error = 0;
|
||||||
|
if(mLikelihoodArray[i] == 0)
|
||||||
|
relative_error = 0;
|
||||||
|
else
|
||||||
|
relative_error = Math.abs(abs_error/mLikelihoodArray[i]);
|
||||||
|
if(abs_error > 1e-6 && relative_error > 1e-6)
|
||||||
|
{
|
||||||
|
toDump = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//if numbers are not close, then dump out the data that produced the inconsistency
|
//if numbers are not close, then dump out the data that produced the inconsistency
|
||||||
|
|
@ -251,24 +270,40 @@ public class DebugJNILoglessPairHMM extends LoglessPairHMM {
|
||||||
{
|
{
|
||||||
idx = 0;
|
idx = 0;
|
||||||
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
|
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
|
||||||
|
boolean firstLine = true;
|
||||||
for(GATKSAMRecord read : reads)
|
for(GATKSAMRecord read : reads)
|
||||||
{
|
{
|
||||||
byte [] overallGCP = GCPArrayMap.get(read);
|
byte [] overallGCP = GCPArrayMap.get(read);
|
||||||
|
byte[] tmpByteArray = new byte[read.getReadBases().length];
|
||||||
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
|
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
|
||||||
{
|
{
|
||||||
byte[] haplotypeBases = currEntry.getValue().getBases();
|
byte[] haplotypeBases = currEntry.getValue().getBases();
|
||||||
debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
|
debugDump("debug_dump.txt",new String(haplotypeBases)+" ",true);
|
||||||
debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
|
debugDump("debug_dump.txt",new String(read.getReadBases())+" ",true);
|
||||||
for(int k=0;k<read.getReadBases().length;++k)
|
for(int k=0;k<read.getReadBases().length;++k)
|
||||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseQualities()[k]))+" ",true);
|
tmpByteArray[k] = (byte)((int)((read.getBaseQualities())[k]) + 33);
|
||||||
|
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||||
for(int k=0;k<read.getReadBases().length;++k)
|
for(int k=0;k<read.getReadBases().length;++k)
|
||||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseInsertionQualities()[k]))+" ",true);
|
tmpByteArray[k] = (byte)((int)((read.getBaseInsertionQualities())[k]) + 33);
|
||||||
|
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||||
for(int k=0;k<read.getReadBases().length;++k)
|
for(int k=0;k<read.getReadBases().length;++k)
|
||||||
debugDump("debug_dump.txt",String.format("%d",(int)(read.getBaseDeletionQualities()[k]))+" ",true);
|
tmpByteArray[k] = (byte)((int)((read.getBaseDeletionQualities())[k]) + 33);
|
||||||
|
debugDump("debug_dump.txt",new String(tmpByteArray)+" ",true);
|
||||||
for(int k=0;k<read.getReadBases().length;++k)
|
for(int k=0;k<read.getReadBases().length;++k)
|
||||||
debugDump("debug_dump.txt",String.format("%d",(int)(overallGCP[k]))+" ",true);
|
tmpByteArray[k] = (byte)((int)(overallGCP[k]) + 33);
|
||||||
debugDump("debug_dump.txt","\n",true);
|
debugDump("debug_dump.txt",new String(tmpByteArray),true);
|
||||||
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
|
if(firstLine)
|
||||||
|
{
|
||||||
|
debugDump("debug_dump.txt",String.format(" %d %d\n",readListSize, numHaplotypes), true);
|
||||||
|
firstLine = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
debugDump("debug_dump.txt","\n",true);
|
||||||
|
if(verify)
|
||||||
|
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
|
||||||
|
else
|
||||||
|
if(dumpSandboxOnly)
|
||||||
|
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
|
||||||
++idx;
|
++idx;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,3 +12,5 @@ reformat
|
||||||
subdir_checkout.sh
|
subdir_checkout.sh
|
||||||
avx/
|
avx/
|
||||||
sse/
|
sse/
|
||||||
|
triplicate.sh
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -47,7 +47,13 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
||||||
m_filename_to_fptr.clear();
|
m_filename_to_fptr.clear();
|
||||||
m_written_files_set.clear();
|
m_written_files_set.clear();
|
||||||
|
|
||||||
|
//Common buffer - 8MB
|
||||||
|
unsigned size = 1024*1024;
|
||||||
|
m_buffer = new uint64_t[size];
|
||||||
|
m_buffer_size = size*sizeof(uint64_t);
|
||||||
|
|
||||||
initialize_function_pointers();
|
initialize_function_pointers();
|
||||||
|
|
||||||
cout.flush();
|
cout.flush();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,6 +22,7 @@ class LoadTimeInitializer
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
LoadTimeInitializer(); //will be called when library is loaded
|
LoadTimeInitializer(); //will be called when library is loaded
|
||||||
|
~LoadTimeInitializer() { delete m_buffer; }
|
||||||
void print_profiling();
|
void print_profiling();
|
||||||
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
|
||||||
void debug_close();
|
void debug_close();
|
||||||
|
|
@ -43,6 +44,8 @@ class LoadTimeInitializer
|
||||||
uint64_t m_data_transfer_time;
|
uint64_t m_data_transfer_time;
|
||||||
//bytes copied
|
//bytes copied
|
||||||
uint64_t m_bytes_copied;
|
uint64_t m_bytes_copied;
|
||||||
|
unsigned get_buffer_size() { return m_buffer_size; }
|
||||||
|
char* get_buffer() { return (char*)m_buffer; }
|
||||||
private:
|
private:
|
||||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||||
std::set<std::string> m_written_files_set;
|
std::set<std::string> m_written_files_set;
|
||||||
|
|
@ -52,6 +55,8 @@ class LoadTimeInitializer
|
||||||
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
double m_sum_square_stats[TOTAL_NUMBER_STATS];
|
||||||
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
uint64_t m_min_stats[TOTAL_NUMBER_STATS];
|
||||||
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
uint64_t m_max_stats[TOTAL_NUMBER_STATS];
|
||||||
|
unsigned m_buffer_size;
|
||||||
|
uint64_t* m_buffer;
|
||||||
};
|
};
|
||||||
extern LoadTimeInitializer g_load_time_initializer;
|
extern LoadTimeInitializer g_load_time_initializer;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -73,7 +73,9 @@ JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
|
||||||
(JNIEnv* env, jobject thisObject, jstring fileNameString)
|
(JNIEnv* env, jobject thisObject, jstring fileNameString)
|
||||||
{
|
{
|
||||||
const char* fileName = env->GetStringUTFChars(fileNameString, 0);
|
const char* fileName = env->GetStringUTFChars(fileNameString, 0);
|
||||||
do_compute((char*)fileName);
|
char local_array[800];
|
||||||
|
strncpy(local_array, fileName, 200);
|
||||||
env->ReleaseStringUTFChars(fileNameString, fileName);
|
env->ReleaseStringUTFChars(fileNameString, fileName);
|
||||||
|
do_compute(local_array, true, 10000, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,8 @@
|
||||||
#include "headers.h"
|
#include "headers.h"
|
||||||
#include "template.h"
|
#include "template.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
#include "LoadTimeInitializer.h"
|
||||||
|
|
||||||
using namespace std;
|
using namespace std;
|
||||||
template<class NUMBER>
|
template<class NUMBER>
|
||||||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
|
|
@ -10,12 +12,28 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
int COLS = tc->haplen + 1;
|
int COLS = tc->haplen + 1;
|
||||||
|
|
||||||
Context<NUMBER> ctx;
|
Context<NUMBER> ctx;
|
||||||
|
//#define USE_STACK_ALLOCATION 1
|
||||||
|
#ifdef USE_STACK_ALLOCATION
|
||||||
|
NUMBER M[ROWS][COLS];
|
||||||
|
NUMBER X[ROWS][COLS];
|
||||||
|
NUMBER Y[ROWS][COLS];
|
||||||
|
NUMBER p[ROWS][6];
|
||||||
|
#else
|
||||||
//allocate on heap in way that simulates a 2D array. Having a 2D array instead of
|
//allocate on heap in way that simulates a 2D array. Having a 2D array instead of
|
||||||
//a straightforward array of pointers ensures that all data lies 'close' in memory, increasing
|
//a straightforward array of pointers ensures that all data lies 'close' in memory, increasing
|
||||||
//the chance of being stored together in the cache. Also, prefetchers can learn memory access
|
//the chance of being stored together in the cache. Also, prefetchers can learn memory access
|
||||||
//patterns for 2D arrays, not possible for array of pointers
|
//patterns for 2D arrays, not possible for array of pointers
|
||||||
|
//bool locally_allocated = false;
|
||||||
|
//NUMBER* common_buffer = 0;
|
||||||
NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
NUMBER* common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||||
|
//unsigned curr_size = sizeof(NUMBER)*(3*ROWS*COLS + ROWS*6);
|
||||||
|
//if(true)
|
||||||
|
//{
|
||||||
|
//common_buffer = new NUMBER[3*ROWS*COLS + ROWS*6];
|
||||||
|
//locally_allocated = true;
|
||||||
|
//}
|
||||||
|
//else
|
||||||
|
//common_buffer = (NUMBER*)(g_load_time_initializer.get_buffer());
|
||||||
//pointers to within the allocated buffer
|
//pointers to within the allocated buffer
|
||||||
NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS];
|
NUMBER** common_pointer_buffer = new NUMBER*[4*ROWS];
|
||||||
NUMBER* ptr = common_buffer;
|
NUMBER* ptr = common_buffer;
|
||||||
|
|
@ -25,14 +43,11 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
for(;i<4*ROWS;++i, ptr+=6)
|
for(;i<4*ROWS;++i, ptr+=6)
|
||||||
common_pointer_buffer[i] = ptr;
|
common_pointer_buffer[i] = ptr;
|
||||||
|
|
||||||
//NUMBER M[ROWS][COLS];
|
|
||||||
//NUMBER X[ROWS][COLS];
|
|
||||||
//NUMBER Y[ROWS][COLS];
|
|
||||||
//NUMBER p[ROWS][6];
|
|
||||||
NUMBER** M = common_pointer_buffer;
|
NUMBER** M = common_pointer_buffer;
|
||||||
NUMBER** X = M + ROWS;
|
NUMBER** X = M + ROWS;
|
||||||
NUMBER** Y = X + ROWS;
|
NUMBER** Y = X + ROWS;
|
||||||
NUMBER** p = Y + ROWS;
|
NUMBER** p = Y + ROWS;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
p[0][MM] = ctx._(0.0);
|
p[0][MM] = ctx._(0.0);
|
||||||
|
|
@ -111,8 +126,11 @@ NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log)
|
||||||
if (before_last_log != NULL)
|
if (before_last_log != NULL)
|
||||||
*before_last_log = result;
|
*before_last_log = result;
|
||||||
|
|
||||||
|
#ifndef USE_STACK_ALLOCATION
|
||||||
delete common_pointer_buffer;
|
delete common_pointer_buffer;
|
||||||
|
//if(locally_allocated)
|
||||||
delete common_buffer;
|
delete common_buffer;
|
||||||
|
#endif
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
//return ctx.LOG10(result) - ctx.LOG10_INITIAL_CONSTANT;
|
||||||
|
|
|
||||||
|
|
@ -191,25 +191,32 @@ inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_Vector
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//#define DO_WARMUP
|
||||||
|
//#define DO_REPEAT_PROFILING
|
||||||
//Do compute over vector of testcase structs
|
//Do compute over vector of testcase structs
|
||||||
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
|
inline void compute_testcases(vector<testcase>& tc_array, unsigned numTestCases, double* likelihoodDoubleArray,
|
||||||
unsigned maxNumThreadsToUse)
|
unsigned maxNumThreadsToUse)
|
||||||
{
|
{
|
||||||
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
#ifdef DO_REPEAT_PROFILING
|
||||||
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
for(unsigned i=0;i<10;++i)
|
||||||
{
|
|
||||||
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
|
||||||
double result = 0;
|
|
||||||
if (result_avxf < MIN_ACCEPTED) {
|
|
||||||
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
|
||||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
|
||||||
#ifdef DO_PROFILING
|
|
||||||
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
|
||||||
#endif
|
#endif
|
||||||
|
{
|
||||||
|
#pragma omp parallel for schedule (dynamic,10000) num_threads(maxNumThreadsToUse)
|
||||||
|
for(unsigned tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||||
|
{
|
||||||
|
float result_avxf = g_compute_full_prob_float(&(tc_array[tc_idx]), 0);
|
||||||
|
double result = 0;
|
||||||
|
if (result_avxf < MIN_ACCEPTED) {
|
||||||
|
double result_avxd = g_compute_full_prob_double(&(tc_array[tc_idx]), 0);
|
||||||
|
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||||
|
#ifdef DO_PROFILING
|
||||||
|
g_load_time_initializer.update_stat(NUM_DOUBLE_INVOCATIONS_IDX, 1);
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
else
|
||||||
|
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||||
|
likelihoodDoubleArray[tc_idx] = result;
|
||||||
}
|
}
|
||||||
else
|
|
||||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
|
||||||
likelihoodDoubleArray[tc_idx] = result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -227,6 +234,10 @@ inline JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_Vector
|
||||||
readBasesArrayVector.clear();
|
readBasesArrayVector.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#ifdef DO_WARMUP
|
||||||
|
uint64_t g_sum = 0;
|
||||||
|
#endif
|
||||||
//JNI function to invoke compute_full_prob_avx
|
//JNI function to invoke compute_full_prob_avx
|
||||||
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
|
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
|
||||||
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
|
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
|
||||||
|
|
@ -268,6 +279,24 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_VectorLogless
|
||||||
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
|
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
|
||||||
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef DO_WARMUP
|
||||||
|
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
|
||||||
|
for(unsigned i=0;i<haplotypeBasesArrayVector.size();++i)
|
||||||
|
{
|
||||||
|
unsigned curr_size = env->GetArrayLength(haplotypeBasesArrayVector[i].first);
|
||||||
|
for(unsigned j=0;j<curr_size;++j)
|
||||||
|
g_sum += ((uint64_t)((haplotypeBasesArrayVector[i].second)[j]));
|
||||||
|
}
|
||||||
|
for(unsigned i=0;i<readBasesArrayVector.size();++i)
|
||||||
|
{
|
||||||
|
for(unsigned j=0;j<readBasesArrayVector[i].size();++j)
|
||||||
|
{
|
||||||
|
unsigned curr_size = env->GetArrayLength(readBasesArrayVector[i][j].first);
|
||||||
|
for(unsigned k=0;k<curr_size;++k)
|
||||||
|
g_sum += ((uint64_t)((readBasesArrayVector[i][j].second)[k]));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
#ifdef DO_PROFILING
|
#ifdef DO_PROFILING
|
||||||
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
|
g_load_time_initializer.m_bytes_copied += (is_copy ? numTestCases*sizeof(double) : 0);
|
||||||
get_time(&start_time);
|
get_time(&start_time);
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,7 @@ uint64_t get_machine_capabilities()
|
||||||
void initialize_function_pointers(uint64_t mask)
|
void initialize_function_pointers(uint64_t mask)
|
||||||
{
|
{
|
||||||
//mask = 0ull;
|
//mask = 0ull;
|
||||||
|
//mask = (1 << SSE41_CUSTOM_IDX);
|
||||||
if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
|
if(is_avx_supported() && (mask & (1<< AVX_CUSTOM_IDX)))
|
||||||
{
|
{
|
||||||
cout << "Using AVX accelerated implementation of PairHMM\n";
|
cout << "Using AVX accelerated implementation of PairHMM\n";
|
||||||
|
|
@ -286,6 +287,13 @@ double getCurrClk() {
|
||||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline unsigned long long rdtsc(void)
|
||||||
|
{
|
||||||
|
unsigned hi, lo;
|
||||||
|
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
|
||||||
|
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
|
||||||
|
}
|
||||||
|
|
||||||
void get_time(struct timespec* store_struct)
|
void get_time(struct timespec* store_struct)
|
||||||
{
|
{
|
||||||
clock_gettime(CLOCK_REALTIME, store_struct);
|
clock_gettime(CLOCK_REALTIME, store_struct);
|
||||||
|
|
@ -298,7 +306,13 @@ uint64_t diff_time(struct timespec& prev_time)
|
||||||
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
|
return (uint64_t)((curr_time.tv_sec-prev_time.tv_sec)*1000000000+(curr_time.tv_nsec-prev_time.tv_nsec));
|
||||||
}
|
}
|
||||||
|
|
||||||
//#define DUMP_COMPUTE_VALUES 1
|
#define DUMP_COMPUTE_VALUES 1
|
||||||
|
//#define DO_REPEATS
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
#include "papi.h"
|
||||||
|
#define NUM_PAPI_COUNTERS 4
|
||||||
|
#endif
|
||||||
|
|
||||||
#define BATCH_SIZE 10000
|
#define BATCH_SIZE 10000
|
||||||
#define RUN_HYBRID
|
#define RUN_HYBRID
|
||||||
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check)
|
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size, bool do_check)
|
||||||
|
|
@ -321,7 +335,22 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
||||||
uint64_t vector_compute_time = 0;
|
uint64_t vector_compute_time = 0;
|
||||||
uint64_t baseline_compute_time = 0;
|
uint64_t baseline_compute_time = 0;
|
||||||
uint64_t num_double_calls = 0;
|
uint64_t num_double_calls = 0;
|
||||||
|
unsigned num_testcases = 0;
|
||||||
bool all_ok = do_check ? true : false;
|
bool all_ok = do_check ? true : false;
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
uint32_t all_mask = (0);
|
||||||
|
uint32_t no_usr_mask = (1 << 16); //bit 16 user mode, bit 17 kernel mode
|
||||||
|
uint32_t no_kernel_mask = (1 << 17); //bit 16 user mode, bit 17 kernel mode
|
||||||
|
PAPI_num_counters();
|
||||||
|
int events[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||||
|
char* eventnames[NUM_PAPI_COUNTERS]= { "cycles", "itlb_walk_cycles", "dtlb_load_walk_cycles", "dtlb_store_walk_cycles" };
|
||||||
|
assert(PAPI_event_name_to_code("UNHALTED_REFERENCE_CYCLES:u=1:k=1",&(events[0])) == PAPI_OK);
|
||||||
|
assert(PAPI_event_name_to_code("ITLB_MISSES:WALK_DURATION", &(events[1])) == PAPI_OK);
|
||||||
|
assert(PAPI_event_name_to_code("DTLB_LOAD_MISSES:WALK_DURATION", &(events[2])) == PAPI_OK);
|
||||||
|
assert(PAPI_event_name_to_code("DTLB_STORE_MISSES:WALK_DURATION", &(events[3])) == PAPI_OK);
|
||||||
|
long long values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||||
|
long long accum_values[NUM_PAPI_COUNTERS] = { 0, 0, 0, 0 };
|
||||||
|
#endif
|
||||||
while(1)
|
while(1)
|
||||||
{
|
{
|
||||||
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
||||||
|
|
@ -336,26 +365,42 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
||||||
results_vec.resize(tc_vector.size());
|
results_vec.resize(tc_vector.size());
|
||||||
baseline_results_vec.resize(tc_vector.size());
|
baseline_results_vec.resize(tc_vector.size());
|
||||||
struct timespec start_time;
|
struct timespec start_time;
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
assert(PAPI_start_counters(events, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||||
|
#endif
|
||||||
get_time(&start_time);
|
get_time(&start_time);
|
||||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||||
for(unsigned i=0;i<tc_vector.size();++i)
|
#ifdef DO_REPEATS
|
||||||
{
|
for(unsigned z=0;z<10;++z)
|
||||||
testcase& tc = tc_vector[i];
|
|
||||||
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
|
||||||
double result = 0;
|
|
||||||
if (result_avxf < MIN_ACCEPTED) {
|
|
||||||
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
|
||||||
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
|
||||||
++num_double_calls;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
|
||||||
#ifdef DUMP_COMPUTE_VALUES
|
|
||||||
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
|
||||||
#endif
|
#endif
|
||||||
results_vec[i] = result;
|
{
|
||||||
|
for(unsigned i=0;i<tc_vector.size();++i)
|
||||||
|
{
|
||||||
|
testcase& tc = tc_vector[i];
|
||||||
|
float result_avxf = g_compute_full_prob_float(&tc, 0);
|
||||||
|
double result = 0;
|
||||||
|
if (result_avxf < MIN_ACCEPTED) {
|
||||||
|
double result_avxd = g_compute_full_prob_double(&tc, 0);
|
||||||
|
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
|
||||||
|
++num_double_calls;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
|
||||||
|
#ifdef DUMP_COMPUTE_VALUES
|
||||||
|
g_load_time_initializer.debug_dump("return_values_vector.txt",to_string(result),true);
|
||||||
|
#endif
|
||||||
|
results_vec[i] = result;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
assert(PAPI_stop_counters(values, NUM_PAPI_COUNTERS) == PAPI_OK);
|
||||||
|
#endif
|
||||||
vector_compute_time += diff_time(start_time);
|
vector_compute_time += diff_time(start_time);
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
for(unsigned k=0;k<NUM_PAPI_COUNTERS;++k)
|
||||||
|
accum_values[k] += values[k];
|
||||||
|
#endif
|
||||||
|
num_testcases += tc_vector.size();
|
||||||
if(do_check)
|
if(do_check)
|
||||||
{
|
{
|
||||||
get_time(&start_time);
|
get_time(&start_time);
|
||||||
|
|
@ -401,10 +446,15 @@ void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size,
|
||||||
if(all_ok)
|
if(all_ok)
|
||||||
{
|
{
|
||||||
cout << "All output values within acceptable error\n";
|
cout << "All output values within acceptable error\n";
|
||||||
cout << "Baseline compute time "<<baseline_compute_time*1e-9<<"\n";
|
cout << "Baseline double precision compute time "<<baseline_compute_time*1e-9<<"\n";
|
||||||
}
|
}
|
||||||
cout << "Num double invocations "<<num_double_calls<<"\n";
|
cout << "Num testcase "<<num_testcases<< " num double invocations "<<num_double_calls<<"\n";
|
||||||
cout << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
|
cout << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
|
||||||
|
#ifdef USE_PAPI
|
||||||
|
for(unsigned i=0;i<NUM_PAPI_COUNTERS;++i)
|
||||||
|
cout << eventnames[i] << " : "<<accum_values[i]<<"\n";
|
||||||
|
#endif
|
||||||
|
|
||||||
if(use_old_read_testcase)
|
if(use_old_read_testcase)
|
||||||
fclose(fptr);
|
fclose(fptr);
|
||||||
else
|
else
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue