1. Enabled FTZ in LoadTimeInitializer.cc

2. Added Sandbox.java for testing
3. Moved compute to utils.cc (inside library)
4. Added flag for disabling FTZ in Makefile
This commit is contained in:
Karthik Gururaj 2014-02-06 11:01:33 -08:00
parent 0c63d6264f
commit fab6f57e97
11 changed files with 589 additions and 88 deletions

View File

@ -6,6 +6,7 @@ tests
hmm_Mohammad
pairhmm-template-main
*.swp
*.class
checker
reformat
subdir_checkout.sh

View File

@ -7,6 +7,15 @@ LoadTimeInitializer g_load_time_initializer;
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
#ifndef DISABLE_FTZ
//Very important to get good performance on Intel processors
//Function: enabling FTZ converts denormals to 0 in hardware
//Denormals cause microcode to insert uops into the core causing big slowdown
_MM_SET_FLUSH_ZERO_MODE(_MM_FLUSH_ZERO_ON);
cout << "FTZ enabled - may decrease accuracy if denormal numbers encountered\n";
#else
cout << "FTZ is not set - may slow down performance if denormal numbers encountered\n";
#endif
m_sumNumReads = 0;
m_sumSquareNumReads = 0;
m_sumNumHaplotypes = 0;

View File

@ -12,6 +12,9 @@ CC=icc
CXX=icc
LDFLAGS=-lm -lrt $(OMPLDFLAGS)
ifdef DISABLE_FTZ
COMMON_COMPILATION_FLAGS+=-DDISABLE_FTZ -no-ftz
endif
BIN=libVectorLoglessPairHMM.so pairhmm-template-main checker
#BIN=checker
@ -22,14 +25,14 @@ DF=$(DEPDIR)/$(*).d
#Common across libJNI and sandbox
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc LoadTimeInitializer.cc
#Part of libJNI
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc $(COMMON_SOURCES)
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc Sandbox.cc $(COMMON_SOURCES)
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
LIBOBJECTS=$(LIBSOURCES:.cc=.o)
COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o)
#No vectorization for these files
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.cc org_broadinstitute_sting_utils_pairhmm_DebugJNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc Sandbox.cc
#Use -xAVX for these files
AVX_SOURCES=avx_function_instantiations.cc
#Use -xSSE4.2 for these files
@ -43,7 +46,7 @@ $(AVX_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xAVX
$(SSE_OBJECTS): CXXFLAGS=$(COMMON_COMPILATION_FLAGS) -xSSE4.2
OBJECTS=$(NO_VECTOR_OBJECTS) $(AVX_OBJECTS) $(SSE_OBJECTS)
all: $(BIN)
all: $(BIN) Sandbox.class
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
@ -54,13 +57,15 @@ pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
libVectorLoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) ${LDFLAGS} -Wl,-Bstatic -limf -lsvml -lirng -Wl,-Bdynamic #-lintlc
$(CXX) $(OMPLFLAGS) -shared -static-intel -o $@ $(LIBOBJECTS) ${LDFLAGS}
$(OBJECTS): %.o: %.cc
@mkdir -p $(DEPDIR)
$(CXX) -c -MMD -MF $(DF) $(CXXFLAGS) $(OUTPUT_OPTION) $<
Sandbox.class: Sandbox.java
javac Sandbox.java
clean:
rm -rf $(BIN) *.o $(DEPDIR)
rm -rf $(BIN) *.o $(DEPDIR) *.class

View File

@ -0,0 +1,79 @@
#include "Sandbox.h"
#include "org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM.h"
#include "utils.h"
#include "jni_common.h"
/**
 * JNI entry point for Sandbox.jniGetMachineType() (JNI signature: ()J).
 *
 * Stub implementation: both arguments are ignored and the value handed back
 * to Java is always 0.
 */
JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType(JNIEnv* env, jobject thisObj)
{
    const jlong machineMask = 0;
    return machineMask;
}
/**
 * JNI entry point for Sandbox.jniInitializeClassFieldsAndMachineMask()
 * (JNI signature: (Ljava/lang/Class;Ljava/lang/Class;J)V).
 *
 * Pure pass-through: every argument is handed unchanged to the
 * VectorLoglessPairHMM entry point of the same name.
 */
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask(
    JNIEnv* env,
    jobject thisObject,
    jclass readDataHolderClass,
    jclass haplotypeDataHolderClass,
    jlong mask)
{
    Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeClassFieldsAndMachineMask(
        env, thisObject, readDataHolderClass, haplotypeDataHolderClass, mask);
}
/**
 * JNI entry point for Sandbox.jniInitializeHaplotypes()
 * (JNI signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V).
 *
 * Pure pass-through to the VectorLoglessPairHMM entry point of the same name.
 */
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes(
    JNIEnv* env,
    jobject thisObject,
    jint numHaplotypes,
    jobjectArray haplotypeDataArray)
{
    Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniInitializeHaplotypes(
        env, thisObject, numHaplotypes, haplotypeDataArray);
}
/**
 * JNI entry point for Sandbox.jniFinalizeRegion() (JNI signature: ()V).
 *
 * Pure pass-through to the VectorLoglessPairHMM entry point of the same name.
 */
JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion(JNIEnv* env, jobject thisObject)
{
    Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniFinalizeRegion(
        env, thisObject);
}
/**
 * JNI entry point for Sandbox.jniComputeLikelihoods()
 * (JNI signature:
 *  (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V).
 *
 * Pure pass-through: all arguments are forwarded, in order, to the
 * VectorLoglessPairHMM entry point of the same name.
 */
JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods(
    JNIEnv* env,
    jobject thisObject,
    jint numReads,
    jint numHaplotypes,
    jobjectArray readDataArray,
    jobjectArray haplotypeDataArray,
    jdoubleArray likelihoodArray,
    jint maxNumThreadsToUse)
{
    Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniComputeLikelihoods(
        env,
        thisObject,
        numReads,
        numHaplotypes,
        readDataArray,
        haplotypeDataArray,
        likelihoodArray,
        maxNumThreadsToUse);
}
/**
 * JNI entry point for Sandbox.jniClose() (JNI signature: ()V).
 *
 * Pure pass-through to the VectorLoglessPairHMM entry point of the same name.
 */
JNIEXPORT void JNICALL Java_Sandbox_jniClose(JNIEnv* env, jobject thisObject)
{
    Java_org_broadinstitute_sting_utils_pairhmm_VectorLoglessPairHMM_jniClose(
        env, thisObject);
}
/**
 * JNI entry point for Sandbox.doEverythingNative(String filename).
 *
 * Converts the Java string to a modified-UTF-8 C string, runs do_compute() on
 * it with do_compute()'s default arguments, and releases the C string again.
 *
 * @param env            JNI environment of the calling thread
 * @param thisObject     the calling Sandbox instance (unused)
 * @param fileNameString path of the test-case file to process
 */
JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
(JNIEnv* env, jobject thisObject, jstring fileNameString)
{
    const char* fileName = env->GetStringUTFChars(fileNameString, /*isCopy=*/ 0);
    //GetStringUTFChars returns NULL when the copy could not be made; an exception is
    //then already pending in the JVM, so just return and let Java see it
    if(!fileName)
        return;
    //do_compute() takes a non-const char*; NOTE(review): it presumably does not
    //modify the buffer - confirm before relying on this cast
    do_compute(const_cast<char*>(fileName));
    env->ReleaseStringUTFChars(fileNameString, fileName);
}

View File

@ -0,0 +1,71 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class Sandbox */
#ifndef _Included_Sandbox
#define _Included_Sandbox
#ifdef __cplusplus
extern "C" {
#endif
#undef Sandbox_enableAll
#define Sandbox_enableAll -1LL
/*
* Class: Sandbox
* Method: jniGetMachineType
* Signature: ()J
*/
JNIEXPORT jlong JNICALL Java_Sandbox_jniGetMachineType
(JNIEnv *, jobject);
/*
* Class: Sandbox
* Method: jniInitializeClassFieldsAndMachineMask
* Signature: (Ljava/lang/Class;Ljava/lang/Class;J)V
*/
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeClassFieldsAndMachineMask
(JNIEnv *, jobject, jclass, jclass, jlong);
/*
* Class: Sandbox
* Method: jniInitializeHaplotypes
* Signature: (I[LSandbox/JNIHaplotypeDataHolderClass;)V
*/
JNIEXPORT void JNICALL Java_Sandbox_jniInitializeHaplotypes
(JNIEnv *, jobject, jint, jobjectArray);
/*
* Class: Sandbox
* Method: jniFinalizeRegion
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_Sandbox_jniFinalizeRegion
(JNIEnv *, jobject);
/*
* Class: Sandbox
* Method: jniComputeLikelihoods
* Signature: (II[LSandbox/JNIReadDataHolderClass;[LSandbox/JNIHaplotypeDataHolderClass;[DI)V
*/
JNIEXPORT void JNICALL Java_Sandbox_jniComputeLikelihoods
(JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);
/*
* Class: Sandbox
* Method: jniClose
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_Sandbox_jniClose
(JNIEnv *, jobject);
/*
* Class: Sandbox
* Method: doEverythingNative
* Signature: (Ljava/lang/String;)V
* (the single argument is a java.lang.String, matching the jstring parameter below)
*/
JNIEXPORT void JNICALL Java_Sandbox_doEverythingNative
(JNIEnv *, jobject, jstring);
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,278 @@
import java.util.List;
import java.util.LinkedList;
import java.util.Map;
import java.util.HashMap;
import java.io.File;
import java.util.Scanner;
import java.io.IOException;
import java.io.FileNotFoundException;
import java.io.InputStreamReader;
/**
 * Stand-alone harness for the native VectorLoglessPairHMM library.
 * It parses a whitespace-separated test-case file, groups the lines into regions and pushes
 * every region through initialize() -> computeLikelihoods() -> finalizeRegion(), accumulating
 * timing information that is printed by close().
 */
public class Sandbox {
    //Thread count handed to the native compute kernel
    private static final int MAX_NUM_THREADS_TO_USE = 12;
    //A line with exactly this many tokens starts a new region; token 7 carries the haplotype count
    private static final int NUM_TOKENS_IN_REGION_HEADER = 8;
    //Tokens 0..5 are read from every line (haplotype, read bases and four per-base byte strings)
    private static final int MIN_TOKENS_PER_LINE = 6;
    //Subtracted from every byte of tokens 2..5 (presumably the Phred+33 ASCII offset - confirm)
    private static final int QUALITY_OFFSET = 33;

    //Cumulative nanoseconds spent preparing the arguments of the native compute call
    private long setupTime = 0;
    //Cumulative nanoseconds spent inside the native compute call
    private long computeTime = 0;

    //Used to copy references to byteArrays to JNI from reads
    protected class JNIReadDataHolderClass {
        public byte[] readBases = null;
        public byte[] readQuals = null;
        public byte[] insertionGOP = null;
        public byte[] deletionGOP = null;
        public byte[] overallGCP = null;
    }

    //Used to copy references to byteArrays to JNI from haplotypes
    protected class JNIHaplotypeDataHolderClass {
        public byte[] haplotypeBases = null;
    }

    /**
     * Return 64-bit mask representing machine capabilities
     * Bit 0 is LSB, bit 63 MSB
     * Bit 0 represents sse4.2 availability
     * Bit 1 represents AVX availability
     */
    public native long jniGetMachineType();

    public static final long enableAll = 0xFFFFFFFFFFFFFFFFL;

    /**
     * Function to initialize the fields of JNIReadDataHolderClass and JNIHaplotypeDataHolderClass from JVM.
     * C++ code gets FieldIDs for these classes once and re-uses these IDs for the remainder of the program. Field IDs do not
     * change per JVM session
     * @param readDataHolderClass class type of JNIReadDataHolderClass
     * @param haplotypeDataHolderClass class type of JNIHaplotypeDataHolderClass
     * @param mask mask is a 64 bit integer identical to the one received from jniGetMachineType(). Users can disable usage of some hardware features by zeroing some bits in the mask
     * */
    private native void jniInitializeClassFieldsAndMachineMask(Class<?> readDataHolderClass, Class<?> haplotypeDataHolderClass, long mask);

    //Guarded by the Sandbox.class monitor
    private static boolean isVectorLoglessPairHMMLibraryLoaded = false;

    /**
     * Loads the native library and initializes the FieldIDs the first time an instance is created.
     * The lock is the class object: locking on the flag itself would lock on a shared, reassigned
     * Boolean instance and would not reliably serialize concurrent constructors.
     */
    public Sandbox() {
        synchronized(Sandbox.class) {
            if(!isVectorLoglessPairHMMLibraryLoaded) {
                System.loadLibrary("VectorLoglessPairHMM");
                isVectorLoglessPairHMMLibraryLoaded = true;
                jniInitializeClassFieldsAndMachineMask(JNIReadDataHolderClass.class, JNIHaplotypeDataHolderClass.class, enableAll); //need to do this only once
            }
        }
    }

    private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);

    /**
     * Transfers the haplotypes of a region to JNI.
     * Since the haplotypes are the same for all calls to computeLikelihoods within a region,
     * they are transferred only once per region.
     * @param haplotypes haplotypes of the current region
     */
    public void initialize(final List<JNIHaplotypeDataHolderClass> haplotypes) {
        final int numHaplotypes = haplotypes.size();
        final JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
        int idx = 0;
        for(final JNIHaplotypeDataHolderClass currHaplotype : haplotypes)
        {
            haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
            haplotypeDataArray[idx].haplotypeBases = currHaplotype.haplotypeBases;
            ++idx;
        }
        jniInitializeHaplotypes(numHaplotypes, haplotypeDataArray);
    }

    /**
     * Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
     * accessing Java memory directly, still important to release memory from C++
     */
    private native void jniFinalizeRegion();

    public void finalizeRegion()
    {
        jniFinalizeRegion();
    }

    /**
     * Real compute kernel
     */
    private native void jniComputeLikelihoods(int numReads, int numHaplotypes, JNIReadDataHolderClass[] readDataArray,
            JNIHaplotypeDataHolderClass[] haplotypeDataArray, double[] likelihoodArray, int maxNumThreadsToUse);

    /**
     * Runs the native kernel on every (read, haplotype) pair of a region.
     * The likelihoods are written to a scratch array that is discarded; only the elapsed
     * times are kept. Setup time and compute time are accumulated separately.
     * @param reads reads of the current region
     * @param haplotypes haplotypes of the current region; only their number is used here
     */
    public void computeLikelihoods(final List<JNIReadDataHolderClass> reads, final List<JNIHaplotypeDataHolderClass> haplotypes) {
        //System.out.println("Region : "+reads.size()+" x "+haplotypes.size());
        final long setupStartTime = System.nanoTime();
        final int readListSize = reads.size();
        final int numHaplotypes = haplotypes.size();
        final JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
        int idx = 0;
        for(final JNIReadDataHolderClass read : reads)
        {
            final JNIReadDataHolderClass copy = new JNIReadDataHolderClass();
            copy.readBases = read.readBases;
            copy.readQuals = read.readQuals;
            copy.insertionGOP = read.insertionGOP;
            copy.deletionGOP = read.deletionGOP;
            copy.overallGCP = read.overallGCP;
            readDataArray[idx++] = copy;
        }
        final double[] mLikelihoodArray = new double[readListSize*numHaplotypes]; //to store results
        final long computeStartTime = System.nanoTime();
        setupTime += (computeStartTime - setupStartTime);
        //for(reads)
        //   for(haplotypes)
        //       compute_full_prob()
        //The haplotype array argument is null: presumably native code uses the haplotypes handed over by initialize()
        jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, null, mLikelihoodArray, MAX_NUM_THREADS_TO_USE);
        //Measured from computeStartTime so that setup time is not counted twice
        computeTime += (System.nanoTime() - computeStartTime);
    }

    /**
     * Print final profiling information from native code
     */
    public native void jniClose();

    public void close()
    {
        System.out.println("Time spent in setup for JNI call : "+(setupTime*1e-9)+" compute time : "+(computeTime*1e-9));
        jniClose();
    }

    /**
     * Splits a line on whitespace into the supplied buffer.
     * @param line line to split
     * @param tokens receives the tokens; tokens beyond its capacity are ignored
     * @return number of tokens stored
     */
    private static int tokenize(final String line, final String[] tokens)
    {
        final Scanner lineScanner = new Scanner(line);
        try
        {
            int count = 0;
            while(count < tokens.length && lineScanner.hasNext())
                tokens[count++] = lineScanner.next();
            return count;
        }
        finally
        {
            lineScanner.close();
        }
    }

    /**
     * Parses a count from a region header; anything that is not a positive integer yields 1,
     * so the count can safely be used as a divisor.
     */
    private static int parseCount(final String token)
    {
        try
        {
            final int value = Integer.parseInt(token);
            return (value > 0) ? value : 1;
        }
        catch(NumberFormatException e)
        {
            return 1;
        }
    }

    //Returns the bytes of token with QUALITY_OFFSET subtracted from each of them
    private static byte[] normalize(final String token)
    {
        final byte[] bytes = token.getBytes();
        for(int j=0;j<bytes.length;++j)
            bytes[j] -= QUALITY_OFFSET;
        return bytes;
    }

    //Runs one complete region through the native library; does nothing if either list is empty
    private void processRegion(final List<JNIReadDataHolderClass> readList, final List<JNIHaplotypeDataHolderClass> haplotypeList)
    {
        if(haplotypeList.size() > 0 && readList.size() > 0)
        {
            initialize(haplotypeList);
            computeLikelihoods(readList, haplotypeList);
            finalizeRegion();
        }
    }

    /**
     * Parses a test-case file and runs every region found in it through the native library.
     * Each line carries at least 6 whitespace-separated tokens: haplotype bases, read bases and
     * four per-base byte strings (quals, insertion GOP, deletion GOP, overall GCP).
     * A line with 8 tokens starts a new region; its last token is the number of haplotypes.
     * The first numHaplotypes lines of a region contribute a haplotype each and every
     * numHaplotypes-th line contributes a read. A blank or malformed line ends parsing;
     * regions read up to that point are still processed. close() is called at the end.
     * @param filename path of the test-case file
     */
    public void parseSandboxFile(String filename)
    {
        Scanner input = null;
        try
        {
            input = new Scanner(new File(filename));
        }
        catch(FileNotFoundException e)
        {
            System.err.println("File "+filename+" cannot be found/read");
            return;
        }
        try
        {
            int numHaplotypes = 0;
            int testCaseIdx = 0, haplotypeIdx = 0;
            final LinkedList<JNIHaplotypeDataHolderClass> haplotypeList = new LinkedList<JNIHaplotypeDataHolderClass>();
            final LinkedList<JNIReadDataHolderClass> readList = new LinkedList<JNIReadDataHolderClass>();
            boolean firstLine = true;
            final String[] currTokens = new String[NUM_TOKENS_IN_REGION_HEADER];
            while(input.hasNextLine())
            {
                final int numTokens = tokenize(input.nextLine(), currTokens);
                if(numTokens == 0)
                    break;
                //The very first line must be a region header, otherwise the haplotype count is unknown
                if(numTokens < MIN_TOKENS_PER_LINE || (firstLine && numTokens != NUM_TOKENS_IN_REGION_HEADER))
                {
                    System.err.println("File "+filename+" contains a malformed line with "+numTokens+" tokens - stopping");
                    break;
                }
                //start of new region
                if(numTokens == NUM_TOKENS_IN_REGION_HEADER)
                {
                    if(!firstLine)
                        processRegion(readList, haplotypeList);
                    //Token 6 (number of reads) is not needed: reads are derived from the line index
                    numHaplotypes = parseCount(currTokens[7]);
                    haplotypeIdx = testCaseIdx = 0;
                    readList.clear();
                    haplotypeList.clear();
                }
                if(haplotypeIdx < numHaplotypes)
                {
                    final JNIHaplotypeDataHolderClass haplotype = new JNIHaplotypeDataHolderClass();
                    haplotype.haplotypeBases = currTokens[0].getBytes();
                    haplotypeList.add(haplotype);
                }
                if(testCaseIdx%numHaplotypes == 0)
                {
                    final JNIReadDataHolderClass read = new JNIReadDataHolderClass();
                    read.readBases = currTokens[1].getBytes();
                    read.readQuals = normalize(currTokens[2]);
                    read.insertionGOP = normalize(currTokens[3]);
                    read.deletionGOP = normalize(currTokens[4]);
                    read.overallGCP = normalize(currTokens[5]);
                    readList.add(read);
                }
                ++testCaseIdx;
                ++haplotypeIdx;
                firstLine = false;
            }
            //last region of the file
            processRegion(readList, haplotypeList);
            close();
        }
        finally
        {
            input.close();
        }
    }

    private native void doEverythingNative(String filename);

    public static void main(String[] args)
    {
        if(args.length <= 0)
        {
            System.err.println("Needs 1 argument - <filename>");
            System.exit(-1);
        }
        //// Get runtime
        //java.lang.Runtime rt = java.lang.Runtime.getRuntime();
        //// Start a new process: UNIX command ls
        //String cmd = "/home/karthikg/broad/gsa-unstable/public/c++/VectorPairHMM/checker "+args[0];
        //try
        //{
        //System.out.println(cmd);
        //java.lang.Process p = rt.exec(cmd);
        //try
        //{
        //p.waitFor();
        //java.io.InputStream is = p.getInputStream();
        //java.io.BufferedReader reader = new java.io.BufferedReader(new InputStreamReader(is));
        //// And print each line
        //String s = null;
        //while ((s = reader.readLine()) != null) {
        //System.out.println(s);
        //}
        //is.close();
        //}
        //catch(InterruptedException e)
        //{
        //System.err.println(e);
        //}
        //}
        //catch(IOException e)
        //{
        //System.err.println(e);
        //}
        Sandbox t = new Sandbox();
        //t.doEverythingNative(args[0]);
        t.parseSandboxFile(args[0]);
    }
}

View File

@ -0,0 +1,13 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class Sandbox_JNIHaplotypeDataHolderClass */
#ifndef _Included_Sandbox_JNIHaplotypeDataHolderClass
#define _Included_Sandbox_JNIHaplotypeDataHolderClass
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -0,0 +1,13 @@
/* DO NOT EDIT THIS FILE - it is machine generated */
#include <jni.h>
/* Header for class Sandbox_JNIReadDataHolderClass */
#ifndef _Included_Sandbox_JNIReadDataHolderClass
#define _Included_Sandbox_JNIReadDataHolderClass
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __cplusplus
}
#endif
#endif

View File

@ -8,12 +8,6 @@
using namespace std;
#define BATCH_SIZE 10000
#define RUN_HYBRID
int main(int argc, char** argv)
{
if(argc < 2)
@ -24,82 +18,12 @@ int main(int argc, char** argv)
bool use_old_read_testcase = false;
if(argc >= 3 && string(argv[2]) == "1")
use_old_read_testcase = true;
unsigned chunk_size = 100;
unsigned chunk_size = 10000;
if(argc >= 4)
chunk_size = strtol(argv[3],0,10);
std::ifstream ifptr;
FILE* fptr = 0;
if(use_old_read_testcase)
{
fptr = fopen(argv[1],"r");
assert(fptr);
}
else
{
ifptr.open(argv[1]);
assert(ifptr.is_open());
}
do_compute(argv[1], use_old_read_testcase, chunk_size);
vector<testcase> tc_vector;
tc_vector.clear();
testcase tc;
uint64_t total_time = 0;
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
if(break_value >= 0)
tc_vector.push_back(tc);
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
{
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result;
}
total_time += get_time();
#pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
delete tc_vector[i].rs;
delete tc_vector[i].hap;
delete tc_vector[i].q;
delete tc_vector[i].i;
delete tc_vector[i].d;
delete tc_vector[i].c;
}
results_vec.clear();
tc_vector.clear();
}
if(break_value < 0)
break;
}
cout << "Total time "<< ((double)total_time)/1e9 << "\n";
if(use_old_read_testcase)
fclose(fptr);
else
ifptr.close();
return 0;
}

View File

@ -97,8 +97,8 @@ int read_testcase(testcase *tc, FILE* ifp)
tc->haplen = strlen(tc->hap);
tc->rslen = strlen(tc->rs);
//assert(tc->rslen < MROWS);
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
//tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
//tc->irs = (int *) malloc(tc->rslen*sizeof(int));
tc->q = (char *) malloc(sizeof(char) * tc->rslen);
tc->i = (char *) malloc(sizeof(char) * tc->rslen);
@ -115,10 +115,10 @@ int read_testcase(testcase *tc, FILE* ifp)
tc->i[x] = _i;
tc->d[x] = _d;
tc->c[x] = _c;
tc->irs[x] = tc->rs[x];
//tc->irs[x] = tc->rs[x];
}
for (x = 0; x < tc->haplen; x++)
tc->ihap[x] = tc->hap[x];
//for (x = 0; x < tc->haplen; x++)
//tc->ihap[x] = tc->hap[x];
free(q);
@ -270,3 +270,110 @@ uint64_t get_time(struct timespec* store_struct)
start_time = *ptr;
return diff_time;
}
//#define CHECK_VALUES 1
#define BATCH_SIZE 10000
#define RUN_HYBRID
void do_compute(char* filename, bool use_old_read_testcase, unsigned chunk_size)
{
FILE* fptr = 0;
ifstream ifptr;
if(use_old_read_testcase)
{
fptr = fopen(filename,"r");
assert(fptr);
}
else
{
ifptr.open(filename);
assert(ifptr.is_open());
}
vector<testcase> tc_vector;
tc_vector.clear();
testcase tc;
uint64_t vector_compute_time = 0;
uint64_t baseline_compute_time = 0;
uint64_t num_double_calls = 0;
bool all_ok = true;
#ifndef CHECK_VALUES
all_ok = false;
#endif
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
if(break_value >= 0)
tc_vector.push_back(tc);
if(tc_vector.size() == BATCH_SIZE || (break_value < 0 && tc_vector.size() > 0))
{
vector<double> results_vec;
vector<double> baseline_results_vec;
results_vec.clear();
baseline_results_vec.clear();
results_vec.resize(tc_vector.size());
baseline_results_vec.resize(tc_vector.size());
get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
float result_avxf = g_compute_full_prob_float(&tc, 0);
double result = 0;
if (result_avxf < MIN_ACCEPTED) {
double result_avxd = g_compute_full_prob_double(&tc, 0);
result = log10(result_avxd) - log10(ldexp(1.0, 1020.0));
++num_double_calls;
}
else
result = (double)(log10f(result_avxf) - log10f(ldexpf(1.f, 120.f)));
results_vec[i] = result;
}
vector_compute_time += get_time();
#ifdef CHECK_VALUES
#pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
double baseline_result = compute_full_prob<double>(&tc);
baseline_result = log10(baseline_result) - log10(ldexp(1.0, 1020.0));
baseline_results_vec[i] = baseline_result;
}
baseline_compute_time += get_time();
for(unsigned i=0;i<tc_vector.size();++i)
{
double baseline_result = baseline_results_vec[i];
double abs_error = fabs(baseline_result-results_vec[i]);
double rel_error = (baseline_result != 0) ? fabs(abs_error/baseline_result) : 0;
if(abs_error > 1e-5 && rel_error > 1e-5)
{
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
all_ok = false;
}
}
#endif
for(unsigned i=0;i<tc_vector.size();++i)
{
delete tc_vector[i].rs;
delete tc_vector[i].hap;
delete tc_vector[i].q;
delete tc_vector[i].i;
delete tc_vector[i].d;
delete tc_vector[i].c;
}
results_vec.clear();
tc_vector.clear();
}
if(break_value < 0)
break;
}
if(all_ok)
{
cout << "All output values within acceptable error\n";
cout << "Baseline compute time "<<baseline_compute_time*1e-9<<"\n";
}
cout << "Num double invocations "<<num_double_calls<<"\n";
cout << "Vector compute time "<< vector_compute_time*1e-9 << "\n";
if(use_old_read_testcase)
fclose(fptr);
else
ifptr.close();
}

View File

@ -37,4 +37,5 @@ enum ProcessorCapabilitiesEnum
#define ENABLE_ALL_HARDWARE_FEATURES 0xFFFFFFFFFFFFFFFFull
uint64_t get_machine_capabilities();
void initialize_function_pointers(uint64_t mask=ENABLE_ALL_HARDWARE_FEATURES);
//Reads the testcases stored in filename, runs the vectorized compute kernels on them and prints timing information
//use_old_read_testcase: true  - the file is parsed with read_testcase() (FILE* based)
//                       false - the file is parsed with read_mod_testcase() (ifstream based)
//chunk_size: chunk size of the OpenMP dynamic schedule used by the compute loops
void do_compute(char* filename, bool use_old_read_testcase=true, unsigned chunk_size=10000);
#endif