Added support to transfer haplotypes once per region to the JNI

Re-use transferred haplotypes (stored in GlobalRef) across calls to
computeLikelihoods
This commit is contained in:
Karthik Gururaj 2014-01-22 10:52:41 -08:00
parent 868a8394f7
commit 733a84e4f9
13 changed files with 359 additions and 236 deletions

View File

@ -0,0 +1,76 @@
#include "LoadTimeInitializer.h"
#include "utils.h"
using namespace std;
LoadTimeInitializer g_load_time_initializer;
LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
m_sumNumReads = 0;
m_sumSquareNumReads = 0;
m_sumNumHaplotypes = 0;
m_sumSquareNumHaplotypes = 0;
m_sumNumTestcases = 0;
m_sumSquareNumTestcases = 0;
m_maxNumTestcases = 0;
m_num_invocations = 0;
m_compute_time = 0;
m_data_transfer_time = 0;
m_filename_to_fptr.clear();
initialize_function_pointers();
cout.flush();
}
void LoadTimeInitializer::print_profiling()
{
double mean_val;
cout << "Compute time "<<m_compute_time<<"\n";
cout << "Data initialization time "<<m_data_transfer_time<<"\n";
cout <<"Invocations : "<<m_num_invocations<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
mean_val = m_sumNumReads/m_num_invocations;
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumHaplotypes/m_num_invocations;
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumTestcases/m_num_invocations;
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\t"<<m_maxNumTestcases<<"\n";
cout.flush();
}
void LoadTimeInitializer::debug_dump(string filename, string s, bool to_append, bool add_newline)
{
map<string, ofstream*>::iterator mI = m_filename_to_fptr.find(filename);
ofstream* fptr = 0;
if(mI == m_filename_to_fptr.end())
{
m_filename_to_fptr[filename] = new ofstream();
fptr = m_filename_to_fptr[filename];
fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
assert(fptr->is_open());
}
else
fptr = (*mI).second;
//ofstream fptr;
//fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
(*fptr) << s;
if(add_newline)
(*fptr) << "\n";
//fptr.close();
}
void LoadTimeInitializer::debug_close()
{
for(map<string,ofstream*>::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end();
mB != mE;mB++)
{
(*mB).second->close();
delete (*mB).second;
}
m_filename_to_fptr.clear();
}

View File

@ -0,0 +1,37 @@
#ifndef LOAD_TIME_INITIALIZER_H
#define LOAD_TIME_INITIALIZER_H
#include "headers.h"
#include <jni.h>
class LoadTimeInitializer
{
public:
LoadTimeInitializer(); //will be called when library is loaded
void print_profiling();
void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true);
void debug_close();
jfieldID m_readBasesFID;
jfieldID m_readQualsFID;
jfieldID m_insertionGOPFID;
jfieldID m_deletionGOPFID;
jfieldID m_overallGCPFID;
jfieldID m_haplotypeBasesFID;
//used to compute avg, variance of #testcases
double m_sumNumReads;
double m_sumSquareNumReads;
double m_sumNumHaplotypes;
double m_sumSquareNumHaplotypes;
double m_sumNumTestcases;
double m_sumSquareNumTestcases;
unsigned m_maxNumTestcases;
unsigned m_num_invocations;
//timing
double m_compute_time;
double m_data_transfer_time;
private:
std::map<std::string, std::ofstream*> m_filename_to_fptr;
};
extern LoadTimeInitializer g_load_time_initializer;
#endif

View File

@ -1,4 +1,5 @@
#OMPCFLAGS=-fopenmp
OMPCFLAGS=-fopenmp
OMPLFLAGS=-fopenmp #-openmp-link static
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
@ -19,7 +20,7 @@ DEPDIR=.deps
DF=$(DEPDIR)/$(*).d
#Common across libJNI and sandbox
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc
COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc LoadTimeInitializer.cc
#Part of libJNI
LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES)
SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc
@ -28,7 +29,7 @@ COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o)
#No vectorization for these files
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc
NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc
#Use -xAVX for these files
AVX_SOURCES=avx_function_instantiations.cc
#Use -xSSE4.2 for these files
@ -47,13 +48,13 @@ all: $(BIN)
-include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d))
checker: pairhmm-1-base.o $(COMMON_OBJECTS)
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
$(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS)
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
libJNILoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) $(OMPCFLAGS) -shared -o $@ $(LIBOBJECTS)
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS)
$(OBJECTS): %.o: %.cc

View File

@ -11,96 +11,10 @@
#include "template.h"
#include "utils.h"
#include "LoadTimeInitializer.h"
using namespace std;
class LoadTimeInitializer
{
public:
LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
m_sumNumReads = 0;
m_sumSquareNumReads = 0;
m_sumNumHaplotypes = 0;
m_sumSquareNumHaplotypes = 0;
m_sumNumTestcases = 0;
m_sumSquareNumTestcases = 0;
m_maxNumTestcases = 0;
m_num_invocations = 0;
m_filename_to_fptr.clear();
initialize_function_pointers();
cout.flush();
}
void print_profiling()
{
double mean_val;
cout <<"Invocations : "<<m_num_invocations<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
mean_val = m_sumNumReads/m_num_invocations;
cout << "reads\t"<<m_sumNumReads<<"\t"<<m_sumSquareNumReads<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumReads/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumHaplotypes/m_num_invocations;
cout << "haplotypes\t"<<m_sumNumHaplotypes<<"\t"<<m_sumSquareNumHaplotypes<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumHaplotypes/m_num_invocations)-mean_val*mean_val<<"\n";
mean_val = m_sumNumTestcases/m_num_invocations;
cout << "numtestcases\t"<<m_sumNumTestcases<<"\t"<<m_sumSquareNumTestcases<<"\t"<<mean_val<<"\t"<<
(m_sumSquareNumTestcases/m_num_invocations)-mean_val*mean_val<<"\t"<<m_maxNumTestcases<<"\n";
cout.flush();
}
void debug_dump(string filename, string s, bool to_append, bool add_newline=true)
{
map<string, ofstream*>::iterator mI = m_filename_to_fptr.find(filename);
ofstream* fptr = 0;
if(mI == m_filename_to_fptr.end())
{
m_filename_to_fptr[filename] = new ofstream();
fptr = m_filename_to_fptr[filename];
fptr->open(filename.c_str(), to_append ? ios::app : ios::out);
assert(fptr->is_open());
}
else
fptr = (*mI).second;
//ofstream fptr;
//fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out);
(*fptr) << s;
if(add_newline)
(*fptr) << "\n";
//fptr.close();
}
void debug_close()
{
for(map<string,ofstream*>::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end();
mB != mE;mB++)
{
(*mB).second->close();
delete (*mB).second;
}
m_filename_to_fptr.clear();
}
jfieldID m_readBasesFID;
jfieldID m_readQualsFID;
jfieldID m_insertionGOPFID;
jfieldID m_deletionGOPFID;
jfieldID m_overallGCPFID;
jfieldID m_haplotypeBasesFID;
//used to compute avg, variance of #testcases
double m_sumNumReads;
double m_sumSquareNumReads;
double m_sumNumHaplotypes;
double m_sumSquareNumHaplotypes;
double m_sumNumTestcases;
double m_sumSquareNumTestcases;
unsigned m_maxNumTestcases;
unsigned m_num_invocations;
private:
map<string, ofstream*> m_filename_to_fptr;
};
LoadTimeInitializer g_load_time_initializer;
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities
(JNIEnv* env, jclass thisObject,
jobjectArray transition, jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP
@ -128,7 +42,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePrior
//Gets direct access to Java arrays
#define GET_BYTE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical
#define JNI_RELEASE_MODE 0
#define JNI_RO_RELEASE_MODE JNI_ABORT
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical
@ -136,7 +50,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePrior
//Likely makes copy of Java arrays to JNI C++ space
#define GET_BYTE_ARRAY_ELEMENTS env->GetByteArrayElements
#define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleaseByteArrayElements
#define JNI_RELEASE_MODE JNI_ABORT
#define JNI_RO_RELEASE_MODE JNI_ABORT
#define GET_DOUBLE_ARRAY_ELEMENTS env->GetDoubleArrayElements
#define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleaseDoubleArrayElements
@ -187,12 +101,12 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadL
#endif
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RO_RELEASE_MODE);
return 0.0;
}
@ -226,6 +140,46 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
g_load_time_initializer.m_haplotypeBasesFID = fid;
}
//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region,
//transfer the list only once
vector<pair<jbyteArray, jbyte*> > g_haplotypeBasesArrayVector;
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeHaplotypes
(JNIEnv * env, jobject thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray)
{
jboolean is_copy = JNI_FALSE;
//To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
haplotypeBasesArrayVector.clear();
haplotypeBasesArrayVector.resize(numHaplotypes);
for(unsigned j=0;j<numHaplotypes;++j)
{
jobject haplotypeObject = env->GetObjectArrayElement(haplotypeDataArray, j);
jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID);
#ifdef ENABLE_ASSERTIONS
assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str());
#endif
//Need a global reference as this will be accessed across multiple JNI calls to JNIComputeLikelihoods()
jbyteArray haplotypeBasesGlobalRef = (jbyteArray)env->NewGlobalRef(haplotypeBases);
#ifdef ENABLE_ASSERTIONS
assert(haplotypeBasesGlobalRef && ("Could not get global ref to haplotypeBases at index : "+to_string(j)+"\n").c_str());
#endif
env->DeleteLocalRef(haplotypeBases); //free the local reference
jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBasesGlobalRef, &is_copy);
#ifdef ENABLE_ASSERTIONS
assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
assert(env->GetArrayLength(haplotypeBasesGlobalRef) < MCOLS);
#endif
#ifdef DEBUG0_1
cout << "JNI haplotype length "<<env->GetArrayLength(haplotypeBasesGlobalRef)<<"\n";
#endif
haplotypeBasesArrayVector[j] = make_pair(haplotypeBasesGlobalRef, haplotypeBasesArray);
#ifdef DEBUG3
for(unsigned k=0;k<env->GetArrayLength(haplotypeBases);++k)
g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
#endif
}
}
//JNI function to invoke compute_full_prob_avx
//readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc
//haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases
@ -235,37 +189,16 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
(JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes,
jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse)
{
jboolean is_copy = JNI_FALSE;
//To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector
vector<pair<jbyteArray, jbyte*> > haplotypeBasesArrayVector;
haplotypeBasesArrayVector.clear();
haplotypeBasesArrayVector.resize(numHaplotypes);
#ifdef DEBUG0_1
cout << "JNI numReads "<<numReads<<" numHaplotypes "<<numHaplotypes<<"\n";
#endif
for(unsigned j=0;j<numHaplotypes;++j)
{
jobject haplotypeObject = env->GetObjectArrayElement(haplotypeDataArray, j);
jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID);
#ifdef ENABLE_ASSERTIONS
assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str());
#endif
jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy);
#ifdef ENABLE_ASSERTIONS
assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI");
assert(env->GetArrayLength(haplotypeBases) < MCOLS);
#endif
#ifdef DEBUG0_1
cout << "JNI haplotype length "<<env->GetArrayLength(haplotypeBases)<<"\n";
#endif
haplotypeBasesArrayVector[j] = make_pair(haplotypeBases, haplotypeBasesArray);
#ifdef DEBUG3
for(unsigned k=0;k<env->GetArrayLength(haplotypeBases);++k)
g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true);
#endif
}
double start_time = 0;
//haplotype vector from earlier store - note the reference to vector, not copying
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
jboolean is_copy = JNI_FALSE;
unsigned numTestCases = numReads*numHaplotypes;
//vector to store results
vector<testcase> tc_array;
tc_array.clear();
tc_array.resize(numTestCases);
@ -274,6 +207,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
vector<vector<pair<jbyteArray,jbyte*> > > readBasesArrayVector;
readBasesArrayVector.clear();
readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING
start_time = getCurrClk();
#endif
for(unsigned i=0;i<numReads;++i)
{
//Get bytearray fields from read
@ -340,25 +276,30 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
tc_array[tc_idx].d[k] = (int)deletionGOPArray[k];
tc_array[tc_idx].c[k] = (int)overallGCPArray[k];
}
++tc_idx;
}
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RELEASE_MODE); //order of GET-RELEASE is important
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE); //order of GET-RELEASE is important
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
//Release readBases at end because it is used by compute_full_prob
readBasesArrayVector[i].clear();
readBasesArrayVector[i].resize(1);
readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
}
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
#endif
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
#ifdef ENABLE_ASSERTIONS
assert(likelihoodDoubleArray && "likelihoodArray is NULL");
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif
#ifdef DO_PROFILING
start_time = getCurrClk();
#endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
@ -373,27 +314,31 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
likelihoodDoubleArray[tc_idx] = result;
}
#ifdef DO_PROFILING
g_load_time_initializer.m_compute_time += (getCurrClk()-start_time);
#endif
#ifdef DEBUG
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
{
g_load_time_initializer.debug_dump("return_values_jni.txt",to_string(likelihoodDoubleArray[tc_idx]),true);
}
#endif
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0);
#ifdef DO_PROFILING
start_time = getCurrClk();
#endif
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory
//Release read arrays first
for(int i=readBasesArrayVector.size()-1;i>=0;--i)//note the order - reverse of GET
{
for(int j=readBasesArrayVector[i].size()-1;j>=0;--j)
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE);
readBasesArrayVector[i].clear();
}
readBasesArrayVector.clear();
//Now release haplotype arrays
for(int j=numHaplotypes-1;j>=0;--j) //note the order - reverse of GET
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RELEASE_MODE);
haplotypeBasesArrayVector.clear();
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
#endif
tc_array.clear();
#ifdef DO_PROFILING
g_load_time_initializer.m_sumNumReads += numReads;
@ -411,6 +356,21 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
#endif
}
//Release haplotypes at the end of a region
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniFinalizeRegion
(JNIEnv * env, jobject thisObject)
{
vector<pair<jbyteArray, jbyte*> >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector;
//Now release haplotype arrays
for(int j=haplotypeBasesArrayVector.size()-1;j>=0;--j) //note the order - reverse of GET
{
RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RO_RELEASE_MODE);
env->DeleteGlobalRef(haplotypeBasesArrayVector[j].first); //free the global reference
}
haplotypeBasesArrayVector.clear();
}
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose
(JNIEnv* env, jobject thisObject)
{

View File

@ -21,6 +21,34 @@ extern "C" {
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToDeletion 4L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion 5L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug 0L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_verify
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_verify 0L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug0_1
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug0_1 0L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug1
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug1 0L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug2
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug2 0L
#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug3
#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug3 0L
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniGlobalInit
* Signature: (Ljava/lang/Class;Ljava/lang/Class;)V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit
(JNIEnv *, jobject, jclass, jclass);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniClose
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose
(JNIEnv *, jobject);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniInitialize
@ -45,39 +73,38 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePriorsAndUpdateCells
(JNIEnv *, jobject, jboolean, jint, jint, jbyteArray, jbyteArray, jbyteArray, jint);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniSubComputeReadLikelihoodGivenHaplotypeLog10
* Signature: (II[B[B[B[B[B[BI)D
*/
JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10
(JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint);
(JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniGlobalInit
* Signature: (Ljava/lang/Class;Ljava/lang/Class;)V
* Method: jniInitializeHaplotypes
* Signature: (I[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;)V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit
(JNIEnv *, jobject, jclass, jclass);
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeHaplotypes
(JNIEnv *, jobject, jint, jobjectArray);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniFinalizeRegion
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniFinalizeRegion
(JNIEnv *, jobject);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniComputeLikelihoods
* Signature: ([Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;[D)V
* Signature: (II[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;[DI)V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods
(JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint);
/*
* Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM
* Method: jniClose
* Signature: ()V
*/
JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose
(JNIEnv *, jobject);
#ifdef __cplusplus
}
#endif

View File

@ -4,26 +4,15 @@
#include "headers.h"
#include "template.h"
#include "utils.h"
#include "LoadTimeInitializer.h"
using namespace std;
class LoadTimeInitializer
{
public:
LoadTimeInitializer() //will be called when library is loaded
{
ConvertChar::init();
}
};
LoadTimeInitializer g_load_time_initializer;
#define BATCH_SIZE 10000
#define RUN_HYBRID
double getCurrClk() {
struct timeval tv ;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
}
int main(int argc, char** argv)
{
@ -39,9 +28,6 @@ int main(int argc, char** argv)
if(argc >= 4)
chunk_size = strtol(argv[3],0,10);
initialize_function_pointers();
std::ifstream ifptr;
FILE* fptr = 0;
if(use_old_read_testcase)

View File

@ -10,14 +10,9 @@
#define BATCH_SIZE 10000
#define RUN_HYBRID
double getCurrClk();
int thread_level_parallelism_enabled = false ;
double getCurrClk() {
struct timeval tv ;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
}
void print128b_F(__m128 x)
{
float *p = (float *)(&x);

View File

@ -1,6 +1,6 @@
#!/bin/bash
rm -f *.txt
export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
rm -f *.txt *.log
GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \

View File

@ -240,3 +240,8 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
return tokens.size();
}
double getCurrClk() {
struct timeval tv ;
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
}

View File

@ -1,6 +1,7 @@
#ifndef PAIRHMM_UTIL_H
#define PAIRHMM_UTIL_H
#include "template.h"
template<class T>
std::string to_string(T obj)
@ -25,4 +26,5 @@ void debug_dump(std::string filename, std::string s, bool to_append, bool add_ne
template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
void initialize_function_pointers();
double getCurrClk();
#endif

View File

@ -335,6 +335,11 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH);
}
private void finalizePairHMM()
{
pairHMMThreadLocal.get().finalizeRegion();
}
@Override
public Map<String, PerReadAlleleLikelihoodMap> computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final Map<String, List<GATKSAMRecord>> perSampleReadList ) {
@ -352,6 +357,9 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation
map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE);
stratifiedReadMap.put(sampleEntry.getKey(), map);
}
//Used mostly by the JNI implementation(s) to free arrays
finalizePairHMM();
return stratifiedReadMap;
}

View File

@ -68,8 +68,8 @@ import java.util.HashMap;
public class JNILoglessPairHMM extends LoglessPairHMM {
private static final boolean debug = false; //simulates ifdef
private static final boolean verify = debug || false; //simulates ifdef
private static final boolean debug0_1 = false; //simulates ifdef
private static final boolean verify = debug || true; //simulates ifdef
private static final boolean debug0_1 = true; //simulates ifdef
private static final boolean debug1 = false; //simulates ifdef
private static final boolean debug2 = false;
private static final boolean debug3 = false;
@ -148,10 +148,10 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
jniInitialize(readMaxLength, haplotypeMaxLength);
}
private native void jniInitialize(final int numHaplotypes, final int numTotalReads, JNIHaplotypeDataHolderClass[] haplotypeDataArray,
JNIReadDataHolderClass[] readDataArray);
HashMap<String, Integer> sampleToIndex = new HashMap<String, Integer>();
//Used to transfer data to JNI
private HashMap<Haplotype,Integer> haplotypeToHaplotypeListIdxMap = new HashMap<Haplotype,Integer>();
private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray);
//Used to transfer data to JNI
//Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region
/**
* {@inheritDoc}
*/
@ -159,7 +159,6 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
public void initialize( final List<Haplotype> haplotypes, final Map<String, List<GATKSAMRecord>> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) {
if(verify)
super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength);
/*
int numHaplotypes = haplotypes.size();
JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
int idx = 0;
@ -167,32 +166,22 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
{
haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
haplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases();
haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx);
++idx;
}
sampleToIndex.clear();
int totalNumReads = 0;
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
{
sampleToIndex.put(currEntry.getKey(), totalNumReads);
totalNumReads += currEntry.getValue().size();
}
jniInitializeHaplotypes(numHaplotypes, haplotypeDataArray);
}
JNIHaplotypeDataHolderClass[] readDataArray = new JNIReadDataHolderClass[totalNumReads];
idx = 0;
for(final Map.Entry<String, List<GATKSAMRecord>> currEntry : perSampleReadList)
{
for(GATKSAMRecord read : currEntry.getValue())
{
readDataArray[idx] = new JNIReadDataHolderClass();
readDataArray[idx].readBases = read.getReadBases();
readDataArray[idx].readQuals = read.getBaseQualities();
readDataArray[idx].insertionGOP = read.getBaseInsertionQualities();
readDataArray[idx].deletionGOP = read.getBaseDeletionQualities();
readDataArray[idx].overallGCP = GCPArrayMap.get(read);
++idx;
}
}
jniInitialize(numHaplotypes, numTotalReads, haplotypeDataArray, readDataArray);*/
private native void jniFinalizeRegion();
//Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not
//accessing Java memory directly, still important to release memory from C++
/**
* {@inheritDoc}
*/
@Override
public void finalizeRegion()
{
jniFinalizeRegion();
}
//Real compute kernel
@ -216,10 +205,10 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode");
}
int readListSize = reads.size();
int alleleHaplotypeMapSize = alleleHaplotypeMap.size();
int numTestcases = readListSize*alleleHaplotypeMapSize;
int numHaplotypes = alleleHaplotypeMap.size();
int numTestcases = readListSize*numHaplotypes;
if(debug0_1)
System.out.println("Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize);
System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize];
int idx = 0;
for(GATKSAMRecord read : reads)
@ -246,39 +235,68 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
}
++idx;
}
JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[alleleHaplotypeMapSize];
idx = 0;
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes];
if(verify)
{
haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
haplotypeDataArray[idx].haplotypeBases = currEntry.getValue().getBases();
if(debug0_1)
System.out.println("Java haplotype length "+haplotypeDataArray[idx].haplotypeBases.length);
if(debug3)
idx = 0;
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always
{
for(int i=0;i<haplotypeDataArray[idx].haplotypeBases.length;++i)
debugDump("haplotype_bases_java.txt",String.format("%d\n",(int)haplotypeDataArray[idx].haplotypeBases[i]),true);
haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass();
haplotypeDataArray[idx].haplotypeBases = currEntry.getValue().getBases();
if(debug0_1)
System.out.println("Java haplotype length "+haplotypeDataArray[idx].haplotypeBases.length);
if(debug3)
{
for(int i=0;i<haplotypeDataArray[idx].haplotypeBases.length;++i)
debugDump("haplotype_bases_java.txt",String.format("%d\n",(int)haplotypeDataArray[idx].haplotypeBases[i]),true);
}
++idx;
}
++idx;
}
double[] likelihoodArray = new double[readListSize*alleleHaplotypeMapSize]; //to store results
double[] likelihoodArray = new double[readListSize*numHaplotypes]; //to store results
//for(reads)
// for(haplotypes)
// compute_full_prob()
jniComputeLikelihoods(readListSize, alleleHaplotypeMapSize, readDataArray, haplotypeDataArray, likelihoodArray, 12);
jniComputeLikelihoods(readListSize, numHaplotypes, readDataArray, haplotypeDataArray, likelihoodArray, 12);
//final PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
PerReadAlleleLikelihoodMap likelihoodMap = new PerReadAlleleLikelihoodMap();
idx = 0;
int idxInsideHaplotypeList = 0;
int readIdx = 0;
for(GATKSAMRecord read : reads)
{
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
{
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]);
//Since the order of haplotypes in the List<Haplotype> and alleleHaplotypeMap is different,
//get idx of current haplotype in the list and use this idx to get the right likelihoodValue
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[readIdx + idxInsideHaplotypeList]);
++idx;
}
readIdx += numHaplotypes;
}
if(verify)
{
//re-order values in likelihoodArray
double[] tmpArray = new double[numHaplotypes];
idx = 0;
idxInsideHaplotypeList = 0;
readIdx = 0;
for(GATKSAMRecord read : reads)
{
for(int j=0;j<numHaplotypes;++j)
tmpArray[j] = likelihoodArray[readIdx+j];
for (Map.Entry<Allele, Haplotype> currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always
{
idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue());
likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList];
++idx;
}
readIdx += numHaplotypes;
}
//to compare values
likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap);
//for floating point values, no exact equality
@ -304,23 +322,23 @@ public class JNILoglessPairHMM extends LoglessPairHMM {
if(toDump)
{
idx = 0;
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize);
System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes);
for(int i=0;i<readDataArray.length;++i)
{
for(int j=0;j<haplotypeDataArray.length;++j)
{
debugDump("debug_dump.log",new String(haplotypeDataArray[j].haplotypeBases)+" ",true);
debugDump("debug_dump.log",new String(readDataArray[i].readBases)+" ",true);
debugDump("debug_dump.txt",new String(haplotypeDataArray[j].haplotypeBases)+" ",true);
debugDump("debug_dump.txt",new String(readDataArray[i].readBases)+" ",true);
for(int k=0;k<readDataArray[i].readBases.length;++k)
debugDump("debug_dump.log",String.format("%d",(int)(readDataArray[i].readQuals[k]))+" ",true);
debugDump("debug_dump.txt",String.format("%d",(int)(readDataArray[i].readQuals[k]))+" ",true);
for(int k=0;k<readDataArray[i].readBases.length;++k)
debugDump("debug_dump.log",String.format("%d",(int)(readDataArray[i].insertionGOP[k]))+" ",true);
debugDump("debug_dump.txt",String.format("%d",(int)(readDataArray[i].insertionGOP[k]))+" ",true);
for(int k=0;k<readDataArray[i].readBases.length;++k)
debugDump("debug_dump.log",String.format("%d",(int)(readDataArray[i].deletionGOP[k]))+" ",true);
debugDump("debug_dump.txt",String.format("%d",(int)(readDataArray[i].deletionGOP[k]))+" ",true);
for(int k=0;k<readDataArray[i].readBases.length;++k)
debugDump("debug_dump.log",String.format("%d",(int)(readDataArray[i].overallGCP[k]))+" ",true);
debugDump("debug_dump.log","\n",true);
debugDump("debug_results.log",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
debugDump("debug_dump.txt",String.format("%d",(int)(readDataArray[i].overallGCP[k]))+" ",true);
debugDump("debug_dump.txt","\n",true);
debugDump("debug_results.txt",String.format("%e %e\n",mLikelihoodArray[idx],likelihoodArray[idx]),true);
++idx;
}
}

View File

@ -100,6 +100,14 @@ public abstract class PairHMM {
initialized = true;
}
/**
* Called at the end of PairHMM for a region - mostly used by the JNI implementations
*/
public void finalizeRegion()
{
;
}
/**
* Initialize this PairHMM, making it suitable to run against a read and haplotype with given lengths
* This function is used by the JNI implementations to transfer all data once to the native code