From 733a84e4f9e04885e6a6aa9723973057bc1693a0 Mon Sep 17 00:00:00 2001 From: Karthik Gururaj Date: Wed, 22 Jan 2014 10:52:41 -0800 Subject: [PATCH] Added support to transfer haplotypes once per region to the JNI Re-use transferred haplotypes (stored in GlobalRef) across calls to computeLikelihoods --- PairHMM_JNI/LoadTimeInitializer.cc | 76 ++++++ PairHMM_JNI/LoadTimeInitializer.h | 37 +++ PairHMM_JNI/Makefile | 13 +- ...e_sting_utils_pairhmm_JNILoglessPairHMM.cc | 228 ++++++++---------- ...te_sting_utils_pairhmm_JNILoglessPairHMM.h | 57 +++-- PairHMM_JNI/pairhmm-1-base.cc | 20 +- PairHMM_JNI/pairhmm-template-main.cc | 7 +- PairHMM_JNI/run.sh | 4 +- PairHMM_JNI/utils.cc | 5 + PairHMM_JNI/utils.h | 2 + .../PairHMMLikelihoodCalculationEngine.java | 8 + .../utils/pairhmm/JNILoglessPairHMM.java | 130 +++++----- .../sting/utils/pairhmm/PairHMM.java | 8 + 13 files changed, 359 insertions(+), 236 deletions(-) create mode 100644 PairHMM_JNI/LoadTimeInitializer.cc create mode 100644 PairHMM_JNI/LoadTimeInitializer.h diff --git a/PairHMM_JNI/LoadTimeInitializer.cc b/PairHMM_JNI/LoadTimeInitializer.cc new file mode 100644 index 000000000..946451952 --- /dev/null +++ b/PairHMM_JNI/LoadTimeInitializer.cc @@ -0,0 +1,76 @@ +#include "LoadTimeInitializer.h" +#include "utils.h" +using namespace std; + +LoadTimeInitializer g_load_time_initializer; + +LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loaded +{ + ConvertChar::init(); + m_sumNumReads = 0; + m_sumSquareNumReads = 0; + m_sumNumHaplotypes = 0; + m_sumSquareNumHaplotypes = 0; + m_sumNumTestcases = 0; + m_sumSquareNumTestcases = 0; + m_maxNumTestcases = 0; + m_num_invocations = 0; + m_compute_time = 0; + m_data_transfer_time = 0; + + m_filename_to_fptr.clear(); + + initialize_function_pointers(); + cout.flush(); +} + +void LoadTimeInitializer::print_profiling() +{ + double mean_val; + cout << "Compute time "<::iterator mI = m_filename_to_fptr.find(filename); + ofstream* fptr = 0; + if(mI == m_filename_to_fptr.end()) + { + m_filename_to_fptr[filename] = new ofstream(); + fptr = m_filename_to_fptr[filename]; + fptr->open(filename.c_str(), to_append ? ios::app : ios::out); + assert(fptr->is_open()); + } + else + fptr = (*mI).second; + //ofstream fptr; + //fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); + (*fptr) << s; + if(add_newline) + (*fptr) << "\n"; + //fptr.close(); +} +void LoadTimeInitializer::debug_close() +{ + for(map::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end(); + mB != mE;mB++) + { + (*mB).second->close(); + delete (*mB).second; + } + m_filename_to_fptr.clear(); +} + diff --git a/PairHMM_JNI/LoadTimeInitializer.h b/PairHMM_JNI/LoadTimeInitializer.h new file mode 100644 index 000000000..8a03ea0f4 --- /dev/null +++ b/PairHMM_JNI/LoadTimeInitializer.h @@ -0,0 +1,37 @@ +#ifndef LOAD_TIME_INITIALIZER_H +#define LOAD_TIME_INITIALIZER_H +#include "headers.h" +#include +class LoadTimeInitializer +{ + public: + LoadTimeInitializer(); //will be called when library is loaded + void print_profiling(); + void debug_dump(std::string filename, std::string s, bool to_append, bool add_newline=true); + void debug_close(); + + jfieldID m_readBasesFID; + jfieldID m_readQualsFID; + jfieldID m_insertionGOPFID; + jfieldID m_deletionGOPFID; + jfieldID m_overallGCPFID; + jfieldID m_haplotypeBasesFID; + //used to compute avg, variance of #testcases + double m_sumNumReads; + double m_sumSquareNumReads; + double m_sumNumHaplotypes; + double m_sumSquareNumHaplotypes; + double m_sumNumTestcases; + double m_sumSquareNumTestcases; + unsigned m_maxNumTestcases; + unsigned m_num_invocations; + //timing + double m_compute_time; + double m_data_transfer_time; + private: + std::map m_filename_to_fptr; +}; +extern LoadTimeInitializer g_load_time_initializer; + + +#endif diff --git a/PairHMM_JNI/Makefile b/PairHMM_JNI/Makefile index f6dfbb1f0..7506279ef 100644 --- a/PairHMM_JNI/Makefile +++ b/PairHMM_JNI/Makefile @@ -1,4 +1,5 @@ -#OMPCFLAGS=-fopenmp +OMPCFLAGS=-fopenmp +OMPLFLAGS=-fopenmp #-openmp-link static #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas #CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas @@ -19,7 +20,7 @@ DEPDIR=.deps DF=$(DEPDIR)/$(*).d #Common across libJNI and sandbox -COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc +COMMON_SOURCES=utils.cc avx_function_instantiations.cc baseline.cc sse_function_instantiations.cc LoadTimeInitializer.cc #Part of libJNI LIBSOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc $(COMMON_SOURCES) SOURCES=$(LIBSOURCES) pairhmm-template-main.cc pairhmm-1-base.cc @@ -28,7 +29,7 @@ COMMON_OBJECTS=$(COMMON_SOURCES:.cc=.o) #No vectorization for these files -NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc +NO_VECTOR_SOURCES=org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc pairhmm-template-main.cc pairhmm-1-base.cc utils.cc baseline.cc LoadTimeInitializer.cc #Use -xAVX for these files AVX_SOURCES=avx_function_instantiations.cc #Use -xSSE4.2 for these files @@ -47,13 +48,13 @@ all: $(BIN) -include $(addprefix $(DEPDIR)/,$(SOURCES:.cc=.d)) checker: pairhmm-1-base.o $(COMMON_OBJECTS) - $(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS) + $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS) - $(CXX) $(OMPCFLAGS) -o $@ $^ $(LDFLAGS) + $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) libJNILoglessPairHMM.so: $(LIBOBJECTS) - $(CXX) $(OMPCFLAGS) -shared -o $@ $(LIBOBJECTS) + $(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) $(OBJECTS): %.o: %.cc diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc index a746bc265..741957fc0 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.cc @@ -11,96 +11,10 @@ #include "template.h" #include "utils.h" +#include "LoadTimeInitializer.h" using namespace std; -class LoadTimeInitializer -{ - public: - LoadTimeInitializer() //will be called when library is loaded - { - ConvertChar::init(); - m_sumNumReads = 0; - m_sumSquareNumReads = 0; - m_sumNumHaplotypes = 0; - m_sumSquareNumHaplotypes = 0; - m_sumNumTestcases = 0; - m_sumSquareNumTestcases = 0; - m_maxNumTestcases = 0; - m_num_invocations = 0; - m_filename_to_fptr.clear(); - - initialize_function_pointers(); - cout.flush(); - } - void print_profiling() - { - double mean_val; - cout <<"Invocations : "<::iterator mI = m_filename_to_fptr.find(filename); - ofstream* fptr = 0; - if(mI == m_filename_to_fptr.end()) - { - m_filename_to_fptr[filename] = new ofstream(); - fptr = m_filename_to_fptr[filename]; - fptr->open(filename.c_str(), to_append ? ios::app : ios::out); - assert(fptr->is_open()); - } - else - fptr = (*mI).second; - //ofstream fptr; - //fptr.open(filename.c_str(), to_append ? ofstream::app : ofstream::out); - (*fptr) << s; - if(add_newline) - (*fptr) << "\n"; - //fptr.close(); - } - void debug_close() - { - for(map::iterator mB = m_filename_to_fptr.begin(), mE = m_filename_to_fptr.end(); - mB != mE;mB++) - { - (*mB).second->close(); - delete (*mB).second; - } - m_filename_to_fptr.clear(); - } - jfieldID m_readBasesFID; - jfieldID m_readQualsFID; - jfieldID m_insertionGOPFID; - jfieldID m_deletionGOPFID; - jfieldID m_overallGCPFID; - jfieldID m_haplotypeBasesFID; - //used to compute avg, variance of #testcases - double m_sumNumReads; - double m_sumSquareNumReads; - double m_sumNumHaplotypes; - double m_sumSquareNumHaplotypes; - double m_sumNumTestcases; - double m_sumSquareNumTestcases; - unsigned m_maxNumTestcases; - unsigned m_num_invocations; - private: - map m_filename_to_fptr; -}; -LoadTimeInitializer g_load_time_initializer; - - JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeProbabilities (JNIEnv* env, jclass thisObject, jobjectArray transition, jbyteArray insertionGOP, jbyteArray deletionGOP, jbyteArray overallGCP @@ -128,7 +42,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePrior //Gets direct access to Java arrays #define GET_BYTE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical #define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical -#define JNI_RELEASE_MODE 0 +#define JNI_RO_RELEASE_MODE JNI_ABORT #define GET_DOUBLE_ARRAY_ELEMENTS env->GetPrimitiveArrayCritical #define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleasePrimitiveArrayCritical @@ -136,7 +50,7 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePrior //Likely makes copy of Java arrays to JNI C++ space #define GET_BYTE_ARRAY_ELEMENTS env->GetByteArrayElements #define RELEASE_BYTE_ARRAY_ELEMENTS env->ReleaseByteArrayElements -#define JNI_RELEASE_MODE JNI_ABORT +#define JNI_RO_RELEASE_MODE JNI_ABORT #define GET_DOUBLE_ARRAY_ELEMENTS env->GetDoubleArrayElements #define RELEASE_DOUBLE_ARRAY_ELEMENTS env->ReleaseDoubleArrayElements @@ -187,12 +101,12 @@ Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadL #endif - RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RELEASE_MODE); - RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RELEASE_MODE); - RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RELEASE_MODE); - RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RELEASE_MODE); - RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RELEASE_MODE); - RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBases, haplotypeBasesArray, JNI_RO_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(readBases, readBasesArray, JNI_RO_RELEASE_MODE); return 0.0; } @@ -226,6 +140,46 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai g_load_time_initializer.m_haplotypeBasesFID = fid; } +//Since the list of haplotypes against which the reads are evaluated in PairHMM is the same for a region, +//transfer the list only once +vector > g_haplotypeBasesArrayVector; +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeHaplotypes + (JNIEnv * env, jobject thisObject, jint numHaplotypes, jobjectArray haplotypeDataArray) +{ + jboolean is_copy = JNI_FALSE; + //To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector + vector >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; + haplotypeBasesArrayVector.clear(); + haplotypeBasesArrayVector.resize(numHaplotypes); + for(unsigned j=0;jGetObjectArrayElement(haplotypeDataArray, j); + jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str()); +#endif + //Need a global reference as this will be accessed across multiple JNI calls to JNIComputeLikelihoods() + jbyteArray haplotypeBasesGlobalRef = (jbyteArray)env->NewGlobalRef(haplotypeBases); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBasesGlobalRef && ("Could not get global ref to haplotypeBases at index : "+to_string(j)+"\n").c_str()); +#endif + env->DeleteLocalRef(haplotypeBases); //free the local reference + jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBasesGlobalRef, &is_copy); +#ifdef ENABLE_ASSERTIONS + assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI"); + assert(env->GetArrayLength(haplotypeBasesGlobalRef) < MCOLS); +#endif +#ifdef DEBUG0_1 + cout << "JNI haplotype length "<GetArrayLength(haplotypeBasesGlobalRef)<<"\n"; +#endif + haplotypeBasesArrayVector[j] = make_pair(haplotypeBasesGlobalRef, haplotypeBasesArray); +#ifdef DEBUG3 + for(unsigned k=0;kGetArrayLength(haplotypeBases);++k) + g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true); +#endif + } +} + //JNI function to invoke compute_full_prob_avx //readDataArray - array of JNIReadDataHolderClass objects which contain the readBases, readQuals etc //haplotypeDataArray - array of JNIHaplotypeDataHolderClass objects which contain the haplotypeBases @@ -235,37 +189,16 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai (JNIEnv* env, jobject thisObject, jint numReads, jint numHaplotypes, jobjectArray readDataArray, jobjectArray haplotypeDataArray, jdoubleArray likelihoodArray, jint maxNumThreadsToUse) { - jboolean is_copy = JNI_FALSE; - //To ensure, GET_BYTE_ARRAY_ELEMENTS is invoked only once for each haplotype, store bytearrays in a vector - vector > haplotypeBasesArrayVector; - haplotypeBasesArrayVector.clear(); - haplotypeBasesArrayVector.resize(numHaplotypes); #ifdef DEBUG0_1 cout << "JNI numReads "<GetObjectArrayElement(haplotypeDataArray, j); - jbyteArray haplotypeBases = (jbyteArray)env->GetObjectField(haplotypeObject, g_load_time_initializer.m_haplotypeBasesFID); -#ifdef ENABLE_ASSERTIONS - assert(haplotypeBases && ("haplotypeBases is NULL at index : "+to_string(j)+"\n").c_str()); -#endif - jbyte* haplotypeBasesArray = (jbyte*)GET_BYTE_ARRAY_ELEMENTS(haplotypeBases, &is_copy); -#ifdef ENABLE_ASSERTIONS - assert(haplotypeBasesArray && "haplotypeBasesArray not initialized in JNI"); - assert(env->GetArrayLength(haplotypeBases) < MCOLS); -#endif -#ifdef DEBUG0_1 - cout << "JNI haplotype length "<GetArrayLength(haplotypeBases)<<"\n"; -#endif - haplotypeBasesArrayVector[j] = make_pair(haplotypeBases, haplotypeBasesArray); -#ifdef DEBUG3 - for(unsigned k=0;kGetArrayLength(haplotypeBases);++k) - g_load_time_initializer.debug_dump("haplotype_bases_jni.txt",to_string((int)haplotypeBasesArray[k]),true); -#endif - } + double start_time = 0; + //haplotype vector from earlier store - note the reference to vector, not copying + vector >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; + jboolean is_copy = JNI_FALSE; unsigned numTestCases = numReads*numHaplotypes; + //vector to store results vector tc_array; tc_array.clear(); tc_array.resize(numTestCases); @@ -274,6 +207,9 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai vector > > readBasesArrayVector; readBasesArrayVector.clear(); readBasesArrayVector.resize(numReads); +#ifdef DO_PROFILING + start_time = getCurrClk(); +#endif for(unsigned i=0;iGetArrayLength(likelihoodArray) == numTestCases); #endif +#ifdef DO_PROFILING + start_time = getCurrClk(); +#endif #pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse) for(tc_idx=0;tc_idx=0;--i)//note the order - reverse of GET { for(int j=readBasesArrayVector[i].size()-1;j>=0;--j) - RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RELEASE_MODE); + RELEASE_BYTE_ARRAY_ELEMENTS(readBasesArrayVector[i][j].first, readBasesArrayVector[i][j].second, JNI_RO_RELEASE_MODE); readBasesArrayVector[i].clear(); } readBasesArrayVector.clear(); - - //Now release haplotype arrays - for(int j=numHaplotypes-1;j>=0;--j) //note the order - reverse of GET - RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RELEASE_MODE); - haplotypeBasesArrayVector.clear(); +#ifdef DO_PROFILING + g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time); +#endif tc_array.clear(); #ifdef DO_PROFILING g_load_time_initializer.m_sumNumReads += numReads; @@ -411,6 +356,21 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai #endif } +//Release haplotypes at the end of a region +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniFinalizeRegion + (JNIEnv * env, jobject thisObject) +{ + vector >& haplotypeBasesArrayVector = g_haplotypeBasesArrayVector; + //Now release haplotype arrays + for(int j=haplotypeBasesArrayVector.size()-1;j>=0;--j) //note the order - reverse of GET + { + RELEASE_BYTE_ARRAY_ELEMENTS(haplotypeBasesArrayVector[j].first, haplotypeBasesArrayVector[j].second, JNI_RO_RELEASE_MODE); + env->DeleteGlobalRef(haplotypeBasesArrayVector[j].first); //free the global reference + } + haplotypeBasesArrayVector.clear(); +} + + JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose (JNIEnv* env, jobject thisObject) { diff --git a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h index b50f6a5a9..06ee99d7d 100644 --- a/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h +++ b/PairHMM_JNI/org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM.h @@ -21,6 +21,34 @@ extern "C" { #define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_matchToDeletion 4L #undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion #define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_deletionToDeletion 5L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug 0L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_verify +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_verify 0L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug0_1 +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug0_1 0L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug1 +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug1 0L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug2 +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug2 0L +#undef org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug3 +#define org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_debug3 0L +/* + * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM + * Method: jniGlobalInit + * Signature: (Ljava/lang/Class;Ljava/lang/Class;)V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit + (JNIEnv *, jobject, jclass, jclass); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM + * Method: jniClose + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose + (JNIEnv *, jobject); + /* * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM * Method: jniInitialize @@ -45,39 +73,38 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializePriorsAndUpdateCells (JNIEnv *, jobject, jboolean, jint, jint, jbyteArray, jbyteArray, jbyteArray, jint); - /* * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM * Method: jniSubComputeReadLikelihoodGivenHaplotypeLog10 * Signature: (II[B[B[B[B[B[BI)D */ JNIEXPORT jdouble JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniSubComputeReadLikelihoodGivenHaplotypeLog10 - (JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint); + (JNIEnv *, jobject, jint, jint, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jbyteArray, jint); /* * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM - * Method: jniGlobalInit - * Signature: (Ljava/lang/Class;Ljava/lang/Class;)V + * Method: jniInitializeHaplotypes + * Signature: (I[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;)V */ -JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniGlobalInit - (JNIEnv *, jobject, jclass, jclass); +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniInitializeHaplotypes + (JNIEnv *, jobject, jint, jobjectArray); + +/* + * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM + * Method: jniFinalizeRegion + * Signature: ()V + */ +JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniFinalizeRegion + (JNIEnv *, jobject); /* * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM * Method: jniComputeLikelihoods - * Signature: ([Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;[D)V + * Signature: (II[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIReadDataHolderClass;[Lorg/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM/JNIHaplotypeDataHolderClass;[DI)V */ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniComputeLikelihoods (JNIEnv *, jobject, jint, jint, jobjectArray, jobjectArray, jdoubleArray, jint); -/* - * Class: org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM - * Method: jniClose - * Signature: ()V - */ -JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPairHMM_jniClose - (JNIEnv *, jobject); - #ifdef __cplusplus } #endif diff --git a/PairHMM_JNI/pairhmm-1-base.cc b/PairHMM_JNI/pairhmm-1-base.cc index a2d5dfa88..1a2dc9a7c 100644 --- a/PairHMM_JNI/pairhmm-1-base.cc +++ b/PairHMM_JNI/pairhmm-1-base.cc @@ -4,26 +4,15 @@ #include "headers.h" #include "template.h" #include "utils.h" +#include "LoadTimeInitializer.h" using namespace std; -class LoadTimeInitializer -{ - public: - LoadTimeInitializer() //will be called when library is loaded - { - ConvertChar::init(); - } -}; -LoadTimeInitializer g_load_time_initializer; + #define BATCH_SIZE 10000 #define RUN_HYBRID -double getCurrClk() { - struct timeval tv ; - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; -} + int main(int argc, char** argv) { @@ -39,9 +28,6 @@ int main(int argc, char** argv) if(argc >= 4) chunk_size = strtol(argv[3],0,10); - - initialize_function_pointers(); - std::ifstream ifptr; FILE* fptr = 0; if(use_old_read_testcase) diff --git a/PairHMM_JNI/pairhmm-template-main.cc b/PairHMM_JNI/pairhmm-template-main.cc index 1c2ca585e..0ab53d378 100644 --- a/PairHMM_JNI/pairhmm-template-main.cc +++ b/PairHMM_JNI/pairhmm-template-main.cc @@ -10,14 +10,9 @@ #define BATCH_SIZE 10000 #define RUN_HYBRID +double getCurrClk(); int thread_level_parallelism_enabled = false ; -double getCurrClk() { - struct timeval tv ; - gettimeofday(&tv, NULL); - return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; -} - void print128b_F(__m128 x) { float *p = (float *)(&x); diff --git a/PairHMM_JNI/run.sh b/PairHMM_JNI/run.sh index de69ba5fc..eafb7ff79 100755 --- a/PairHMM_JNI/run.sh +++ b/PairHMM_JNI/run.sh @@ -1,6 +1,6 @@ #!/bin/bash -rm -f *.txt -export GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable +rm -f *.txt *.log +GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable #-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \ -R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \ diff --git a/PairHMM_JNI/utils.cc b/PairHMM_JNI/utils.cc index 2288377c4..5ed79e29d 100644 --- a/PairHMM_JNI/utils.cc +++ b/PairHMM_JNI/utils.cc @@ -240,3 +240,8 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat) return tokens.size(); } +double getCurrClk() { + struct timeval tv ; + gettimeofday(&tv, NULL); + return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; +} diff --git a/PairHMM_JNI/utils.h b/PairHMM_JNI/utils.h index 8da8269b8..3f7c2ff69 100644 --- a/PairHMM_JNI/utils.h +++ b/PairHMM_JNI/utils.h @@ -1,6 +1,7 @@ #ifndef PAIRHMM_UTIL_H #define PAIRHMM_UTIL_H +#include "template.h" template std::string to_string(T obj) @@ -25,4 +26,5 @@ void debug_dump(std::string filename, std::string s, bool to_append, bool add_ne template NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); void initialize_function_pointers(); +double getCurrClk(); #endif diff --git a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java index caa880b41..b91805a62 100644 --- a/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java +++ b/protected/java/src/org/broadinstitute/sting/gatk/walkers/haplotypecaller/PairHMMLikelihoodCalculationEngine.java @@ -335,6 +335,11 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation pairHMMThreadLocal.get().initialize(haplotypes, perSampleReadList, X_METRIC_LENGTH, Y_METRIC_LENGTH); } + private void finalizePairHMM() + { + pairHMMThreadLocal.get().finalizeRegion(); + } + @Override public Map computeReadLikelihoods( final AssemblyResultSet assemblyResultSet, final Map> perSampleReadList ) { @@ -352,6 +357,9 @@ public class PairHMMLikelihoodCalculationEngine implements LikelihoodCalculation map.filterPoorlyModelledReads(EXPECTED_ERROR_RATE_PER_BASE); stratifiedReadMap.put(sampleEntry.getKey(), map); } + + //Used mostly by the JNI implementation(s) to free arrays + finalizePairHMM(); return stratifiedReadMap; } diff --git a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java index ed628f064..f926e4bd6 100644 --- a/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java +++ b/protected/java/src/org/broadinstitute/sting/utils/pairhmm/JNILoglessPairHMM.java @@ -68,8 +68,8 @@ import java.util.HashMap; public class JNILoglessPairHMM extends LoglessPairHMM { private static final boolean debug = false; //simulates ifdef - private static final boolean verify = debug || false; //simulates ifdef - private static final boolean debug0_1 = false; //simulates ifdef + private static final boolean verify = debug || true; //simulates ifdef + private static final boolean debug0_1 = true; //simulates ifdef private static final boolean debug1 = false; //simulates ifdef private static final boolean debug2 = false; private static final boolean debug3 = false; @@ -148,10 +148,10 @@ public class JNILoglessPairHMM extends LoglessPairHMM { jniInitialize(readMaxLength, haplotypeMaxLength); } - private native void jniInitialize(final int numHaplotypes, final int numTotalReads, JNIHaplotypeDataHolderClass[] haplotypeDataArray, - JNIReadDataHolderClass[] readDataArray); - HashMap sampleToIndex = new HashMap(); - //Used to transfer data to JNI + private HashMap haplotypeToHaplotypeListIdxMap = new HashMap(); + private native void jniInitializeHaplotypes(final int numHaplotypes, JNIHaplotypeDataHolderClass[] haplotypeDataArray); + //Used to transfer data to JNI + //Since the haplotypes are the same for all calls to computeLikelihoods within a region, transfer the haplotypes only once to the JNI per region /** * {@inheritDoc} */ @@ -159,7 +159,6 @@ public class JNILoglessPairHMM extends LoglessPairHMM { public void initialize( final List haplotypes, final Map> perSampleReadList, final int readMaxLength, final int haplotypeMaxLength ) { if(verify) super.initialize(haplotypes, perSampleReadList, readMaxLength, haplotypeMaxLength); - /* int numHaplotypes = haplotypes.size(); JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes]; int idx = 0; @@ -167,32 +166,22 @@ public class JNILoglessPairHMM extends LoglessPairHMM { { haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass(); haplotypeDataArray[idx].haplotypeBases = currHaplotype.getBases(); + haplotypeToHaplotypeListIdxMap.put(currHaplotype, idx); ++idx; } - sampleToIndex.clear(); - int totalNumReads = 0; - for(final Map.Entry> currEntry : perSampleReadList) - { - sampleToIndex.put(currEntry.getKey(), totalNumReads); - totalNumReads += currEntry.getValue().size(); - } + jniInitializeHaplotypes(numHaplotypes, haplotypeDataArray); + } - JNIHaplotypeDataHolderClass[] readDataArray = new JNIReadDataHolderClass[totalNumReads]; - idx = 0; - for(final Map.Entry> currEntry : perSampleReadList) - { - for(GATKSAMRecord read : currEntry.getValue()) - { - readDataArray[idx] = new JNIReadDataHolderClass(); - readDataArray[idx].readBases = read.getReadBases(); - readDataArray[idx].readQuals = read.getBaseQualities(); - readDataArray[idx].insertionGOP = read.getBaseInsertionQualities(); - readDataArray[idx].deletionGOP = read.getBaseDeletionQualities(); - readDataArray[idx].overallGCP = GCPArrayMap.get(read); - ++idx; - } - } - jniInitialize(numHaplotypes, numTotalReads, haplotypeDataArray, readDataArray);*/ + private native void jniFinalizeRegion(); + //Tell JNI to release arrays - really important if native code is directly accessing Java memory, if not + //accessing Java memory directly, still important to release memory from C++ + /** + * {@inheritDoc} + */ + @Override + public void finalizeRegion() + { + jniFinalizeRegion(); } //Real compute kernel @@ -216,10 +205,10 @@ public class JNILoglessPairHMM extends LoglessPairHMM { throw new IllegalStateException("Must call initialize before calling jniComputeLikelihoods in debug/verify mode"); } int readListSize = reads.size(); - int alleleHaplotypeMapSize = alleleHaplotypeMap.size(); - int numTestcases = readListSize*alleleHaplotypeMapSize; + int numHaplotypes = alleleHaplotypeMap.size(); + int numTestcases = readListSize*numHaplotypes; if(debug0_1) - System.out.println("Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize); + System.out.println("Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); JNIReadDataHolderClass[] readDataArray = new JNIReadDataHolderClass[readListSize]; int idx = 0; for(GATKSAMRecord read : reads) @@ -246,39 +235,68 @@ public class JNILoglessPairHMM extends LoglessPairHMM { } ++idx; } - JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[alleleHaplotypeMapSize]; - idx = 0; - for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always + + JNIHaplotypeDataHolderClass[] haplotypeDataArray = new JNIHaplotypeDataHolderClass[numHaplotypes]; + if(verify) { - haplotypeDataArray[idx] = new JNIHaplotypeDataHolderClass(); - haplotypeDataArray[idx].haplotypeBases = currEntry.getValue().getBases(); - if(debug0_1) - System.out.println("Java haplotype length "+haplotypeDataArray[idx].haplotypeBases.length); - if(debug3) + idx = 0; + for (Map.Entry currEntry : alleleHaplotypeMap.entrySet()) //order is important - access in same order always { - for(int i=0;i currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always { - likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[idx]); + //Since the order of haplotypes in the List and alleleHaplotypeMap is different, + //get idx of current haplotype in the list and use this idx to get the right likelihoodValue + idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); + likelihoodMap.add(read, currEntry.getKey(), likelihoodArray[readIdx + idxInsideHaplotypeList]); ++idx; } + readIdx += numHaplotypes; + } if(verify) { + //re-order values in likelihoodArray + double[] tmpArray = new double[numHaplotypes]; + idx = 0; + idxInsideHaplotypeList = 0; + readIdx = 0; + for(GATKSAMRecord read : reads) + { + for(int j=0;j currEntry : alleleHaplotypeMap.entrySet())//order is important - access in same order always + { + idxInsideHaplotypeList = haplotypeToHaplotypeListIdxMap.get(currEntry.getValue()); + likelihoodArray[idx] = tmpArray[idxInsideHaplotypeList]; + ++idx; + } + readIdx += numHaplotypes; + } //to compare values likelihoodMap = super.computeLikelihoods(reads, alleleHaplotypeMap, GCPArrayMap); //for floating point values, no exact equality @@ -304,23 +322,23 @@ public class JNILoglessPairHMM extends LoglessPairHMM { if(toDump) { idx = 0; - System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+alleleHaplotypeMapSize); + System.out.println("Dump : Java numReads "+readListSize+" numHaplotypes "+numHaplotypes); for(int i=0;i