1. Converted q,i,d,c in C++ from int* to char*

2. Use clock_gettime to measure performance
3. Disabled OpenMP
4. Moved LoadTimeInitializer to different file
This commit is contained in:
Karthik Gururaj 2014-01-22 22:57:32 -08:00
parent 733a84e4f9
commit 936e9e175e
10 changed files with 66 additions and 48 deletions

View File

@ -27,8 +27,8 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
void LoadTimeInitializer::print_profiling() void LoadTimeInitializer::print_profiling()
{ {
double mean_val; double mean_val;
cout << "Compute time "<<m_compute_time<<"\n"; cout << "Compute time "<<m_compute_time*1e-9<<"\n";
cout << "Data initialization time "<<m_data_transfer_time<<"\n"; cout << "Data initialization time "<<m_data_transfer_time*1e-9<<"\n";
cout <<"Invocations : "<<m_num_invocations<<"\n"; cout <<"Invocations : "<<m_num_invocations<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n"; cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
mean_val = m_sumNumReads/m_num_invocations; mean_val = m_sumNumReads/m_num_invocations;

View File

@ -26,8 +26,8 @@ class LoadTimeInitializer
unsigned m_maxNumTestcases; unsigned m_maxNumTestcases;
unsigned m_num_invocations; unsigned m_num_invocations;
//timing //timing
double m_compute_time; uint64_t m_compute_time;
double m_data_transfer_time; uint64_t m_data_transfer_time;
private: private:
std::map<std::string, std::ofstream*> m_filename_to_fptr; std::map<std::string, std::ofstream*> m_filename_to_fptr;
}; };

View File

@ -1,5 +1,5 @@
OMPCFLAGS=-fopenmp #OMPCFLAGS=-fopenmp
OMPLFLAGS=-fopenmp #-openmp-link static #OMPLFLAGS=-fopenmp #-openmp-link static
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas #CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas #CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
@ -11,7 +11,7 @@ COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCF
CC=icc CC=icc
CXX=icc CXX=icc
LDFLAGS=-lm $(OMPLDFLAGS) LDFLAGS=-lm -lrt $(OMPLDFLAGS)
BIN=libJNILoglessPairHMM.so pairhmm-template-main checker BIN=libJNILoglessPairHMM.so pairhmm-template-main checker
#BIN=checker #BIN=checker
@ -54,7 +54,7 @@ pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS) $(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
libJNILoglessPairHMM.so: $(LIBOBJECTS) libJNILoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) $(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) ${LDFLAGS}
$(OBJECTS): %.o: %.cc $(OBJECTS): %.o: %.cc

View File

@ -208,7 +208,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
readBasesArrayVector.clear(); readBasesArrayVector.clear();
readBasesArrayVector.resize(numReads); readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING #ifdef DO_PROFILING
start_time = getCurrClk(); start_time = get_time();
#endif #endif
for(unsigned i=0;i<numReads;++i) for(unsigned i=0;i<numReads;++i)
{ {
@ -266,30 +266,26 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second; jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second;
tc_array[tc_idx].rslen = (int)readLength; tc_array[tc_idx].rslen = (int)readLength;
tc_array[tc_idx].haplen = (int)haplotypeLength; tc_array[tc_idx].haplen = (int)haplotypeLength;
tc_array[tc_idx].rs = (char*)readBasesArray;
tc_array[tc_idx].hap = (char*)haplotypeBasesArray; tc_array[tc_idx].hap = (char*)haplotypeBasesArray;
//Can be avoided tc_array[tc_idx].rs = (char*)readBasesArray;
for(unsigned k=0;k<readLength;++k) tc_array[tc_idx].q = (char*)readQualsArray;
{ tc_array[tc_idx].i = (char*)insertionGOPArray;
tc_array[tc_idx].q[k] = (int)readQualsArray[k]; tc_array[tc_idx].d = (char*)deletionGOPArray;
tc_array[tc_idx].i[k] = (int)insertionGOPArray[k]; tc_array[tc_idx].c = (char*)overallGCPArray;
tc_array[tc_idx].d[k] = (int)deletionGOPArray[k];
tc_array[tc_idx].c[k] = (int)overallGCPArray[k];
}
++tc_idx; ++tc_idx;
} }
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE); //order of GET-RELEASE is important //Release read arrays at end because they are used by compute_full_prob
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE); //Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
//Release readBases at end because it is used by compute_full_prob
readBasesArrayVector[i].clear(); readBasesArrayVector[i].clear();
readBasesArrayVector[i].resize(1); readBasesArrayVector[i].resize(5);
readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray); readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
readBasesArrayVector[i][1] = make_pair(readQuals, readQualsArray);
readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
} }
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time); g_load_time_initializer.m_data_transfer_time += get_time();
#endif #endif
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy); jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
@ -298,7 +294,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
assert(env->GetArrayLength(likelihoodArray) == numTestCases); assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif #endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
start_time = getCurrClk(); start_time = get_time();
#endif #endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse) #pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx) for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
@ -315,7 +311,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
likelihoodDoubleArray[tc_idx] = result; likelihoodDoubleArray[tc_idx] = result;
} }
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_compute_time += (getCurrClk()-start_time); g_load_time_initializer.m_compute_time += get_time();
#endif #endif
#ifdef DEBUG #ifdef DEBUG
for(tc_idx=0;tc_idx<numTestCases;++tc_idx) for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
@ -324,7 +320,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
} }
#endif #endif
#ifdef DO_PROFILING #ifdef DO_PROFILING
start_time = getCurrClk(); start_time = get_time();
#endif #endif
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory
@ -337,7 +333,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
} }
readBasesArrayVector.clear(); readBasesArrayVector.clear();
#ifdef DO_PROFILING #ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time); g_load_time_initializer.m_data_transfer_time += get_time();
#endif #endif
tc_array.clear(); tc_array.clear();
#ifdef DO_PROFILING #ifdef DO_PROFILING

View File

@ -44,7 +44,7 @@ int main(int argc, char** argv)
vector<testcase> tc_vector; vector<testcase> tc_vector;
tc_vector.clear(); tc_vector.clear();
testcase tc; testcase tc;
double total_time = 0; uint64_t total_time = 0;
while(1) while(1)
{ {
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true); int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
@ -55,7 +55,7 @@ int main(int argc, char** argv)
vector<double> results_vec; vector<double> results_vec;
results_vec.clear(); results_vec.clear();
results_vec.resize(tc_vector.size()); results_vec.resize(tc_vector.size());
double start_time = getCurrClk(); get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12) #pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<tc_vector.size();++i)
{ {
@ -71,7 +71,8 @@ int main(int argc, char** argv)
results_vec[i] = result; results_vec[i] = result;
} }
total_time += (getCurrClk()-start_time); total_time += get_time();
#pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i) for(unsigned i=0;i<tc_vector.size();++i)
{ {
testcase& tc = tc_vector[i]; testcase& tc = tc_vector[i];
@ -83,6 +84,10 @@ int main(int argc, char** argv)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n"; cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
delete tc_vector[i].rs; delete tc_vector[i].rs;
delete tc_vector[i].hap; delete tc_vector[i].hap;
delete tc_vector[i].q;
delete tc_vector[i].i;
delete tc_vector[i].d;
delete tc_vector[i].c;
} }
results_vec.clear(); results_vec.clear();
tc_vector.clear(); tc_vector.clear();
@ -90,7 +95,7 @@ int main(int argc, char** argv)
if(break_value < 0) if(break_value < 0)
break; break;
} }
cout << "Total time "<< total_time << "\n"; cout << "Total time "<< ((double)total_time)/1e9 << "\n";
if(use_old_read_testcase) if(use_old_read_testcase)
fclose(fptr); fclose(fptr);
else else

View File

@ -3,15 +3,15 @@ rm -f *.txt *.log
GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed #-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \ java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \ -R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \
-I /data/simulated/sim1M_pairs_final.bam \ -I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \ --dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
-stand_call_conf 50.0 \ -stand_call_conf 50.0 \
-stand_emit_conf 10.0 \ -stand_emit_conf 10.0 \
--pair_hmm_implementation JNI_LOGLESS_CACHING \ --pair_hmm_implementation JNI_LOGLESS_CACHING \
-XL unmapped \
-o output.raw.snps.indels.vcf -o output.raw.snps.indels.vcf
#-XL unmapped \
#--pair_hmm_implementation JNI_LOGLESS_CACHING \ #--pair_hmm_implementation JNI_LOGLESS_CACHING \
#-I /data/simulated/sim1M_pairs_final.bam \ #-I /data/simulated/sim1M_pairs_final.bam \
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \ #-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \

View File

@ -150,7 +150,8 @@ typedef struct
{ {
int rslen, haplen; int rslen, haplen;
/*int *q, *i, *d, *c;*/ /*int *q, *i, *d, *c;*/
int q[MROWS], i[MROWS], d[MROWS], c[MROWS]; /*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
char *q, *i, *d, *c;
char *hap, *rs; char *hap, *rs;
int *ihap; int *ihap;
int *irs; int *irs;

View File

@ -90,10 +90,10 @@ int read_testcase(testcase *tc, FILE* ifp)
tc->ihap = (int *) malloc(tc->haplen*sizeof(int)); tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
tc->irs = (int *) malloc(tc->rslen*sizeof(int)); tc->irs = (int *) malloc(tc->rslen*sizeof(int));
//tc->q = (int *) malloc(sizeof(int) * tc->rslen); tc->q = (char *) malloc(sizeof(char) * tc->rslen);
//tc->i = (int *) malloc(sizeof(int) * tc->rslen); tc->i = (char *) malloc(sizeof(char) * tc->rslen);
//tc->d = (int *) malloc(sizeof(int) * tc->rslen); tc->d = (char *) malloc(sizeof(char) * tc->rslen);
//tc->c = (int *) malloc(sizeof(int) * tc->rslen); tc->c = (char *) malloc(sizeof(char) * tc->rslen);
for (x = 0; x < tc->rslen; x++) for (x = 0; x < tc->rslen; x++)
{ {
@ -199,18 +199,22 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
memcpy(tc->hap, tokens[0].c_str(), tokens[0].size()); memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
tc->rs = new char[tokens[1].size()+2]; tc->rs = new char[tokens[1].size()+2];
tc->rslen = tokens[1].size(); tc->rslen = tokens[1].size();
tc->q = new char[tc->rslen];
tc->i = new char[tc->rslen];
tc->d = new char[tc->rslen];
tc->c = new char[tc->rslen];
//cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n"; //cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size()); memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
assert(tokens.size() == 2 + 4*(tc->rslen)); assert(tokens.size() == 2 + 4*(tc->rslen));
assert(tc->rslen < MROWS); assert(tc->rslen < MROWS);
for(unsigned j=0;j<tc->rslen;++j) for(unsigned j=0;j<tc->rslen;++j)
tc->q[j] = convToInt(tokens[2+0*tc->rslen+j]); tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j) for(unsigned j=0;j<tc->rslen;++j)
tc->i[j] = convToInt(tokens[2+1*tc->rslen+j]); tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j) for(unsigned j=0;j<tc->rslen;++j)
tc->d[j] = convToInt(tokens[2+2*tc->rslen+j]); tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j) for(unsigned j=0;j<tc->rslen;++j)
tc->c[j] = convToInt(tokens[2+3*tc->rslen+j]); tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);
if(reformat) if(reformat)
{ {
@ -245,3 +249,14 @@ double getCurrClk() {
gettimeofday(&tv, NULL); gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0; return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
} }
/**
 * Interval timer: returns the number of nanoseconds elapsed since the
 * previous call to get_time() and restarts the interval at "now".
 *
 * Typical use is a pair of calls: one whose return value is discarded to
 * mark the start of a region, and a second whose return value is the
 * duration of that region.
 *
 * @param store_struct  Optional. When non-null, the timestamp read from
 *                      CLOCK_REALTIME is written here; when null, a local
 *                      scratch timespec is used instead.
 * @return Nanoseconds between the previous call and this one. The very first
 *         call measures against a zero-initialized timespec. Returns 0 if the
 *         clock cannot be read or if the wall clock moved backwards.
 *
 * NOTE: the reference timestamp is a single function-local static that is
 * read and written without synchronization, so all callers share one
 * interval and concurrent calls would interfere with each other.
 */
uint64_t get_time(struct timespec* store_struct)
{
    static struct timespec start_time;  // static storage => zero-initialized
    struct timespec curr_time;
    struct timespec* ptr = (store_struct == 0) ? &curr_time : store_struct;

    // On failure *ptr is not meaningful: report no elapsed time and keep the
    // previous reference point rather than storing garbage into it.
    if (clock_gettime(CLOCK_REALTIME, ptr) != 0)
        return 0;

    // Widen to 64 bits BEFORE multiplying so the product cannot overflow on
    // platforms where time_t is only 32 bits wide.
    const int64_t nanos_per_sec = 1000000000LL;
    const int64_t elapsed_ns =
        (static_cast<int64_t>(ptr->tv_sec) - static_cast<int64_t>(start_time.tv_sec)) * nanos_per_sec
        + (static_cast<int64_t>(ptr->tv_nsec) - static_cast<int64_t>(start_time.tv_nsec));

    start_time = *ptr;

    // CLOCK_REALTIME can be stepped backwards (e.g. by a time adjustment);
    // clamp so a negative delta does not wrap into a huge unsigned value.
    return (elapsed_ns > 0) ? static_cast<uint64_t>(elapsed_ns) : 0;
}

View File

@ -27,4 +27,5 @@ template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0); NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
void initialize_function_pointers(); void initialize_function_pointers();
double getCurrClk(); double getCurrClk();
uint64_t get_time(struct timespec* x=0);
#endif #endif

View File

@ -68,8 +68,8 @@ import java.util.HashMap;
public class JNILoglessPairHMM extends LoglessPairHMM { public class JNILoglessPairHMM extends LoglessPairHMM {
private static final boolean debug = false; //simulates ifdef private static final boolean debug = false; //simulates ifdef
private static final boolean verify = debug || true; //simulates ifdef private static final boolean verify = debug || false; //simulates ifdef
private static final boolean debug0_1 = true; //simulates ifdef private static final boolean debug0_1 = false; //simulates ifdef
private static final boolean debug1 = false; //simulates ifdef private static final boolean debug1 = false; //simulates ifdef
private static final boolean debug2 = false; private static final boolean debug2 = false;
private static final boolean debug3 = false; private static final boolean debug3 = false;