1. Converted q,i,d,c in C++ from int* to char*

2. Use clock_gettime to measure performance
3. Disabled OpenMP
4. Moved LoadTimeInitializer to a different file
This commit is contained in:
Karthik Gururaj 2014-01-22 22:57:32 -08:00
parent 733a84e4f9
commit 936e9e175e
10 changed files with 66 additions and 48 deletions

View File

@ -27,8 +27,8 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
void LoadTimeInitializer::print_profiling()
{
double mean_val;
cout << "Compute time "<<m_compute_time<<"\n";
cout << "Data initialization time "<<m_data_transfer_time<<"\n";
cout << "Compute time "<<m_compute_time*1e-9<<"\n";
cout << "Data initialization time "<<m_data_transfer_time*1e-9<<"\n";
cout <<"Invocations : "<<m_num_invocations<<"\n";
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
mean_val = m_sumNumReads/m_num_invocations;

View File

@ -26,8 +26,8 @@ class LoadTimeInitializer
unsigned m_maxNumTestcases;
unsigned m_num_invocations;
//timing
double m_compute_time;
double m_data_transfer_time;
uint64_t m_compute_time;
uint64_t m_data_transfer_time;
private:
std::map<std::string, std::ofstream*> m_filename_to_fptr;
};

View File

@ -1,5 +1,5 @@
OMPCFLAGS=-fopenmp
OMPLFLAGS=-fopenmp #-openmp-link static
#OMPCFLAGS=-fopenmp
#OMPLFLAGS=-fopenmp #-openmp-link static
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
@ -11,7 +11,7 @@ COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCF
CC=icc
CXX=icc
LDFLAGS=-lm $(OMPLDFLAGS)
LDFLAGS=-lm -lrt $(OMPLDFLAGS)
BIN=libJNILoglessPairHMM.so pairhmm-template-main checker
#BIN=checker
@ -54,7 +54,7 @@ pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
libJNILoglessPairHMM.so: $(LIBOBJECTS)
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS)
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) ${LDFLAGS}
$(OBJECTS): %.o: %.cc

View File

@ -208,7 +208,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
readBasesArrayVector.clear();
readBasesArrayVector.resize(numReads);
#ifdef DO_PROFILING
start_time = getCurrClk();
start_time = get_time();
#endif
for(unsigned i=0;i<numReads;++i)
{
@ -266,30 +266,26 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second;
tc_array[tc_idx].rslen = (int)readLength;
tc_array[tc_idx].haplen = (int)haplotypeLength;
tc_array[tc_idx].rs = (char*)readBasesArray;
tc_array[tc_idx].hap = (char*)haplotypeBasesArray;
//Can be avoided
for(unsigned k=0;k<readLength;++k)
{
tc_array[tc_idx].q[k] = (int)readQualsArray[k];
tc_array[tc_idx].i[k] = (int)insertionGOPArray[k];
tc_array[tc_idx].d[k] = (int)deletionGOPArray[k];
tc_array[tc_idx].c[k] = (int)overallGCPArray[k];
}
tc_array[tc_idx].rs = (char*)readBasesArray;
tc_array[tc_idx].q = (char*)readQualsArray;
tc_array[tc_idx].i = (char*)insertionGOPArray;
tc_array[tc_idx].d = (char*)deletionGOPArray;
tc_array[tc_idx].c = (char*)overallGCPArray;
++tc_idx;
}
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE); //order of GET-RELEASE is important
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
//Release readBases at end because it is used by compute_full_prob
//Release read arrays at end because they are used by compute_full_prob
//Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
readBasesArrayVector[i].clear();
readBasesArrayVector[i].resize(1);
readBasesArrayVector[i].resize(5);
readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
readBasesArrayVector[i][1] = make_pair(readQuals, readQualsArray);
readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
}
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
g_load_time_initializer.m_data_transfer_time += get_time();
#endif
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
@ -298,7 +294,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
#endif
#ifdef DO_PROFILING
start_time = getCurrClk();
start_time = get_time();
#endif
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
@ -315,7 +311,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
likelihoodDoubleArray[tc_idx] = result;
}
#ifdef DO_PROFILING
g_load_time_initializer.m_compute_time += (getCurrClk()-start_time);
g_load_time_initializer.m_compute_time += get_time();
#endif
#ifdef DEBUG
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
@ -324,7 +320,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
}
#endif
#ifdef DO_PROFILING
start_time = getCurrClk();
start_time = get_time();
#endif
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory
@ -337,7 +333,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
}
readBasesArrayVector.clear();
#ifdef DO_PROFILING
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
g_load_time_initializer.m_data_transfer_time += get_time();
#endif
tc_array.clear();
#ifdef DO_PROFILING

View File

@ -44,7 +44,7 @@ int main(int argc, char** argv)
vector<testcase> tc_vector;
tc_vector.clear();
testcase tc;
double total_time = 0;
uint64_t total_time = 0;
while(1)
{
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
@ -55,7 +55,7 @@ int main(int argc, char** argv)
vector<double> results_vec;
results_vec.clear();
results_vec.resize(tc_vector.size());
double start_time = getCurrClk();
get_time();
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
for(unsigned i=0;i<tc_vector.size();++i)
{
@ -71,7 +71,8 @@ int main(int argc, char** argv)
results_vec[i] = result;
}
total_time += (getCurrClk()-start_time);
total_time += get_time();
#pragma omp parallel for schedule(dynamic,chunk_size)
for(unsigned i=0;i<tc_vector.size();++i)
{
testcase& tc = tc_vector[i];
@ -83,6 +84,10 @@ int main(int argc, char** argv)
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
delete tc_vector[i].rs;
delete tc_vector[i].hap;
delete tc_vector[i].q;
delete tc_vector[i].i;
delete tc_vector[i].d;
delete tc_vector[i].c;
}
results_vec.clear();
tc_vector.clear();
@ -90,7 +95,7 @@ int main(int argc, char** argv)
if(break_value < 0)
break;
}
cout << "Total time "<< total_time << "\n";
cout << "Total time "<< ((double)total_time)/1e9 << "\n";
if(use_old_read_testcase)
fclose(fptr);
else

View File

@ -3,15 +3,15 @@ rm -f *.txt *.log
GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
-I /data/simulated/sim1M_pairs_final.bam \
-R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
-stand_call_conf 50.0 \
-stand_emit_conf 10.0 \
--pair_hmm_implementation JNI_LOGLESS_CACHING \
-XL unmapped \
-o output.raw.snps.indels.vcf
#-XL unmapped \
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
#-I /data/simulated/sim1M_pairs_final.bam \
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \

View File

@ -150,7 +150,8 @@ typedef struct
{
int rslen, haplen;
/*int *q, *i, *d, *c;*/
int q[MROWS], i[MROWS], d[MROWS], c[MROWS];
/*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
char *q, *i, *d, *c;
char *hap, *rs;
int *ihap;
int *irs;

View File

@ -90,10 +90,10 @@ int read_testcase(testcase *tc, FILE* ifp)
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
//tc->q = (int *) malloc(sizeof(int) * tc->rslen);
//tc->i = (int *) malloc(sizeof(int) * tc->rslen);
//tc->d = (int *) malloc(sizeof(int) * tc->rslen);
//tc->c = (int *) malloc(sizeof(int) * tc->rslen);
tc->q = (char *) malloc(sizeof(char) * tc->rslen);
tc->i = (char *) malloc(sizeof(char) * tc->rslen);
tc->d = (char *) malloc(sizeof(char) * tc->rslen);
tc->c = (char *) malloc(sizeof(char) * tc->rslen);
for (x = 0; x < tc->rslen; x++)
{
@ -199,18 +199,22 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
tc->rs = new char[tokens[1].size()+2];
tc->rslen = tokens[1].size();
tc->q = new char[tc->rslen];
tc->i = new char[tc->rslen];
tc->d = new char[tc->rslen];
tc->c = new char[tc->rslen];
//cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
assert(tokens.size() == 2 + 4*(tc->rslen));
assert(tc->rslen < MROWS);
for(unsigned j=0;j<tc->rslen;++j)
tc->q[j] = convToInt(tokens[2+0*tc->rslen+j]);
tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j)
tc->i[j] = convToInt(tokens[2+1*tc->rslen+j]);
tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j)
tc->d[j] = convToInt(tokens[2+2*tc->rslen+j]);
tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
for(unsigned j=0;j<tc->rslen;++j)
tc->c[j] = convToInt(tokens[2+3*tc->rslen+j]);
tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);
if(reformat)
{
@ -245,3 +249,14 @@ double getCurrClk() {
gettimeofday(&tv, NULL);
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
}
/*
 * Interval timer built on clock_gettime(CLOCK_REALTIME).
 *
 * Each call returns the number of nanoseconds elapsed since the previous call
 * and then makes the current timestamp the new reference point. The reference
 * point is a function-local static that starts out zero-initialised, so the
 * very first call returns the time elapsed since the epoch; callers normally
 * discard that first value and only use the result of the following call.
 *
 * store_struct: optional. When non-null, the current timestamp is written
 *               into *store_struct; when null, a local scratch timespec is used.
 *
 * Returns: elapsed nanoseconds, or 0 if the clock could not be read or if the
 *          wall clock stepped backwards between the two calls (CLOCK_REALTIME
 *          is adjustable, so a negative interval is possible).
 *
 * Not thread-safe: the static reference timestamp is shared by all callers.
 */
uint64_t get_time(struct timespec* store_struct)
{
    static struct timespec start_time;
    struct timespec curr_time;
    struct timespec* ptr = (store_struct == 0) ? &curr_time : store_struct;

    // On failure *ptr holds no valid timestamp; keep the old reference point
    // and report a zero-length interval rather than reading garbage.
    if (clock_gettime(CLOCK_REALTIME, ptr) != 0)
        return 0;

    // Do the arithmetic in explicit 64-bit signed integers so the seconds to
    // nanoseconds multiplication cannot overflow where time_t is 32 bits wide.
    const int64_t sec_delta =
        static_cast<int64_t>(ptr->tv_sec) - static_cast<int64_t>(start_time.tv_sec);
    const int64_t nsec_delta =
        static_cast<int64_t>(ptr->tv_nsec) - static_cast<int64_t>(start_time.tv_nsec);
    const int64_t diff_time = sec_delta * static_cast<int64_t>(1000000000) + nsec_delta;

    start_time = *ptr;

    // A backwards clock step would otherwise wrap to a huge unsigned value.
    return (diff_time > 0) ? static_cast<uint64_t>(diff_time) : 0;
}

View File

@ -27,4 +27,5 @@ template<class NUMBER>
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
void initialize_function_pointers();
// Returns the current time of day in seconds as a double, built from
// gettimeofday() (whole seconds plus microseconds / 1e6).
double getCurrClk();
// Returns the nanoseconds elapsed since the previous get_time() call, measured
// with clock_gettime(CLOCK_REALTIME), and resets the reference point to now.
// If x is non-null, the current timestamp is also written to *x.
uint64_t get_time(struct timespec* x=0);
#endif

View File

@ -68,8 +68,8 @@ import java.util.HashMap;
public class JNILoglessPairHMM extends LoglessPairHMM {
private static final boolean debug = false; //simulates ifdef
private static final boolean verify = debug || true; //simulates ifdef
private static final boolean debug0_1 = true; //simulates ifdef
private static final boolean verify = debug || false; //simulates ifdef
private static final boolean debug0_1 = false; //simulates ifdef
private static final boolean debug1 = false; //simulates ifdef
private static final boolean debug2 = false;
private static final boolean debug3 = false;