1. Converted q, i, d, c in C++ from int* to char*
2. Used clock_gettime to measure performance
3. Disabled OpenMP
4. Moved LoadTimeInitializer to a different file
This commit is contained in:
parent
733a84e4f9
commit
936e9e175e
|
|
@ -27,8 +27,8 @@ LoadTimeInitializer::LoadTimeInitializer() //will be called when library is loa
|
|||
void LoadTimeInitializer::print_profiling()
|
||||
{
|
||||
double mean_val;
|
||||
cout << "Compute time "<<m_compute_time<<"\n";
|
||||
cout << "Data initialization time "<<m_data_transfer_time<<"\n";
|
||||
cout << "Compute time "<<m_compute_time*1e-9<<"\n";
|
||||
cout << "Data initialization time "<<m_data_transfer_time*1e-9<<"\n";
|
||||
cout <<"Invocations : "<<m_num_invocations<<"\n";
|
||||
cout << "term\tsum\tsumSq\tmean\tvar\tmax\n";
|
||||
mean_val = m_sumNumReads/m_num_invocations;
|
||||
|
|
|
|||
|
|
@ -26,8 +26,8 @@ class LoadTimeInitializer
|
|||
unsigned m_maxNumTestcases;
|
||||
unsigned m_num_invocations;
|
||||
//timing
|
||||
double m_compute_time;
|
||||
double m_data_transfer_time;
|
||||
uint64_t m_compute_time;
|
||||
uint64_t m_data_transfer_time;
|
||||
private:
|
||||
std::map<std::string, std::ofstream*> m_filename_to_fptr;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
OMPCFLAGS=-fopenmp
|
||||
OMPLFLAGS=-fopenmp #-openmp-link static
|
||||
#OMPCFLAGS=-fopenmp
|
||||
#OMPLFLAGS=-fopenmp #-openmp-link static
|
||||
|
||||
#CFLAGS=-O2 -std=c++11 -W -Wall -march=corei7-avx -Wa,-q -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
#CFLAGS=-O2 -W -Wall -march=corei7 -mfpmath=sse -msse4.2 -pedantic $(OMPCFLAGS) -Wno-unknown-pragmas
|
||||
|
|
@ -11,7 +11,7 @@ COMMON_COMPILATION_FLAGS=$(JNI_COMPILATION_FLAGS) -O3 -W -Wall -pedantic $(OMPCF
|
|||
CC=icc
|
||||
CXX=icc
|
||||
|
||||
LDFLAGS=-lm $(OMPLDFLAGS)
|
||||
LDFLAGS=-lm -lrt $(OMPLDFLAGS)
|
||||
|
||||
BIN=libJNILoglessPairHMM.so pairhmm-template-main checker
|
||||
#BIN=checker
|
||||
|
|
@ -54,7 +54,7 @@ pairhmm-template-main: pairhmm-template-main.o $(COMMON_OBJECTS)
|
|||
$(CXX) $(OMPLFLAGS) -o $@ $^ $(LDFLAGS)
|
||||
|
||||
libJNILoglessPairHMM.so: $(LIBOBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS)
|
||||
$(CXX) $(OMPLFLAGS) -shared -o $@ $(LIBOBJECTS) ${LDFLAGS}
|
||||
|
||||
|
||||
$(OBJECTS): %.o: %.cc
|
||||
|
|
|
|||
|
|
@ -208,7 +208,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
readBasesArrayVector.clear();
|
||||
readBasesArrayVector.resize(numReads);
|
||||
#ifdef DO_PROFILING
|
||||
start_time = getCurrClk();
|
||||
start_time = get_time();
|
||||
#endif
|
||||
for(unsigned i=0;i<numReads;++i)
|
||||
{
|
||||
|
|
@ -266,30 +266,26 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
jbyte* haplotypeBasesArray = haplotypeBasesArrayVector[j].second;
|
||||
tc_array[tc_idx].rslen = (int)readLength;
|
||||
tc_array[tc_idx].haplen = (int)haplotypeLength;
|
||||
tc_array[tc_idx].rs = (char*)readBasesArray;
|
||||
tc_array[tc_idx].hap = (char*)haplotypeBasesArray;
|
||||
//Can be avoided
|
||||
for(unsigned k=0;k<readLength;++k)
|
||||
{
|
||||
tc_array[tc_idx].q[k] = (int)readQualsArray[k];
|
||||
tc_array[tc_idx].i[k] = (int)insertionGOPArray[k];
|
||||
tc_array[tc_idx].d[k] = (int)deletionGOPArray[k];
|
||||
tc_array[tc_idx].c[k] = (int)overallGCPArray[k];
|
||||
}
|
||||
tc_array[tc_idx].rs = (char*)readBasesArray;
|
||||
tc_array[tc_idx].q = (char*)readQualsArray;
|
||||
tc_array[tc_idx].i = (char*)insertionGOPArray;
|
||||
tc_array[tc_idx].d = (char*)deletionGOPArray;
|
||||
tc_array[tc_idx].c = (char*)overallGCPArray;
|
||||
++tc_idx;
|
||||
}
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(overallGCP, overallGCPArray, JNI_RO_RELEASE_MODE); //order of GET-RELEASE is important
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(deletionGOP, deletionGOPArray, JNI_RO_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(insertionGOP, insertionGOPArray, JNI_RO_RELEASE_MODE);
|
||||
RELEASE_BYTE_ARRAY_ELEMENTS(readQuals, readQualsArray, JNI_RO_RELEASE_MODE);
|
||||
|
||||
//Release readBases at end because it is used by compute_full_prob
|
||||
//Release read arrays at end because they are used by compute_full_prob
|
||||
//Maintain order in which GET_BYTE_ARRAY_ELEMENTS called
|
||||
readBasesArrayVector[i].clear();
|
||||
readBasesArrayVector[i].resize(1);
|
||||
readBasesArrayVector[i].resize(5);
|
||||
readBasesArrayVector[i][0] = make_pair(readBases, readBasesArray);
|
||||
readBasesArrayVector[i][1] = make_pair(readQuals, readQualsArray);
|
||||
readBasesArrayVector[i][2] = make_pair(insertionGOP, insertionGOPArray);
|
||||
readBasesArrayVector[i][3] = make_pair(deletionGOP, deletionGOPArray);
|
||||
readBasesArrayVector[i][4] = make_pair(overallGCP, overallGCPArray);
|
||||
}
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
|
||||
g_load_time_initializer.m_data_transfer_time += get_time();
|
||||
#endif
|
||||
|
||||
jdouble* likelihoodDoubleArray = (jdouble*)GET_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, &is_copy);
|
||||
|
|
@ -298,7 +294,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
assert(env->GetArrayLength(likelihoodArray) == numTestCases);
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
start_time = getCurrClk();
|
||||
start_time = get_time();
|
||||
#endif
|
||||
#pragma omp parallel for schedule (dynamic,10) private(tc_idx) num_threads(maxNumThreadsToUse)
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
|
|
@ -315,7 +311,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
likelihoodDoubleArray[tc_idx] = result;
|
||||
}
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_compute_time += (getCurrClk()-start_time);
|
||||
g_load_time_initializer.m_compute_time += get_time();
|
||||
#endif
|
||||
#ifdef DEBUG
|
||||
for(tc_idx=0;tc_idx<numTestCases;++tc_idx)
|
||||
|
|
@ -324,7 +320,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
}
|
||||
#endif
|
||||
#ifdef DO_PROFILING
|
||||
start_time = getCurrClk();
|
||||
start_time = get_time();
|
||||
#endif
|
||||
RELEASE_DOUBLE_ARRAY_ELEMENTS(likelihoodArray, likelihoodDoubleArray, 0); //release mode 0, copy back results to Java memory
|
||||
|
||||
|
|
@ -337,7 +333,7 @@ JNIEXPORT void JNICALL Java_org_broadinstitute_sting_utils_pairhmm_JNILoglessPai
|
|||
}
|
||||
readBasesArrayVector.clear();
|
||||
#ifdef DO_PROFILING
|
||||
g_load_time_initializer.m_data_transfer_time += (getCurrClk()-start_time);
|
||||
g_load_time_initializer.m_data_transfer_time += get_time();
|
||||
#endif
|
||||
tc_array.clear();
|
||||
#ifdef DO_PROFILING
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ int main(int argc, char** argv)
|
|||
vector<testcase> tc_vector;
|
||||
tc_vector.clear();
|
||||
testcase tc;
|
||||
double total_time = 0;
|
||||
uint64_t total_time = 0;
|
||||
while(1)
|
||||
{
|
||||
int break_value = use_old_read_testcase ? read_testcase(&tc, fptr) : read_mod_testcase(ifptr,&tc,true);
|
||||
|
|
@ -55,7 +55,7 @@ int main(int argc, char** argv)
|
|||
vector<double> results_vec;
|
||||
results_vec.clear();
|
||||
results_vec.resize(tc_vector.size());
|
||||
double start_time = getCurrClk();
|
||||
get_time();
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size) num_threads(12)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
|
|
@ -71,7 +71,8 @@ int main(int argc, char** argv)
|
|||
|
||||
results_vec[i] = result;
|
||||
}
|
||||
total_time += (getCurrClk()-start_time);
|
||||
total_time += get_time();
|
||||
#pragma omp parallel for schedule(dynamic,chunk_size)
|
||||
for(unsigned i=0;i<tc_vector.size();++i)
|
||||
{
|
||||
testcase& tc = tc_vector[i];
|
||||
|
|
@ -83,6 +84,10 @@ int main(int argc, char** argv)
|
|||
cout << std::scientific << baseline_result << " "<<results_vec[i]<<"\n";
|
||||
delete tc_vector[i].rs;
|
||||
delete tc_vector[i].hap;
|
||||
delete tc_vector[i].q;
|
||||
delete tc_vector[i].i;
|
||||
delete tc_vector[i].d;
|
||||
delete tc_vector[i].c;
|
||||
}
|
||||
results_vec.clear();
|
||||
tc_vector.clear();
|
||||
|
|
@ -90,7 +95,7 @@ int main(int argc, char** argv)
|
|||
if(break_value < 0)
|
||||
break;
|
||||
}
|
||||
cout << "Total time "<< total_time << "\n";
|
||||
cout << "Total time "<< ((double)total_time)/1e9 << "\n";
|
||||
if(use_old_read_testcase)
|
||||
fclose(fptr);
|
||||
else
|
||||
|
|
|
|||
|
|
@ -3,15 +3,15 @@ rm -f *.txt *.log
|
|||
GSA_ROOT_DIR=/home/karthikg/broad/gsa-unstable
|
||||
#-Djava.library.path is needed if you are using JNI_LOGLESS_CACHING, else not needed
|
||||
java -Djava.library.path=${GSA_ROOT_DIR}/PairHMM_JNI -jar $GSA_ROOT_DIR/dist/GenomeAnalysisTK.jar -T HaplotypeCaller \
|
||||
-R /opt/Genomics/ohsu/dnapipeline/humanrefgenome/human_g1k_v37.fasta \
|
||||
-I /data/simulated/sim1M_pairs_final.bam \
|
||||
-R /data/broad/samples/joint_variant_calling/broad_reference/human_g1k_v37_decoy.fasta \
|
||||
-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
--dbsnp /data/broad/samples/joint_variant_calling/dbSNP/00-All.vcf \
|
||||
-stand_call_conf 50.0 \
|
||||
-stand_emit_conf 10.0 \
|
||||
--pair_hmm_implementation JNI_LOGLESS_CACHING \
|
||||
-XL unmapped \
|
||||
-o output.raw.snps.indels.vcf
|
||||
|
||||
#-XL unmapped \
|
||||
#--pair_hmm_implementation JNI_LOGLESS_CACHING \
|
||||
#-I /data/simulated/sim1M_pairs_final.bam \
|
||||
#-I /data/broad/samples/joint_variant_calling/NA12878_low_coverage_alignment/NA12878.chrom11.ILLUMINA.bwa.CEU.low_coverage.20121211.bam \
|
||||
|
|
|
|||
|
|
@ -150,7 +150,8 @@ typedef struct
|
|||
{
|
||||
int rslen, haplen;
|
||||
/*int *q, *i, *d, *c;*/
|
||||
int q[MROWS], i[MROWS], d[MROWS], c[MROWS];
|
||||
/*int q[MROWS], i[MROWS], d[MROWS], c[MROWS];*/
|
||||
char *q, *i, *d, *c;
|
||||
char *hap, *rs;
|
||||
int *ihap;
|
||||
int *irs;
|
||||
|
|
|
|||
|
|
@ -90,10 +90,10 @@ int read_testcase(testcase *tc, FILE* ifp)
|
|||
tc->ihap = (int *) malloc(tc->haplen*sizeof(int));
|
||||
tc->irs = (int *) malloc(tc->rslen*sizeof(int));
|
||||
|
||||
//tc->q = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->i = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->d = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
//tc->c = (int *) malloc(sizeof(int) * tc->rslen);
|
||||
tc->q = (char *) malloc(sizeof(char) * tc->rslen);
|
||||
tc->i = (char *) malloc(sizeof(char) * tc->rslen);
|
||||
tc->d = (char *) malloc(sizeof(char) * tc->rslen);
|
||||
tc->c = (char *) malloc(sizeof(char) * tc->rslen);
|
||||
|
||||
for (x = 0; x < tc->rslen; x++)
|
||||
{
|
||||
|
|
@ -199,18 +199,22 @@ int read_mod_testcase(ifstream& fptr, testcase* tc, bool reformat)
|
|||
memcpy(tc->hap, tokens[0].c_str(), tokens[0].size());
|
||||
tc->rs = new char[tokens[1].size()+2];
|
||||
tc->rslen = tokens[1].size();
|
||||
tc->q = new char[tc->rslen];
|
||||
tc->i = new char[tc->rslen];
|
||||
tc->d = new char[tc->rslen];
|
||||
tc->c = new char[tc->rslen];
|
||||
//cout << "Lengths "<<tc->haplen <<" "<<tc->rslen<<"\n";
|
||||
memcpy(tc->rs, tokens[1].c_str(),tokens[1].size());
|
||||
assert(tokens.size() == 2 + 4*(tc->rslen));
|
||||
assert(tc->rslen < MROWS);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->q[j] = convToInt(tokens[2+0*tc->rslen+j]);
|
||||
tc->q[j] = (char)convToInt(tokens[2+0*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->i[j] = convToInt(tokens[2+1*tc->rslen+j]);
|
||||
tc->i[j] = (char)convToInt(tokens[2+1*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->d[j] = convToInt(tokens[2+2*tc->rslen+j]);
|
||||
tc->d[j] = (char)convToInt(tokens[2+2*tc->rslen+j]);
|
||||
for(unsigned j=0;j<tc->rslen;++j)
|
||||
tc->c[j] = convToInt(tokens[2+3*tc->rslen+j]);
|
||||
tc->c[j] = (char)convToInt(tokens[2+3*tc->rslen+j]);
|
||||
|
||||
if(reformat)
|
||||
{
|
||||
|
|
@ -245,3 +249,14 @@ double getCurrClk() {
|
|||
gettimeofday(&tv, NULL);
|
||||
return (double)tv.tv_sec + (double)tv.tv_usec / 1000000.0;
|
||||
}
|
||||
|
||||
/*
 * Returns the number of nanoseconds elapsed since the previous call to
 * get_time() and records the current timestamp for the next call.
 *
 * store_struct: optional out-parameter; when non-null, the CLOCK_REALTIME
 *               timestamp read by this call is written into it.
 *
 * The previous timestamp lives in a function-local static, which means:
 *  - the first call has no earlier timestamp to compare against and returns
 *    the time since the epoch, so call once to arm the timer and a second
 *    time to read the interval;
 *  - every caller shares the same timer and no locking is done here.
 *
 * Returns 0 if the clock could not be read or if the clock stepped backwards
 * between two calls (a negative interval would otherwise wrap around to a
 * huge unsigned value and corrupt any accumulated total).
 *
 * NOTE(review): CLOCK_REALTIME can be adjusted while the program runs;
 * CLOCK_MONOTONIC is the usual choice for interval timing. It is kept here
 * because store_struct exposes the timestamp to callers - confirm nobody
 * relies on it being wall-clock time before switching.
 */
uint64_t get_time(struct timespec* store_struct)
{
  static struct timespec start_time;   //zero-initialized before the first call
  struct timespec curr_time;
  struct timespec* ptr = (store_struct == 0) ? &curr_time : store_struct;
  //On failure *ptr is not filled in, leave the saved timestamp untouched
  if(clock_gettime(CLOCK_REALTIME, ptr) != 0)
    return 0;
  //Force 64-bit arithmetic: time_t/long may be 32 bits wide, in which case
  //multiplying the seconds difference by 1e9 overflows after ~2 seconds
  const int64_t nsec_per_sec = 1000000000;
  int64_t diff_time = (static_cast<int64_t>(ptr->tv_sec) - static_cast<int64_t>(start_time.tv_sec))*nsec_per_sec
    + (static_cast<int64_t>(ptr->tv_nsec) - static_cast<int64_t>(start_time.tv_nsec));
  start_time = *ptr;
  return (diff_time < 0) ? 0 : static_cast<uint64_t>(diff_time);
}
|
||||
|
|
|
|||
|
|
@ -27,4 +27,5 @@ template<class NUMBER>
|
|||
NUMBER compute_full_prob(testcase *tc, NUMBER *before_last_log=0);
|
||||
void initialize_function_pointers();
|
||||
double getCurrClk();
|
||||
uint64_t get_time(struct timespec* x=0);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -68,8 +68,8 @@ import java.util.HashMap;
|
|||
public class JNILoglessPairHMM extends LoglessPairHMM {
|
||||
|
||||
private static final boolean debug = false; //simulates ifdef
|
||||
private static final boolean verify = debug || true; //simulates ifdef
|
||||
private static final boolean debug0_1 = true; //simulates ifdef
|
||||
private static final boolean verify = debug || false; //simulates ifdef
|
||||
private static final boolean debug0_1 = false; //simulates ifdef
|
||||
private static final boolean debug1 = false; //simulates ifdef
|
||||
private static final boolean debug2 = false;
|
||||
private static final boolean debug3 = false;
|
||||
|
|
|
|||
Loading…
Reference in New Issue