diff --git a/CMakeLists.txt b/CMakeLists.txt index c823fe5..e0e4f84 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,5 +5,5 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON) # set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread") # set(CMAKE_BUILD_TYPE Debug) # set(CMAKE_BUILD_TYPE Release) -add_definitions(-DSHOW_PERF=1) +# add_definitions(-DSHOW_PERF=1) add_subdirectory(src) diff --git a/src/bqsr/apply_bqsr_pipeline.cpp b/src/bqsr/apply_bqsr_pipeline.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/bqsr/bqsr_entry.cpp b/src/bqsr/bqsr_entry.cpp index f7e765e..ee2d591 100644 --- a/src/bqsr/bqsr_entry.cpp +++ b/src/bqsr/bqsr_entry.cpp @@ -153,8 +153,10 @@ int SerialBQSR(AuxVar &aux) { ++round; // 一. 读取bam数据 size_t readNum = 0; + PROF_START(GP_read); if (inBamBuf.ReadStat() >= 0) readNum = inBamBuf.ReadBam(); + PROF_GP_END(GP_read); if (readNum < 1) { break; } @@ -176,7 +178,7 @@ int SerialBQSR(AuxVar &aux) { if (sd.read_len <= 0) continue; - PROF_START(clip_read); + PROF_START(GP_clip_read); // 4. 对read的两端进行检测,去除(hardclip)adapter ReadTransformer::hardClipAdaptorSequence(bw, sd); if (sd.read_len <= 0) @@ -187,7 +189,7 @@ int SerialBQSR(AuxVar &aux) { continue; // 应用所有的变换,计算samdata的相关信息 sd.applyTransformations(); - PROF_END(gprof[GP_clip_read], clip_read); + PROF_GP_END(GP_clip_read); //const char* qname = bam_get_qname(sd.bw->b); // fprintf(gf[4], "%ld %d %d %d\n", sd.rid, sd.read_len, 1 + BamWrap::bam_pos(sd.start_pos), 1 + BamWrap::bam_pos(sd.end_pos)); @@ -195,7 +197,7 @@ int SerialBQSR(AuxVar &aux) { // 6. 更新每个read的platform信息,好像没啥用,暂时忽略 // 这里计算snp和indel有点问题,snp和del结果不对,白天调试一下 - const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel); + const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel, 0); // fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); // for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[4], "%d ", skips[ii] ? 1 : 0); @@ -224,9 +226,9 @@ int SerialBQSR(AuxVar &aux) { // 到这里,基本的数据都准备好了,后续就是进行bqsr的统计了 // 8. 计算这条read对应的协变量 - PROF_START(covariate); - CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true); - PROF_END(gprof[GP_covariate], covariate); + PROF_START(GP_covariate); + CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true, 0); + PROF_GP_END(GP_covariate); // fprintf(gf[4], "%ld %d\n", sd.rid, sd.read_len); // for (auto &arr1 : readCovariates) { @@ -248,7 +250,7 @@ int SerialBQSR(AuxVar &aux) { // fprintf(gf[3], "\n"); // 9. 计算这条read需要跳过的位置 - PROF_START(read_vcf); + PROF_START(GP_read_vcf); RecalFuncs::calculateKnownSites(sd, aux.vcfArr, aux.header, skips, 0); for (int ii = 0; ii < sd.read_len; ++ii) { skips[ii] = skips[ii] || (ContextCovariate::baseIndexMap[sd.bases[ii]] == -1) || @@ -257,7 +259,7 @@ int SerialBQSR(AuxVar &aux) { //stringstream ss; //for (auto s : skips) ss << (int)s << ' '; //spdlog::info("{}", ss.str()); - PROF_GP_END(read_vcf); + PROF_GP_END(GP_read_vcf); // fprintf(gf[3], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); // for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[3], "%d ", skips[ii] ? 1 : 0); @@ -268,19 +270,19 @@ int SerialBQSR(AuxVar &aux) { // fprintf(gf[0], "\n"); // 10. 根据BAQ进一步处理snp,indel,得到处理后的数据 - PROF_START(frac_err); + PROF_START(GP_frac_err); RecalFuncs::calculateFractionalErrorArray(isSNP, baqArray, snpErrors); RecalFuncs::calculateFractionalErrorArray(isIns, baqArray, insErrors); RecalFuncs::calculateFractionalErrorArray(isDel, baqArray, delErrors); - PROF_GP_END(frac_err); + PROF_GP_END(GP_frac_err); // aggregate all of the info into our info object, and update the data // 11. 合并之前计算的数据,得到info,并更新bqsr table数据 ReadRecalInfo info(sd, readCovariates, skips, snpErrors, insErrors, delErrors); - PROF_START(update_info); + PROF_START(GP_update_info); RecalUtils::updateRecalTablesForRead(info, recalTables); - PROF_END(gprof[GP_update_info], update_info); + PROF_GP_END(GP_update_info); } readNumSum += readNum; inBamBuf.ClearAll(); // @@ -289,26 +291,32 @@ int SerialBQSR(AuxVar &aux) { spdlog::info("read count: {}", readNumSum); // 12. 创建总结数据 + PROF_START(GP_collapse_round); collapseQualityScoreTableToReadGroupTable(recalTables.readGroupTable, recalTables.qualityScoreTable); roundTableValues(recalTables); + PROF_GP_END(GP_collapse_round); #if 0 printRecalTables(recalTables); #endif // 13. 量化质量分数 + PROF_START(GP_quantize); QuantizationInfo quantInfo(recalTables, nsgv::gBqsrArg.QUANTIZING_LEVELS); + PROF_GP_END(GP_quantize); // 14. 输出结果 + PROF_START(GP_print_report); RecalUtils::outputRecalibrationReport(nsgv::gBqsrArg, quantInfo, recalTables); + PROF_GP_END(GP_print_report); return 0; } // 多线程处理bam数据, tmd是乱序的? -static void thread_worker(void* data, long idx, int tid, int steal) { +static void thread_worker(void* data, long idx, int thid, int steal) { // static void thread_worker(void* data, long idx, int tid) { - AuxVar& aux = (*(vector*)data)[tid]; + AuxVar& aux = (*(vector*)data)[thid]; auto& readCovariates = aux.readCovariates; RecalTables& recalTables = aux.recalTables; SamData& sd = aux.sd; @@ -319,7 +327,6 @@ static void thread_worker(void* data, long idx, int tid, int steal) { auto &bams = *aux.bamArr; if (steal) for (auto& vcf : aux.vcfArr) vcf.knownSites.clear(); - int f = tid * 4; #if 1 int startIdx = idx * aux.BAM_BLOCK_NUM; int stopIdx = std::min((size_t)(idx + 1) * aux.BAM_BLOCK_NUM, bams.size()); @@ -338,26 +345,16 @@ static void thread_worker(void* data, long idx, int tid, int steal) { sd.rid = i + aux.processedReads; if (sd.read_len <= 0) continue; - //PROF_START(clip_read); + PROF_START(TP_clip_read); ReadTransformer::hardClipAdaptorSequence(bw, sd); if (sd.read_len <= 0) continue; ReadTransformer::hardClipSoftClippedBases(bw, sd); if (sd.read_len <= 0) continue; sd.applyTransformations(); - // PROF_END(gprof[GP_clip_read], clip_read); + PROF_TP_END(TP_clip_read); + + const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel, thid); - const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel); -#if 0 - fprintf(gf[f + 0], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[f + 0], "%d ", isSNP[ii]); - fprintf(gf[f + 0], "\n"); - fprintf(gf[f + 1], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[f + 1], "%d ", isIns[ii]); - fprintf(gf[f + 1], "\n"); - fprintf(gf[f + 2], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[f + 2], "%d ", isDel[ii]); - fprintf(gf[f + 2], "\n"); -#endif bool baqCalculated = false; if (nErrors == 0 || !nsgv::gBqsrArg.enableBAQ) { baqCalculated = BAQ::flatBAQArray(sd, baqArray); @@ -366,44 +363,27 @@ static void thread_worker(void* data, long idx, int tid, int steal) { } if (!baqCalculated) continue; - // PROF_START(covariate); - CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true); - // PROF_END(gprof[GP_covariate], covariate); + PROF_START(TP_covariate); + CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true, thid); + PROF_TP_END(TP_covariate); - // PROF_START(read_vcf); - RecalFuncs::calculateKnownSites(sd, aux.vcfArr, aux.header, skips, tid); + RecalFuncs::calculateKnownSites(sd, aux.vcfArr, aux.header, skips, thid); for (int ii = 0; ii < sd.read_len; ++ii) { skips[ii] = skips[ii] || (ContextCovariate::baseIndexMap[sd.bases[ii]] == -1) || sd.base_quals[ii] < nsgv::gBqsrArg.PRESERVE_QSCORES_LESS_THAN; } - // PROF_GP_END(read_vcf); -#if 0 - fprintf(gf[f + 3], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[f + 3], "%d ", skips[ii] ? 1 : 0); - fprintf(gf[f + 3], "\n"); -#endif - -#if 0 - int fidx = 0 + 2 * tid; - //if (sd.rid % 2 == 0) fidx = 0 + 2 * tid; - //else fidx = 1 + 2 * tid; - fprintf(gf[fidx], "%ld %d\t", sd.rid, sd.read_len); - for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[fidx], "%d ", skips[ii] ? 1 : 0); - fprintf(gf[fidx], "\n"); -#endif - - // PROF_START(frac_err); + PROF_START(TP_frac_err); RecalFuncs::calculateFractionalErrorArray(isSNP, baqArray, snpErrors); RecalFuncs::calculateFractionalErrorArray(isIns, baqArray, insErrors); RecalFuncs::calculateFractionalErrorArray(isDel, baqArray, delErrors); - // PROF_GP_END(frac_err); + PROF_TP_END(TP_frac_err); ReadRecalInfo info(sd, readCovariates, skips, snpErrors, insErrors, delErrors); - //PROF_START(update_info); + PROF_START(TP_update_info); RecalUtils::updateRecalTablesForRead(info, recalTables); - //PROF_END(gprof[GP_update_info], update_info); + PROF_TP_END(TP_update_info); } } @@ -418,7 +398,9 @@ int ParallelBQSR(vector& auxArr) { ++round; // 一. 读取bam数据 size_t readNum = 0; + PROF_START(GP_read); if (inBamBuf.ReadStat() >= 0) readNum = inBamBuf.ReadBam(); + PROF_GP_END(GP_read); if (readNum < 1) { break; } auto bams = inBamBuf.GetBamArr(); for_each(auxArr.begin(), auxArr.end(), [&](AuxVar& aux) { @@ -426,11 +408,13 @@ int ParallelBQSR(vector& auxArr) { }); spdlog::info("{} reads processed in {} round", readNum, round); + PROF_START(GP_thread_worker); #if 1 kt_for_steal(auxArr.size(), thread_worker, &auxArr, (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM); #else kt_for_steal(auxArr.size(), thread_worker, &auxArr, auxArr.size()); #endif + PROF_GP_END(GP_thread_worker); readNumSum += readNum; AuxVar::processedReads += readNum; inBamBuf.ClearAll(); // @@ -442,6 +426,8 @@ int ParallelBQSR(vector& auxArr) { // printRecalTables(recalTables); for (int i = 0; i < auxArr.size(); ++i) spdlog::info("thread {} processed reads {}.", i, auxArr[i].threadProcessedReads); + + PROF_START(GP_merge_covs); for (int i = 1; i < auxArr.size(); ++i) { auxArr[0].threadProcessedReads += auxArr[i].threadProcessedReads; _Foreach3DK(auxArr[i].recalTables.qualityScoreTable, qualDatum, { @@ -460,19 +446,26 @@ int ParallelBQSR(vector& auxArr) { } }); } + PROF_GP_END(GP_merge_covs); spdlog::info("All processed reads {}.", auxArr[0].threadProcessedReads); - + // 创建总结数据 + PROF_START(GP_collapse_round); collapseQualityScoreTableToReadGroupTable(recalTables.readGroupTable, recalTables.qualityScoreTable); roundTableValues(recalTables); + PROF_GP_END(GP_collapse_round); // printRecalTables(recalTables); // 量化质量分数 + PROF_START(GP_quantize); QuantizationInfo quantInfo(recalTables, nsgv::gBqsrArg.QUANTIZING_LEVELS); + PROF_GP_END(GP_quantize); // 输出结果 + PROF_START(GP_print_report); RecalUtils::outputRecalibrationReport(nsgv::gBqsrArg, quantInfo, recalTables); + PROF_GP_END(GP_print_report); return 0; } @@ -562,7 +555,7 @@ static void globalDestroy() { int BaseRecalibrator() { int ret = 0; - PROF_START(whole_process); + PROF_START(GP_whole_process); globalInit(); if (nsgv::gBqsrArg.NUM_THREADS == 1) ret = SerialBQSR(nsgv::gAuxVars[0]); // 串行处理数据,生成recal table @@ -570,7 +563,7 @@ int BaseRecalibrator() { ret = ParallelBQSR(nsgv::gAuxVars); // 并行处理数据,生成recal table globalDestroy(); sam_close(nsgv::gInBamFp); - PROF_END(gprof[GP_whole_process], whole_process); + PROF_GP_END(GP_whole_process); return ret; } diff --git a/src/bqsr/bqsr_funcs.cpp b/src/bqsr/bqsr_funcs.cpp deleted file mode 100644 index e69de29..0000000 diff --git a/src/bqsr/covariate.cpp b/src/bqsr/covariate.cpp index 2cba189..ec58306 100644 --- a/src/bqsr/covariate.cpp +++ b/src/bqsr/covariate.cpp @@ -1,4 +1,5 @@ #include "covariate.h" +#include "util/profiling.h" // for EventType EventTypeValue EventType::BASE_SUBSTITUTION = {0, 'M', "Base Substitution"}; @@ -24,11 +25,19 @@ int CycleCovariate::MAXIMUM_CYCLE_VALUE; // for CovariateUtils // 对一条read计算协变量(该协变量被上一个read用过) void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values, - bool recordIndelValues) { + bool recordIndelValues, int thid) { + PROF_START(TP_readgroup); ReadGroupCovariate::RecordValues(sd, header, values, recordIndelValues); + PROF_TP_END(TP_readgroup); + PROF_START(TP_qualityscore); BaseQualityCovariate::RecordValues(sd, header, values, recordIndelValues); + PROF_TP_END(TP_qualityscore); + PROF_START(TP_context); ContextCovariate::RecordValues(sd, header, values, recordIndelValues); + PROF_TP_END(TP_context); + PROF_START(TP_cycle); CycleCovariate::RecordValues(sd, header, values, recordIndelValues); + PROF_TP_END(TP_cycle); } /* diff --git a/src/bqsr/covariate.h b/src/bqsr/covariate.h index f0f227a..800ac19 100644 --- a/src/bqsr/covariate.h +++ b/src/bqsr/covariate.h @@ -316,5 +316,5 @@ struct CovariateUtils { } // 对一条read计算协变量(该协变量被上一个read用过) - static void ComputeCovariates(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues); + static void ComputeCovariates(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues, int thid); }; \ No newline at end of file diff --git a/src/bqsr/recal_funcs.h b/src/bqsr/recal_funcs.h index ae0c640..b7682eb 100644 --- a/src/bqsr/recal_funcs.h +++ b/src/bqsr/recal_funcs.h @@ -29,19 +29,19 @@ struct RecalFuncs { } // 计算该read的每个碱基位置是否是SNP或Indel - static int calculateIsSNPOrIndel(AuxVar& aux, SamData& sd, StableArray& isSNP, StableArray& isIns, StableArray& isDel) { + static int calculateIsSNPOrIndel(AuxVar& aux, SamData& sd, StableArray& isSNP, StableArray& isIns, StableArray& isDel, int thid) { isSNP.resize_fill(sd.read_len, 0); isIns.resize_fill(sd.read_len, 0); isDel.resize_fill(sd.read_len, 0); // 1. 读取参考基因组,先看看串行运行性能,稍后可以将读入ref和vcf合并起来做成一个并行流水线步骤 Interval interval{sd.start_pos, sd.end_pos}; // 闭区间 - PROF_START(read_ref); + PROF_START(TP_read_ref); read_ref_base(aux, interval.left, interval); - PROF_GP_END(read_ref); + PROF_TP_END(TP_read_ref); const char *refBases = aux.ref_seq; // 2. 遍历cigar,计算每个碱基是否是SNP或Indel - PROF_START(calc_snp); + PROF_START(TP_calc_snp); int readPos = 0, refPos = 0, nEvents = 0; for (int i = 0; i < sd.cigars.size(); ++i) { const char c = sd.cigars[i].op; @@ -77,7 +77,7 @@ struct RecalFuncs { } } nEvents += std::accumulate(isIns.begin(), isIns.end(), 0) + std::accumulate(isDel.begin(), isDel.end(), 0); - PROF_GP_END(calc_snp); + PROF_TP_END(TP_calc_snp); return nEvents; } @@ -114,6 +114,7 @@ struct RecalFuncs { // update vcfs // int idx = 0; + PROF_START(TP_read_vcf); for (auto& vcf : vcfs) { // 为啥多线程环境会出现,deque的front和[0]不一样?好像是调试的时候的问题,实际运行时没再出现 // if (vcf.knownSites.front().left != vcf.knownSites[0].left || vcf.knownSites.front().right != vcf.knownSites[0].right) @@ -166,7 +167,9 @@ struct RecalFuncs { } } } + PROF_TP_END(TP_read_vcf); // fprintf(gf[0], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid); + PROF_START(TP_calc_skips); for (auto& vcf : vcfs) { for (auto& intv : vcf.knownSites) { // knownSite is outside clipping window for the read, ignore @@ -198,6 +201,7 @@ struct RecalFuncs { } //idx += 1; } + PROF_TP_END(TP_calc_skips); //fprintf(gf[0], "\n"); } diff --git a/src/main.cpp b/src/main.cpp index b00b24d..4f943d0 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -143,7 +143,7 @@ int main_BaseRecalibrator(int argc, char *argv[]) { BaseRecalibrator(); spdlog::info("fast base recalibration phase-1 end"); - DisplayProfiling(1); + DisplayProfiling(nsgv::gBqsrArg.NUM_THREADS); return 0; } diff --git a/src/util/profiling.cpp b/src/util/profiling.cpp index 6bfb7fc..91a4d01 100644 --- a/src/util/profiling.cpp +++ b/src/util/profiling.cpp @@ -36,50 +36,49 @@ static int CalcThreadTime(uint64_t *a, int len, double *max, double *min, double } #define PRINT_GP(gpname) \ - fprintf(stderr, "time G %-15s: %0.2lfs\n", #gpname, gprof[GP_##gpname] * 1.0 / proc_freq); + fprintf(stderr, "time %-15s: %0.2lfs\n", #gpname, gprof[gpname] * 1.0 / proc_freq); -#define PRINT_TP(tpname, nthread) \ - { \ - double maxTime, minTime, avgTime; \ - CalcThreadTime(tprof[TP_##tpname], nthread, &maxTime, &minTime, &avgTime); \ - fprintf(stderr, "time T %-15s: avg %0.2lfs min %0.2lfs max %0.2lfs\n", #tpname, avgTime, minTime, maxTime); \ +#define PRINT_TP(tpname) \ + { \ + double maxTime, minTime, avgTime; \ + CalcThreadTime(tprof[tpname], nthread, &maxTime, &minTime, &avgTime); \ + fprintf(stderr, "time %-15s: avg %0.2lfs min %0.2lfs max %0.2lfs\n", #tpname, avgTime, minTime, maxTime); \ } int DisplayProfiling(int nthread) { #ifdef SHOW_PERF fprintf(stderr, "\n"); - // PRINT_GP(read_wait); - // PRINT_GP(gen_wait); - // PRINT_GP(sort_wait); - // PRINT_GP(markdup_wait); - // PRINT_GP(intersect_wait); - PRINT_GP(clip_read); - PRINT_GP(read_ref); - PRINT_GP(calc_snp); - PRINT_GP(read_vcf); - PRINT_GP(covariate); - PRINT_GP(frac_err); - PRINT_GP(update_info); - //PRINT_GP(markdup); - //PRINT_GP(intersect); - // PRINT_GP(merge_result); - // PRINT_GP(sort_pair); - // PRINT_GP(sort_frag); - // PRINT_GP(markdup_pair); - // PRINT_GP(markdup_frag); - // PRINT_GP(merge_match); - // PRINT_GP(merge_markdup); - // PRINT_GP(merge_update); - // PRINT_GP(merge_add); - //PRINT_GP(markdup_all); - // PRINT_GP(final_read); - //PRINT_GP(write); - // PRINT_GP(whole_process); - - // PRINT_TP(gen, nthread); - // PRINT_TP(sort_frag, nthread); - // PRINT_TP(sort_pair, nthread); + PRINT_GP(GP_read); + if (nthread == 1) { + PRINT_GP(GP_clip_read); + PRINT_GP(GP_read_ref); + PRINT_GP(GP_calc_snp); + PRINT_GP(GP_read_vcf); + PRINT_TP(TP_read_vcf); + PRINT_TP(TP_calc_skips); + PRINT_GP(GP_covariate); + PRINT_TP(TP_readgroup); + PRINT_TP(TP_qualityscore); + PRINT_TP(TP_context); + PRINT_TP(TP_cycle); + PRINT_GP(GP_frac_err); + PRINT_GP(GP_update_info); + } else { + PRINT_TP(TP_clip_read); + PRINT_TP(TP_read_ref); + PRINT_TP(TP_calc_snp); + PRINT_TP(TP_read_vcf); + PRINT_TP(TP_calc_skips); + PRINT_TP(TP_covariate); + PRINT_TP(TP_readgroup); + PRINT_TP(TP_qualityscore); + PRINT_TP(TP_context); + PRINT_TP(TP_cycle); + PRINT_TP(TP_frac_err); + PRINT_TP(TP_update_info); + } + PRINT_GP(GP_whole_process); fprintf(stderr, "\n"); #endif diff --git a/src/util/profiling.h b/src/util/profiling.h index d89657b..bc96123 100644 --- a/src/util/profiling.h +++ b/src/util/profiling.h @@ -25,7 +25,8 @@ extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE]; #define PROF_START(tmp_time) uint64_t prof_tmp_##tmp_time = RealtimeMsec() #define PROF_START_AGAIN(tmp_time) prof_tmp_##tmp_time = RealtimeMsec() #define PROF_END(result, tmp_time) result += RealtimeMsec() - prof_tmp_##tmp_time -#define PROF_GP_END(tmp_time) gprof[GP_##tmp_time] += RealtimeMsec() - prof_tmp_##tmp_time +#define PROF_GP_END(tmp_time) gprof[tmp_time] += RealtimeMsec() - prof_tmp_##tmp_time +#define PROF_TP_END(tmp_time) tprof[tmp_time][thid] += RealtimeMsec() - prof_tmp_##tmp_time #define PROF_PRINT_START(tmp_time) uint64_t tmp_time = RealtimeMsec() #define PROF_PRINT_END(tmp_time) \ tmp_time = RealtimeMsec() - tmp_time; \ @@ -34,6 +35,7 @@ extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE]; #define PROF_START(tmp_time) #define PROF_END(result, tmp_time) #define PROF_GP_END(tmp_time) +#define PROF_TP_END(tmp_time) #define PROF_PRINT_START(tmp_time) #define PROF_PRINT_END(tmp_time) #endif @@ -41,40 +43,39 @@ extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE]; // GLOBAL enum { GP_0 = 0, GP_1, GP_2, GP_3, GP_4, GP_5, GP_6, GP_7, GP_8, GP_9, GP_10 }; enum { - GP_read_wait = 11, - GP_clip_read, + GP_clip_read = 11, GP_calc_snp, GP_covariate, GP_read_ref, GP_read_vcf, GP_frac_err, GP_update_info, - GP_gen_wait, - GP_sort_wait, - GP_markdup_wait, - GP_intersect_wait, + GP_merge_covs, + GP_collapse_round, + GP_quantize, + GP_print_report, GP_read, - GP_gen, - GP_sort, - GP_markdup, - GP_intersect, - GP_merge_result, - GP_markdup_pair, - GP_markdup_frag, - GP_sort_pair, - GP_sort_frag, - GP_merge_match, - GP_merge_markdup, - GP_merge_update, - GP_merge_add, - GP_markdup_all, - GP_final_read, GP_write, + GP_thread_worker, GP_whole_process }; // THREAD enum { TP_0 = 0, TP_1, TP_2, TP_3, TP_4, TP_5, TP_6, TP_7, TP_8, TP_9, TP_10 }; -enum { TP_gen = 11, TP_sort, TP_sort_frag, TP_sort_pair}; +enum { + TP_clip_read = 11, + TP_calc_snp, + TP_covariate, + TP_read_ref, + TP_read_vcf, + TP_calc_skips, + TP_frac_err, + TP_readgroup, + TP_qualityscore, + TP_context, + TP_cycle, + TP_update_info, + TP_whole_process +}; uint64_t RealtimeMsec(void);