From c04c6541efa24da1ce15e10a6ad93099ba1b5b57 Mon Sep 17 00:00:00 2001 From: zzh Date: Sun, 3 Sep 2023 23:59:24 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E4=BA=86=E8=AF=BB=E6=96=87?= =?UTF-8?q?=E4=BB=B6=E7=9A=84bug=EF=BC=8C=E5=8E=9F=E5=9B=A0=E6=98=AFtarget?= =?UTF-8?q?=E6=9C=89=E4=BA=9B=E8=A1=8C=E6=98=AF0=EF=BC=8C=E6=B2=A1?= =?UTF-8?q?=E6=9C=89=E7=A2=B1=E5=9F=BA=EF=BC=8C=E4=BF=9D=E7=95=99=E4=BA=86?= =?UTF-8?q?=E6=B5=8B=E8=AF=95=E4=BB=A3=E7=A0=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 8 +++- get_line | Bin 0 -> 16256 bytes get_line.c | 61 ++++++++++++++++++++++++ ksw_ext_avx2.c | 58 ++++++++++++----------- ksw_ext_avx2_u8.c | 49 ++++++++++---------- ksw_ext_normal.c | 50 ++++++++++---------- main.c | 116 ++++++++++++++++++++++++++++++++++++++-------- run_debug.sh | 4 +- utils.h | 4 ++ 9 files changed, 252 insertions(+), 98 deletions(-) create mode 100755 get_line create mode 100644 get_line.c diff --git a/Makefile b/Makefile index 5d685ea..a41c8fa 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,10 @@ CC= gcc #CFLAGS= -g -Wall -Wno-unused-function -mavx2 CFLAGS= -Wall -Wno-unused-function -O2 -mavx2 -DFLAGS= -DSHOW_PERF +DFLAGS= -DSHOW_PERF -DDEBUG_RETURN_VALUE #DFLAGS= -DSHOW_PERF -DDEBUG_OUT -DDEBUG_RETURN_VALUE PROG= sw_perf +PROG2= get_line INCLUDES= LIBS= SUBDIRS= . @@ -30,9 +31,12 @@ endif all:$(PROG) -sw_perf:$(OBJS) main.o +$(PROG):$(OBJS) main.o $(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) main.o -o $@ -L. $(LIBS) +$(PROG2): get_line.o + $(CC) $(CFLAGS) $(LDFLAGS) get_line.o -o $@ -L. $(LIBS) + clean: rm -f *.o a.out $(PROG) *~ *.a diff --git a/get_line b/get_line new file mode 100755 index 0000000000000000000000000000000000000000..b583d5744f5c46fca463163f2f6b6877cd1a335d GIT binary patch literal 16256 zcmeHOZEPIH8J;`m!-T}XKvEI|$&vy}3clD7r!kGmofA82qog6uXog9yrmq6RbD0(vLA`$cSQLeQGSfmsy#-| zC*_1b8`OFDu>m?NjDEiX<>$vO(oHZ@8&r0lwiBPEy)NaK(3~*BkLx5Y7`eUA!;kXv zpBBE%J5+nKU5A{?Kch;oSer`rtY5b_m8ef8GsVIB!42!{*RP8fve6ryE>OQTd}vH< zy>*vBmtlaJ7sAw@ESh;(gdPX96Ikx{MuRgu37l+g4gizmyIA6i#nmjq=v_T2;wS9l^r)j zBKufO(rhmbqj%6W`jQzlmAuCiX)~3|_JYND=`{+rnYWF!naqg3T+!|oec7Cq5d}MM zXH$SgwrCSr`Ml`MTV_J^?aL=^OOQ7WuJvMTXGiO11;+dZb;LLhB%clMoty};r~M5EAD z+!GId6T>bn($d`hUgP{|mrBJU^}WUXDRptHV^YsN%O^KUp2jp?6E3Gby!Sqs^zhF2 zx^kHI@C!Hy@BKQnx<6Mxjh(|ye4Ne}PbuSzlD z;nh!;qxrdq=NysZRS%B~5|khVK?Z^h1Q`f25M&_8z$ebYUu#!HTrI6@ATwjV4fAjb@Mt# zk54`T;TQ>>qf^gNF7(l#>7y@9#=wt0fB(e(?Tu&K8sBpI-#Bq*VeQJ8&_^ah&ClDV{*5Ue?`k522)y|YPw?t>~l^pbw!yuAbgu2BK1OQ+=dvp=^pTb@ zRrS%+_CozY%RC5^qaT$@lL_?orMXRzLwn%swom?3`=Epu)t!6Ay*oe#CCEUKfgl4x z27(L(83-~EWFW{ukbxir|0fxs_dQg)QkGe;v}6Xatu%YU()yAqOUs$|fM#YA4ucn= zmX;}|d#t=Rlr3ue%nVGkTEAsqwLs)WN9Jz46H90wJMbu>%d0-u0HnwKr`|1<62R-H zOQnOrjlk~#4+Eb9zJ9J$>IBleQYxW)c8JhmBy{=0`PJii<|Lk8YCZtFYsp~CeCJiw z)u^t+rwj3O#MeaHYnI(od-1;NVX>+1rqA4X^(vyte>XmdV8`#cn72_G2fvpHRW<^t zpC`d@qG#Urn#hCU)|zEwRjoDJ!8!4o)nAz#uW1;W7yk8vn$__d4Z7BvNNY`XyZ3#P z3Ku|-fgl4x27(L(83-~EWFW{ukbxirK?d9m@cKDkFGr6YRA@~em4y=A1$v8=e7#zC zw_M4*-uQAQ^IEbxCG+|`dX}QX`uEP4vLqi>i`(d#jY@;6@EW#LO3!QTo|b0r8oFgl zrdO2SqTIx|TF)k*KJnb8jOh7{3a^Qyb*fYv6v2ExL09L;jlu# z|HsMIF=?@N^X8kh)w_C%8M~-8HewlSLw#d&(UBY1DCXY;qrA-#=je55S)uC^g65z~ zoTjuiY%AH*b*sd$0UyTd)SPdz@O6qm;^S{tJl{_cozjhC2UunJcU5-7zWnvyOycpB!Ft-s5|-@YVPtrStN~Ujkn922k}V z_*E774)B+R@hju|9@!4Q;(GXPp-fTLTvTQ36ZK!NP%kPthna`U;hLy4N zLs(32rY$2;Os9w7;*ktk+GVDO(Y_}k>HNLH5GrX|n!f4Z#R^PHi z7+X8Hx5hh-?d|P5+q#YJcxz`HJp4=jW4vVlW0vX5<^QfVE%0~t0F*I?l`w5nIAC&4#z|?V-yQkLhW2AH@7kgMC~aQR10z^ z1MOibvy+<^$|%FYmITfz?Nz9_0ik^JM0o(ZP-<^PnL%&gShQ57kplVi!*`p{RIkM?H;F$9%vn42WoAD1#;>+IgwD+sYS` z*-V*WK$o{tCON2DE@g|T)AJ~*(f%w%suZkV5w)#Bh%{DVopr8j)EZEub0C3eEXf#h zm`V?>qbW0;L@?=&OCzGVKhhZX74F{uo}&37zEad%fPdYO$MZvuOt#+gX$UkY!`FjT zX4`kB)b2c!<@Km|{r&sT=>PiO!r#lxb)e$)=lKPr#)b<0{xq+CA9FhHKhJv^8OK{4@!UrbO(|^K3?*SHr|9{`}tsmgcPN&+}A9 zzJA<)wqx86f0_fc%=2JIK7W7zWp9KSLxpn6{yZ;doK}Y1zW@9mR{pKZj_3J|;~sy? zncx3WaFi2{@BVH@q$5B7NJ;jU>x75#@%n4Zk1^*pMqR00{JzhBTp2L3J%?eL@u<)L zh*B`_Wk-eV$M_SUKhGl=*YJh$`E%H3ef~TjW8~*cw)fvZ$Cba^jHWCVno4Bg>&Nmd zs8IgdKcPtt<2ot2M}PajgU0KBNRy_FzjQri#ozvE&{#R^+=FDn&&|AF)Nju+<2m@$ zdi?ph;ZQwAyD(m*bU|kSkD#Ej%g>WM&*%NW({yqubpKIdKfeBtgCi66=jZD2wPY*}a=T{{FoP9QR#%%419oydGb0ZkKIo+Fr^3JM_bbQs;B< GiQ=D?NdT_^ literal 0 HcmV?d00001 diff --git a/get_line.c b/get_line.c new file mode 100644 index 0000000..6712d5d --- /dev/null +++ b/get_line.c @@ -0,0 +1,61 @@ +/********************************************************************************************* + Description: Get specific line of a txt file + + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2023/08/28 +***********************************************************************************************/ +#include +#include +#include + +#define READ_BUF_SIZE 102400000 + +int main(int argc, char *argv[]) +{ + if (argc < 3) + { + fprintf(stderr, "Please input the file path and the line number you want to get!\n"); + return 1; + } + const char *in_path = argv[1]; + size_t expect_line_num = atoi(argv[2]); + FILE *in_f = fopen(in_path, "r"); + char *read_buf = (char *)malloc(READ_BUF_SIZE); + + int actual_size = 0; + int expect_size = READ_BUF_SIZE; + size_t cur_line_num = 0; + size_t last_line_pos = 0; + size_t cur_line_pos = 0; + int i, j; + int flag_found = 0; + // 读取数据 + actual_size = fread(read_buf, 1, expect_size, in_f); + while (actual_size > 0) + { + for (i = 0; i < actual_size; ++i) + if (read_buf[i] == '\n') + { + cur_line_num += 1; + last_line_pos = cur_line_pos; + cur_line_pos = i; + if (cur_line_num == expect_line_num) + { + for (j = last_line_pos + 1; j <= cur_line_pos; ++j) + { + fprintf(stdout, "%c", read_buf[j]); + } + flag_found = 1; + break; + } + } + if (flag_found) + break; + actual_size = fread(read_buf, 1, expect_size, in_f); + } + if (!flag_found) + fprintf(stderr, "Invalid line number!\n"); + return 0; +} \ No newline at end of file diff --git a/ksw_ext_avx2.c b/ksw_ext_avx2.c index 498b098..a35cff8 100644 --- a/ksw_ext_avx2.c +++ b/ksw_ext_avx2.c @@ -289,6 +289,13 @@ int ksw_extend_avx2(thread_mem_t *tmem, int16_t ins[tlen + 1][qlen + 2]; int16_t del[tlen + 1][qlen + 2]; int16_t score[tlen + 1][qlen + 2]; + for (dii = 0; dii <= tlen; ++dii) + { + for (djj = 0; djj <= qlen; ++djj) + { + ins[dii][djj] = del[dii][djj] = score[dii][djj] = 0; + } + } ins[0][0] = del[0][0] = score[0][0] = init_score; ins[0][1] = MAX(0, init_score - (o_ins + e_ins)); del[1][0] = MAX(0, init_score - (o_del + e_del)); @@ -307,16 +314,15 @@ int ksw_extend_avx2(thread_mem_t *tmem, end1 = D; // 闭区间 else end1 = qlen; - beg1 = MAX(D - window_size, beg1); - end1 = MIN(D + window_size, end1); + // beg1 = MAX(D - window_size, beg1); + // end1 = MIN(D + window_size, end1); + // beg = MAX(beg1, beg); + // end = MIN(end1, end); + // if (beg > end) + // break; - beg = MAX(beg1, beg); - end = MIN(end1, end); - if (beg > end) - break; - - // beg = beg1; - // end = end1; + beg = beg1; + end = end1; iend = D - beg; // ref开始计算的位置,倒序 span = end - beg; @@ -415,20 +421,20 @@ int ksw_extend_avx2(thread_mem_t *tmem, } // 调整计算的边界 - for (j = beg; LIKELY(j <= end); ++j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - beg = j; - for (j = end + 1; LIKELY(j >= beg); --j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - end = j + 1 <= qlen ? j + 1 : qlen; + // for (j = beg; LIKELY(j <= end); ++j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // beg = j; + // for (j = end + 1; LIKELY(j >= beg); --j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // end = j + 1 <= qlen ? j + 1 : qlen; // beg = 0; // end = qlen; // uncomment this line for debugging @@ -441,9 +447,9 @@ int ksw_extend_avx2(thread_mem_t *tmem, { for (djj = 0; djj <= qlen; ++djj) { - fprintf(score_f_arr[1], "%-3d", score[dii][djj]); - fprintf(ins_ext_f_arr[1], "%-3d", ins[dii][djj]); - fprintf(del_ext_f_arr[1], "%-3d", del[dii][djj]); + fprintf(score_f_arr[1], "%-4d", score[dii][djj]); + fprintf(ins_ext_f_arr[1], "%-4d", ins[dii][djj]); + fprintf(del_ext_f_arr[1], "%-4d", del[dii][djj]); } fprintf(score_f_arr[1], "\n"); fprintf(ins_ext_f_arr[1], "\n"); diff --git a/ksw_ext_avx2_u8.c b/ksw_ext_avx2_u8.c index 5053051..2a3ba43 100644 --- a/ksw_ext_avx2_u8.c +++ b/ksw_ext_avx2_u8.c @@ -317,16 +317,15 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem, end1 = D; // 闭区间 else end1 = qlen; - beg1 = MAX(D - window_size, beg1); - end1 = MIN(D + window_size, end1); + // beg1 = MAX(D - window_size, beg1); + // end1 = MIN(D + window_size, end1); + // beg = MAX(beg1, beg); + // end = MIN(end1, end); + // if (beg > end) + // break; - beg = MAX(beg1, beg); - end = MIN(end1, end); - if (beg > end) - break; - - // beg = beg1; - // end = end1; + beg = beg1; + end = end1; iend = D - beg; // ref开始计算的位置,倒序 span = end - beg; @@ -394,22 +393,22 @@ int ksw_extend_avx2_u8(thread_mem_t *tmem, } // 调整计算的边界 - for (j = beg; LIKELY(j <= end); ++j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - } - beg = j; - for (j = end + 1; LIKELY(j >= beg); --j) - { - int has_val = hA1[j - 1] | hA2[j]; - if (has_val) - break; - else - hA0[j - 1] = 0; - } - end = j + 1 <= qlen ? j + 1 : qlen; + // for (j = beg; LIKELY(j <= end); ++j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + //} + // beg = j; + // for (j = end + 1; LIKELY(j >= beg); --j) + //{ + // int has_val = hA1[j - 1] | hA2[j]; + // if (has_val) + // break; + // else + // hA0[j - 1] = 0; + //} + // end = j + 1 <= qlen ? j + 1 : qlen; // swap m, h, e, f SWAP_DATA_POINTER; diff --git a/ksw_ext_normal.c b/ksw_ext_normal.c index d8f3469..c0facc2 100644 --- a/ksw_ext_normal.c +++ b/ksw_ext_normal.c @@ -58,14 +58,14 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl max_off = 0; beg = 0, end = qlen; #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h0); - fprintf(ins_ext_f_arr[0], "%-3d", h0); - fprintf(del_ext_f_arr[0], "%-3d", h0); + fprintf(score_f_arr[0], "%-4d", h0); + fprintf(ins_ext_f_arr[0], "%-4d", h0); + fprintf(del_ext_f_arr[0], "%-4d", h0); for (j = 0; LIKELY(j < end); ++j) // 遍历query字符序列 { - fprintf(score_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); - fprintf(ins_ext_f_arr[0], "%-3d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); - fprintf(del_ext_f_arr[0], "%-3d", 0); + fprintf(score_f_arr[0], "%-4d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); + fprintf(ins_ext_f_arr[0], "%-4d", MAX(h0 - o_ins - (j + 1) * e_ins, 0)); + fprintf(del_ext_f_arr[0], "%-4d", 0); } fprintf(score_f_arr[0], "\n"); fprintf(ins_ext_f_arr[0], "\n"); @@ -83,8 +83,8 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl end = i + w + 1; if (end > qlen) // 终点不超过query长度 end = qlen; - // beg = 0; - // end = qlen; + beg = 0; + end = qlen; // compute the first column if (beg == 0) { @@ -95,15 +95,15 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl else h1 = 0; #ifdef DEBUG_OUT - fprintf(ins_ext_f_arr[0], "%-3d", 0); - fprintf(del_ext_f_arr[0], "%-3d", MAX(h0 - o_del - (i + 1) * e_del, 0)); + fprintf(ins_ext_f_arr[0], "%-4d", 0); + fprintf(del_ext_f_arr[0], "%-4d", MAX(h0 - o_del - (i + 1) * e_del, 0)); #endif for (j = beg; LIKELY(j < end); ++j) // 遍历query字符序列 { #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h1); - fprintf(ins_ext_f_arr[0], "%-3d", f); - fprintf(del_ext_f_arr[0], "%-3d", eh[j].e); + fprintf(score_f_arr[0], "%-4d", h1); + fprintf(ins_ext_f_arr[0], "%-4d", f); + fprintf(del_ext_f_arr[0], "%-4d", eh[j].e); #endif // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Similar to SSE2-SW, cells are computed in the following order: @@ -130,7 +130,7 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl f = f > t ? f : t; // computed F(i,j+1) } #ifdef DEBUG_OUT - fprintf(score_f_arr[0], "%-3d", h1); + fprintf(score_f_arr[0], "%-4d", h1); fprintf(score_f_arr[0], "\n"); fprintf(ins_ext_f_arr[0], "\n"); fprintf(del_ext_f_arr[0], "\n"); @@ -142,8 +142,8 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl max_ie = gscore > h1 ? max_ie : i; // max_ie表示取得全局最大分值时,target字符串的位置 gscore = gscore > h1 ? gscore : h1; } - if (m == 0) // 遍历完query之后,当前轮次的最大分值为0,则跳出循环 - break; + // if (m == 0) // 遍历完query之后,当前轮次的最大分值为0,则跳出循环 + // break; if (m > max) // 当前轮最大分值大于之前的最大分值 { max = m, max_i = i, max_j = mj; // 更新取得最大值的target和query的位置 @@ -163,15 +163,15 @@ int ksw_extend_normal(thread_mem_t *tmem, int qlen, const uint8_t *query, int tl } } // update beg and end for the next round - for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j) - ; - beg = j; - for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j) - ; - end = j + 2 < qlen ? j + 2 : qlen; // 剪枝没考虑f,即insert - // beg = 0, end = qlen; // uncomment this line for debugging - // fprintf(stderr, "\n"); - // fprintf(stderr, "%d\n", end); + // for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j) + // ; + // beg = j; + // for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j) + // ; + // end = j + 2 < qlen ? j + 2 : qlen; // 剪枝没考虑f,即insert + // beg = 0, end = qlen; // uncomment this line for debugging + // fprintf(stderr, "\n"); + // fprintf(stderr, "%d\n", end); } // free(eh); // free(qp); diff --git a/main.c b/main.c index ccb4a69..866ae44 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,11 @@ +/********************************************************************************************* + Description: The entry for sw performance tests + + Copyright : All right reserved by NCIC.ICT + + Author : Zhang Zhonghai + Date : 2023/08/20 +***********************************************************************************************/ #include #include #include @@ -31,10 +39,43 @@ int64_t time_sw[KERNEL_NUM] = {0}; #endif #ifdef DEBUG_RETURN_VALUE -#define OUTPUT_RETVAL(kernel_num) \ +#define OUTPUT_RETVAL_1(kernel_num) \ + fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]); \ + for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\t"); \ + for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_SCORE \ fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\t%d\t%d\n", score[kernel_num], qle, tle, gtle, gscore, max_off[0]) +// fprintf(retval_f_arr[kernel_num], " %d %d\n", cur_query_pos, info_arr[i][0]); +// fprintf(retval_f_arr[kernel_num], "%d\t%d\t%d\t%d\n", score[kernel_num], info_arr[i][0], info_arr[i][1], info_arr[i][2]) + +#define OUTPUT_RETVAL_target(kernel_num) \ + for (j = cur_target_pos - info_arr[i][1]; j < cur_target_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)target_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_query(kernel_num) \ + for (j = cur_query_pos - info_arr[i][0]; j < cur_query_pos; ++j) \ + { \ + fprintf(retval_f_arr[kernel_num], "%c", t_2bit2char[(uint8_t)query_arr[j]]); \ + } \ + fprintf(retval_f_arr[kernel_num], "\n"); + +#define OUTPUT_RETVAL_INFO(kernel_num) \ + fprintf(retval_f_arr[kernel_num], "%-8d%-8d%-8d\n", info_arr[i][0], info_arr[i][1], info_arr[i][2]); +#define OUTPUT_RETVAL(kernel_num) OUTPUT_RETVAL_target(kernel_num) #else -#define OUTPUT_RETVAL(out_f) +#define OUTPUT_RETVAL(kernel_num) #endif #define _PERFORMANCE_TEST_NORMAL(kernel_num, func) \ @@ -78,16 +119,24 @@ int64_t time_sw[KERNEL_NUM] = {0}; cur_target_pos += info_arr[i][1]; \ OUTPUT_RETVAL(kernel_num); \ } - +#define _PERFORMANCE_TEST_AVX2_T1(kernel_num, func) \ + cur_query_pos = 0; \ + cur_target_pos = 0; \ + for (i = 0; i < block_line_num; ++i) \ + { \ + cur_query_pos += info_arr[i][0]; \ + cur_target_pos += info_arr[i][1]; \ + OUTPUT_RETVAL(kernel_num); \ + } #ifdef SHOW_PERF #define PERFORMANCE_TEST_NORMAL(kernel_num, func) \ start_time = get_mseconds(); \ _PERFORMANCE_TEST_NORMAL(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time -#define PERFORMANCE_TEST_AVX2(kernel_num, func) \ - start_time = get_mseconds(); \ - _PERFORMANCE_TEST_AVX2(kernel_num, func); \ +#define PERFORMANCE_TEST_AVX2(kernel_num, func) \ + start_time = get_mseconds(); \ + _PERFORMANCE_TEST_AVX2_T1(kernel_num, func); \ time_sw[kernel_num] += get_mseconds() - start_time #else #define PERFORMANCE_TEST_NORMAL(kernel_num, func) _PERFORMANCE_TEST_NORMAL(kernel_num, func) @@ -98,7 +147,7 @@ int64_t time_sw[KERNEL_NUM] = {0}; int read_seq_line(char *read_buf, FILE *f_ptr, char *out_arr) { if (fgets(read_buf, READ_BUF_SIZE, f_ptr) == NULL) - return 0; + return -1; int line_size = strlen(read_buf); assert(line_size < READ_BUF_SIZE); if (read_buf[line_size - 1] == '\n') @@ -152,8 +201,8 @@ int main(int argc, char *argv[]) FILE *query_f = 0, *target_f = 0, *info_f = 0; // 每次读取一定量的数据,然后执行,直到处理完所有数据 - int total_line_num = 0; // 目前处理的总的数据行数 - int block_line_num = 0; // 当前循环包含的数据行数 + int64_t total_line_num = 0; // 目前处理的总的数据行数 + int block_line_num = 0; // 当前循环包含的数据行数 int cur_query_pos, cur_target_pos; int64_t start_time; char read_buf[READ_BUF_SIZE]; // 读文件缓存 @@ -204,8 +253,14 @@ int main(int argc, char *argv[]) while (!feof(target_f) && cur_read_size < BLOCK_BUF_SIZE) { int line_size = read_seq_line(read_buf, target_f, target_arr + cur_read_size); - // fprintf(stderr, "line: %d\n", line_size); - if (line_size == 0) + // for (j = 0; j < line_size; ++j) + //{ + // // fprintf(stderr, "%c", t_2bit2char[(uint8_t)read_buf[j]]); + // fprintf(stderr, "%c", t_2bit2char[(uint8_t)target_arr[j + cur_read_size]]); + // } + // fprintf(stderr, "\n"); + // fprintf(retval_f_arr[1], "%d\n", line_size); + if (line_size == -1) break; cur_read_size += line_size; ++block_line_num; @@ -217,7 +272,13 @@ int main(int argc, char *argv[]) for (i = 0; i < block_line_num; ++i) { int line_size = read_seq_line(read_buf, query_f, query_arr + cur_read_size); - if (line_size == 0) + // int j; + // for (j = cur_read_size; j < cur_read_size + line_size; ++j) + //{ + // fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)query_arr[j]]); + // } + // fprintf(retval_f_arr[0], "\n"); + if (line_size == -1) break; cur_read_size += line_size; } @@ -236,26 +297,43 @@ int main(int argc, char *argv[]) // fprintf(stderr, "%s\n", read_buf); } + cur_read_size = 0; + for (i = 0; i < block_line_num; ++i) + { + for (j = cur_read_size; j < cur_read_size + info_arr[i][1]; ++j) + { + fprintf(retval_f_arr[0], "%c", t_2bit2char[(uint8_t)target_arr[j]]); + } + fprintf(retval_f_arr[0], "\n"); + cur_read_size += info_arr[i][1]; + } + // for (i = 0; i < block_line_num; ++i) + //{ + // fprintf(retval_f_arr[0], "%d\n", info_arr[i][1]); + //} + // 性能测试 // normal sw - PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); + // PERFORMANCE_TEST_NORMAL(0, ksw_extend_normal); // avx2 - PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); + // PERFORMANCE_TEST_AVX2(1, ksw_extend_avx2); // avx2 heuristics - PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); + // PERFORMANCE_TEST_AVX2(2, ksw_extend_avx2_heuristics); // avx2 mem aligned - PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); + // PERFORMANCE_TEST_AVX2(3, ksw_extend_avx2_aligned); // avx2 u8 - PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); + // PERFORMANCE_TEST_AVX2(4, ksw_extend_avx2_u8); // avx2 u8 heuristics - PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); + // PERFORMANCE_TEST_AVX2(5, ksw_extend_avx2_u8_heuristics); // avx2 u8 mem aligned - PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); + // PERFORMANCE_TEST_AVX2(6, ksw_extend_avx2_u8_aligned); } + fprintf(stderr, "%ld\n", total_line_num); + #ifdef SHOW_PERF char *kernel_names[7] = { "normal", diff --git a/run_debug.sh b/run_debug.sh index 17cc634..7e78b2f 100755 --- a/run_debug.sh +++ b/run_debug.sh @@ -1,2 +1,4 @@ #!/bin/bash -/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q.fa /home/zzh/work/sw_perf/input/t.fa /home/zzh/work/sw_perf/input/i.txt \ No newline at end of file +#/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q.fa /home/zzh/work/sw_perf/input/t.fa /home/zzh/work/sw_perf/input/i.txt +/home/zzh/work/sw_perf/sw_perf /home/zzh/work/sw_perf/input/q_d.fa /home/zzh/work/sw_perf/input/t_d.fa /home/zzh/work/sw_perf/input/i_d.txt + diff --git a/utils.h b/utils.h index 79ca7f7..6edb9ec 100644 --- a/utils.h +++ b/utils.h @@ -9,6 +9,10 @@ #ifndef __UTILS_H #define __UTILS_H +extern char t_2bit2char[5]; + +extern unsigned char nst_nt4_table[256]; + // 将碱基字符转成2位编码 void convert_char_to_2bit(char *str);