From 8f3cb79d6fe58870d7107a77f82a8e592e049638 Mon Sep 17 00:00:00 2001 From: zzh Date: Fri, 1 Mar 2024 18:45:09 +0800 Subject: [PATCH] =?UTF-8?q?=E5=9C=A8avx2=E4=B8=AD=E5=8A=A0=E5=85=A5?= =?UTF-8?q?=E4=BA=86=E4=B8=80=E4=BA=9B=E7=89=B9=E6=AE=8A=E5=88=A4=E6=96=AD?= =?UTF-8?q?=EF=BC=8C=E4=BF=9D=E8=AF=81=E4=BA=86=E5=92=8C=E4=B8=B2=E8=A1=8C?= =?UTF-8?q?=E7=9A=84sw=E7=BB=93=E6=9E=9C=E4=BF=9D=E6=8C=81=E5=9F=BA?= =?UTF-8?q?=E6=9C=AC=E4=B8=80=E8=87=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 2 +- debug.sh | 2 + fastmap.c | 10 +- ksw_extend2_avx2.c | 266 +++++++++++++++++++++++++++++++++++++++--- ksw_extend2_avx2_u8.c | 17 ++- out.sam | 108 ----------------- run.sh | 15 +-- utils.h | 2 +- 8 files changed, 279 insertions(+), 143 deletions(-) delete mode 100644 out.sam diff --git a/.vscode/launch.json b/.vscode/launch.json index 2fef6c8..49e1cf7 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -21,7 +21,7 @@ "~/fastq/diff_r1.fq", "~/fastq/diff_r2.fq", "-o", - "/dev/null" + "out.sam" ], "cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间 }, diff --git a/debug.sh b/debug.sh index b906d4e..6ba3248 100755 --- a/debug.sh +++ b/debug.sh @@ -12,6 +12,8 @@ thread=1 #n_r2=~/fastq/tiny_n_r2.fq n_r1=~/fastq/diff_r1.fq n_r2=~/fastq/diff_r2.fq +#n_r1=~/fastq/diff_all_r1.fq +#n_r2=~/fastq/diff_all_r2.fq #n_r1=~/fastq/d_r1.fq #n_r2=~/fastq/d_r2.fq reference=~/reference/human_g1k_v37_decoy.fasta diff --git a/fastmap.c b/fastmap.c index ea0ca56..92e9acd 100644 --- a/fastmap.c +++ b/fastmap.c @@ -64,7 +64,7 @@ int64_t time_ksw_extend2 = 0, int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0; int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0; int64_t g_num_smem2 = 0; -FILE *fp1; +FILE *gfp1, *gfp2, *gfp3; #endif @@ -163,7 +163,9 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0) int main_mem(int argc, char *argv[]) { #ifdef SHOW_PERF - fp1 = fopen("./test_out.txt", "w"); + gfp1 = fopen("./fp1.txt", "w"); + gfp2 = fopen("./fp2.txt", "w"); + gfp3 = fopen("./fp3.txt", "w"); #endif mem_opt_t *opt, opt0; int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0; @@ -443,7 +445,9 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads); fprintf(stderr, "\n"); - fclose(fp1); + fclose(gfp1); + fclose(gfp2); + fclose(gfp3); #endif return 0; diff --git a/ksw_extend2_avx2.c b/ksw_extend2_avx2.c index dc4a8f1..be4b068 100644 --- a/ksw_extend2_avx2.c +++ b/ksw_extend2_avx2.c @@ -6,6 +6,11 @@ #include #include +extern FILE *gfp1, *gfp2, *gfp3; +//define DEBUG_SW_EXTEND 1 + +#define NO_VAL -1 + #ifdef __GNUC__ #define LIKELY(x) __builtin_expect((x),1) #define UNLIKELY(x) __builtin_expect((x),0) @@ -25,9 +30,8 @@ typedef struct { size_t m; uint8_t *addr; } buf_t; extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off, buf_t *buf); -int ksw_extend2_origin(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, - int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); - +int ksw_extend2_origin(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int is_left, int m, const int8_t *mat, int o_del, int e_del, + int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off); static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { {0xffff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, @@ -154,6 +158,7 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { int pos = SIMD_WIDTH - 1 - (( __builtin_clz(mask)) >> 1); \ mj = j - 1 + pos; \ mi = i - 1 - pos; \ + /*if (m >= max) fprintf(stderr, "%d %d %d %d %d %d %d\n", iend, beg, mi, mj, mask, pos, m);*/ \ } \ } \ } @@ -192,16 +197,15 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 buf_t *buf) // 之前已经开辟过的缓存 { //return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off); -// fprintf(stderr, "qlen: %d, tlen: %d\n", qlen, tlen); - if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf); - + //if (qlen * a + h0 < 255) + //return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf); int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M int16_t *seq, *ref; uint8_t *mem; int16_t *qtmem, *vmem; int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH; - int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; + int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; int Dloop = tlen + qlen; // 循环跳出条件 int span, beg1, end1; // 边界条件计算 int col_size = qlen + 2 + SIMD_WIDTH; @@ -269,6 +273,34 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 int m_last=0; int iend; + int m3 = 0, m2 = 0, m1 = 0; + int midx = 1, icheck = 0, checkspecial = 1; + + int print_flag = 0; //(qlen == 64 && tlen == 123); +#ifdef DEBUG_SW_EXTEND + int dii, djj; + int16_t ins[tlen + 1][qlen + 2]; + int16_t del[tlen + 1][qlen + 2]; + int16_t score[tlen + 1][qlen + 2]; + for (dii = 0; dii <= tlen; ++dii) + { + for (djj = 0; djj <= qlen; ++djj) + { + ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL; + } + } + for (dii = 1; dii <= tlen; ++dii) + { + del[dii][0] = MAX(0, h0 - o_del - e_del * dii); + score[dii][0] = del[dii][0]; + } + for (djj = 1; djj <= qlen; ++djj) + { + ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj); + score[0][djj] = ins[0][djj]; + } + ins[0][0] = del[0][0] = score[0][0] = h0; +#endif for (D = 1; LIKELY(D < Dloop); ++D) { // 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况 @@ -287,18 +319,40 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 iend = D - (beg - 1); // ref开始计算的位置,倒序 span = end - beg; - iStart = iend - span - 1; // 0开始的ref索引位置 + ibeg = iend - span - 1; // 0开始的ref索引位置 // 每一轮需要记录的数据 int m = 0, mj = -1, mi = -1; max_vec = zero_vec; - + if (print_flag) + { + //fprintf(stderr, "D: %d, iend: %d, jbeg: %d\n", D, iend, beg); + } // 要处理边界 // 左边界 处理f (insert) - if (iStart == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } + if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } // 上边界 - if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } - else { hA1[beg-1] = 0; eA1[beg-1] = 0; } + if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } + else { + if (D & 1) { + hA1[beg - 1] = 0; + //eA1[beg - 1] = 0; + //fA1[beg - 1] = 0; + hA2[beg - 1] = 0; + //eA2[beg - 1] = 0; + //fA2[beg - 1] = 0; + //mA2[beg - 1] = 0; + //hA0[beg - 1] = 0; + } + else + { + //hA0[beg - 1] = 0; + //hA1[beg - 1] = 0; + //eA1[beg - 1] = 0; + //fA1[beg - 1] = 0; + //mA1[beg - 1] = 0; + } + } for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { // 取数据 @@ -326,17 +380,64 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 SIMD_FIND_MAX; + if (hA1[0] < 4 && checkspecial) { + if (hA1[0] == 3) { + icheck = iend + 1; + m3 = MAX(m3, hA2[midx]); + midx += 1; + } else if (midx == 2) { + m3 = MAX(m3, hA2[midx]); + m2 = MAX(m2, hA2[midx - 1]); + midx += 1; + } else { + m3 = MAX(m3, hA2[midx]); + m2 = MAX(m2, hA2[midx - 1]); + m1 = MAX(m1, hA2[midx - 2]); + midx += 1; + } + if (ibeg > icheck) { + if (!m1 || !m2 || !m3) + break; + else + checkspecial = 0; + } + + if (print_flag) { + //fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA2[midx + 1], hA2[midx + 2], hA2[midx + 3], midx); + if (midx > 2) fprintf(stderr, "%d, %d, %d\n", hA2[midx-1], hA2[midx-2], hA2[midx-3]); + fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, hA1: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA1[0], m1, m2, m3, midx); + } + } + +#ifdef DEBUG_SW_EXTEND + for (djj = beg; djj <= end; ++djj) + { + dii = D - djj + 1; + ins[dii][djj] = fA2[djj]; + del[dii][djj] = eA2[djj]; + score[dii][djj] = hA2[djj]; + } + if (print_flag) + { + //fprintf(stderr, "score: %d %d %d\n", hA2[beg], hA2[beg+1], hA2[beg+2]); + } +#endif + // 注意最后跳出循环j的值 j = end + 1; if (j == qlen + 1) { - max_ie = gscore > hA2[qlen] ? max_ie : iStart; + max_ie = gscore > hA2[qlen] ? max_ie : ibeg; gscore = gscore > hA2[qlen] ? gscore : hA2[qlen]; } if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点 + //if (m == 0 && m_last < 2) break; if (m > max) { max = m, max_i = mi, max_j = mj; - max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi); + max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi); + } else if (m == max && max_i >= mi && mj > max_j) { + max_i = mi, max_j = mj; + max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi); } else if (zdrop > 0) { if (mi - max_i > mj - max_j) { @@ -357,7 +458,49 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 SWAP_DATA_POINTER; } -// free(mem); +#ifdef DEBUG_SW_EXTEND + fprintf(gfp1, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); + fprintf(gfp2, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); + fprintf(gfp3, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); +#endif +#ifdef DEBUG_SW_EXTEND + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + for (djj = 0; djj < qlen; ++djj) { + fprintf(gfp1, "%-4c", "ACGTN"[query[djj]]); + fprintf(gfp2, "%-4c", "ACGTN"[query[djj]]); + fprintf(gfp3, "%-4c", "ACGTN"[query[djj]]); + } + fprintf(gfp1, "\n"); + fprintf(gfp2, "\n"); + fprintf(gfp3, "\n"); + for (dii = 0; dii <= tlen; ++dii) + { + if (dii > 0) { + fprintf(gfp1, "%-4c", "ACGTN"[target[dii - 1]]); + fprintf(gfp2, "%-4c", "ACGTN"[target[dii - 1]]); + fprintf(gfp3, "%-4c", "ACGTN"[target[dii - 1]]); + } else { + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + } + for (djj = 0; djj <= qlen; ++djj) + { + fprintf(gfp1, "%-4d", score[dii][djj]); + fprintf(gfp2, "%-4d", ins[dii][djj]); + fprintf(gfp3, "%-4d", del[dii][djj]); + } + fprintf(gfp1, "\n"); + fprintf(gfp2, "\n"); + fprintf(gfp3, "\n"); + } +#endif + // free(mem); if (_qle) *_qle = max_j + 1; if (_tle) *_tle = max_i + 1; if (_gtle) *_gtle = max_ie + 1; @@ -427,12 +570,39 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.); max_del = max_del > 1? max_del : 1; w = w < max_del? w : max_del; // TODO: is this necessary? -// printf("%d\n", w); + //fprintf(stderr, "%d\n", w); // DP loop max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1; max_off = 0; beg = 0, end = qlen; + int print_flag = 0; //(qlen == 116 && tlen == 241); + //fprintf(stderr, "%d %d %d\n", print_flag, qlen, tlen); +#ifdef DEBUG_SW_EXTEND + int dii, djj; + int16_t ins[tlen + 1][qlen + 2]; + int16_t del[tlen + 1][qlen + 2]; + int16_t score[tlen + 1][qlen + 2]; + for (dii = 0; dii <= tlen; ++dii) + { + for (djj = 0; djj <= qlen; ++djj) + { + ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL; + } + } + for (dii = 1; dii <= tlen; ++dii) + { + del[dii][0] = MAX(0, h0 - o_del - e_del * dii); + score[dii][0] = del[dii][0]; + } + for (djj = 1; djj <= qlen; ++djj) + { + ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj); + score[0][djj] = ins[0][djj]; + } + ins[0][0] = del[0][0] = score[0][0] = h0; +#endif + for (i = 0; LIKELY(i < tlen); ++i) { int t, f = 0, h1, m = 0, mj = -1; int8_t *q = &qp[ref[i] * qlen]; @@ -445,7 +615,11 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 h1 = h0 - (o_del + e_del * (i + 1)); if (h1 < 0) h1 = 0; } else h1 = 0; + //m = h1; for (j = beg; LIKELY(j < end); ++j) { +#ifdef DEBUG_SW_EXTEND + ins[i+1][j+1] = f; +#endif // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1) // Similar to SSE2-SW, cells are computed in the following order: // H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)} @@ -454,16 +628,22 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 eh_t *p = &eh[j]; int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j) p->h = h1; // set H(i,j-1) for the next row - M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M" + M = M? M + q[j] : 0;// separating H and M to disallow a cigar like "100M3I3D20M",保证分值不小于0,sw和nw的区别 h = M > e? M : e; // e and f are guaranteed to be non-negative, so h>=0 even if M<0 h = h > f? h : f; h1 = h; // save H(i,j) to h1 for the next column +#ifdef DEBUG_SW_EXTEND + score[i+1][j+1] = h; +#endif mj = m > h? mj : j; // record the position where max score is achieved m = m > h? m : h; // m is stored at eh[mj+1] t = M - oe_del; t = t > 0? t : 0; e -= e_del; e = e > t? e : t; // computed E(i+1,j) +#ifdef DEBUG_SW_EXTEND + del[i+1][j+1] = e; +#endif p->e = e; // save E(i+1,j) for the next row t = M - oe_ins; t = t > 0? t : 0; @@ -479,6 +659,7 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 if (m > max) { max = m, max_i = i, max_j = mj; max_off = max_off > abs(mj - i)? max_off : abs(mj - i); + //fprintf(stderr, "%d %d %d %d\n", i, mj, max_off, m); } else if (zdrop > 0) { if (i - max_i > mj - max_j) { if (max - m - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break; @@ -487,12 +668,61 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 } } // update beg and end for the next round - for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); + for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); // 这里为什么不考虑f(insert score) beg = j; for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j); end = j + 2 < qlen? j + 2 : qlen; //beg = 0; end = qlen; // uncomment this line for debugging + if (print_flag) { + fprintf(stderr, "beg: %d; end: %d\n", beg, end); + } } +#ifdef DEBUG_SW_EXTEND + fprintf(gfp1, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); + fprintf(gfp2, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); + fprintf(gfp3, "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max); +#endif +#ifdef DEBUG_SW_EXTEND + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + for (djj = 0; djj < qlen; ++djj) + { + fprintf(gfp1, "%-4c", "ACGTN"[query[djj]]); + fprintf(gfp2, "%-4c", "ACGTN"[query[djj]]); + fprintf(gfp3, "%-4c", "ACGTN"[query[djj]]); + } + fprintf(gfp1, "\n"); + fprintf(gfp2, "\n"); + fprintf(gfp3, "\n"); + for (dii = 0; dii <= tlen; ++dii) + { + if (dii > 0) + { + fprintf(gfp1, "%-4c", "ACGTN"[target[dii - 1]]); + fprintf(gfp2, "%-4c", "ACGTN"[target[dii - 1]]); + fprintf(gfp3, "%-4c", "ACGTN"[target[dii - 1]]); + } + else + { + fprintf(gfp1, "%-4d", -1); + fprintf(gfp2, "%-4d", -1); + fprintf(gfp3, "%-4d", -1); + } + for (djj = 0; djj <= qlen; ++djj) + { + fprintf(gfp1, "%-4d", score[dii][djj]); + fprintf(gfp2, "%-4d", ins[dii][djj]); + fprintf(gfp3, "%-4d", del[dii][djj]); + } + fprintf(gfp1, "\n"); + fprintf(gfp2, "\n"); + fprintf(gfp3, "\n"); + } +#endif free(eh); free(qp); free(qmem); if (_qle) *_qle = max_j + 1; diff --git a/ksw_extend2_avx2_u8.c b/ksw_extend2_avx2_u8.c index 7c0d49a..21c40a6 100644 --- a/ksw_extend2_avx2_u8.c +++ b/ksw_extend2_avx2_u8.c @@ -213,7 +213,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 uint8_t *seq, *ref; uint8_t *mem, *qtmem, *vmem; int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH; - int i, iStart, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; + int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off; int Dloop = tlen + qlen; // 循环跳出条件 int span, beg1, end1; // 边界条件计算 int col_size = qlen + 2 + SIMD_WIDTH; @@ -299,7 +299,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 iend = D - (beg - 1); // ref开始计算的位置,倒序 span = end - beg; - iStart = iend - span - 1; // 0开始的ref索引位置 + ibeg = iend - span - 1; // 0开始的ref索引位置 // 每一轮需要记录的数据 int m = 0, mj = -1, mi = -1; @@ -307,10 +307,13 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 // 要处理边界 // 左边界 处理f (insert) - if (iStart == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } + if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } // 上边界 if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } - else { hA1[beg-1] = 0; eA1[beg-1] = 0; } + else if (D & 1) { + hA1[beg - 1] = 0; + eA1[beg - 1] = 0; + fA1[beg - 1] = 0; } for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { // 取数据 @@ -342,7 +345,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 j = end + 1; if (j == qlen + 1) { - max_ie = gscore > hA2[qlen] ? max_ie : iStart; + max_ie = gscore > hA2[qlen] ? max_ie : ibeg; gscore = gscore > hA2[qlen] ? gscore : hA2[qlen]; } if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点 @@ -350,6 +353,10 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 max = m, max_i = mi, max_j = mj; max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi); } + else if (m == max && max_i >= mi && mj > max_j) { + max_i = mi, max_j = mj; + max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi); + } else if (zdrop > 0) { if (mi - max_i > mj - max_j) { if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop) break; diff --git a/out.sam b/out.sam deleted file mode 100644 index 7a61bcc..0000000 --- a/out.sam +++ /dev/null @@ -1,108 +0,0 @@ -@SQ SN:1 LN:249250621 -@SQ SN:2 LN:243199373 -@SQ SN:3 LN:198022430 -@SQ SN:4 LN:191154276 -@SQ SN:5 LN:180915260 -@SQ SN:6 LN:171115067 -@SQ SN:7 LN:159138663 -@SQ SN:8 LN:146364022 -@SQ SN:9 LN:141213431 -@SQ SN:10 LN:135534747 -@SQ SN:11 LN:135006516 -@SQ SN:12 LN:133851895 -@SQ SN:13 LN:115169878 -@SQ SN:14 LN:107349540 -@SQ SN:15 LN:102531392 -@SQ SN:16 LN:90354753 -@SQ SN:17 LN:81195210 -@SQ SN:18 LN:78077248 -@SQ SN:19 LN:59128983 -@SQ SN:20 LN:63025520 -@SQ SN:21 LN:48129895 -@SQ SN:22 LN:51304566 -@SQ SN:X LN:155270560 -@SQ SN:Y LN:59373566 -@SQ SN:MT LN:16569 -@SQ SN:GL000207.1 LN:4262 -@SQ SN:GL000226.1 LN:15008 -@SQ SN:GL000229.1 LN:19913 -@SQ SN:GL000231.1 LN:27386 -@SQ SN:GL000210.1 LN:27682 -@SQ SN:GL000239.1 LN:33824 -@SQ SN:GL000235.1 LN:34474 -@SQ SN:GL000201.1 LN:36148 -@SQ SN:GL000247.1 LN:36422 -@SQ SN:GL000245.1 LN:36651 -@SQ SN:GL000197.1 LN:37175 -@SQ SN:GL000203.1 LN:37498 -@SQ SN:GL000246.1 LN:38154 -@SQ SN:GL000249.1 LN:38502 -@SQ SN:GL000196.1 LN:38914 -@SQ SN:GL000248.1 LN:39786 -@SQ SN:GL000244.1 LN:39929 -@SQ SN:GL000238.1 LN:39939 -@SQ SN:GL000202.1 LN:40103 -@SQ SN:GL000234.1 LN:40531 -@SQ SN:GL000232.1 LN:40652 -@SQ SN:GL000206.1 LN:41001 -@SQ SN:GL000240.1 LN:41933 -@SQ SN:GL000236.1 LN:41934 -@SQ SN:GL000241.1 LN:42152 -@SQ SN:GL000243.1 LN:43341 -@SQ SN:GL000242.1 LN:43523 -@SQ SN:GL000230.1 LN:43691 -@SQ SN:GL000237.1 LN:45867 -@SQ SN:GL000233.1 LN:45941 -@SQ SN:GL000204.1 LN:81310 -@SQ SN:GL000198.1 LN:90085 -@SQ SN:GL000208.1 LN:92689 -@SQ SN:GL000191.1 LN:106433 -@SQ SN:GL000227.1 LN:128374 -@SQ SN:GL000228.1 LN:129120 -@SQ SN:GL000214.1 LN:137718 -@SQ SN:GL000221.1 LN:155397 -@SQ SN:GL000209.1 LN:159169 -@SQ SN:GL000218.1 LN:161147 -@SQ SN:GL000220.1 LN:161802 -@SQ SN:GL000213.1 LN:164239 -@SQ SN:GL000211.1 LN:166566 -@SQ SN:GL000199.1 LN:169874 -@SQ SN:GL000217.1 LN:172149 -@SQ SN:GL000216.1 LN:172294 -@SQ SN:GL000215.1 LN:172545 -@SQ SN:GL000205.1 LN:174588 -@SQ SN:GL000219.1 LN:179198 -@SQ SN:GL000224.1 LN:179693 -@SQ SN:GL000223.1 LN:180455 -@SQ SN:GL000195.1 LN:182896 -@SQ SN:GL000212.1 LN:186858 -@SQ SN:GL000222.1 LN:186861 -@SQ SN:GL000200.1 LN:187035 -@SQ SN:GL000193.1 LN:189789 -@SQ SN:GL000194.1 LN:191469 -@SQ SN:GL000225.1 LN:211173 -@SQ SN:GL000192.1 LN:547496 -@SQ SN:NC_007605 LN:171823 -@SQ SN:hs37d5 LN:35477943 -@HD VN:1.5 SO:unsorted GO:query -@RG ID:normal SM:normal PL:illumina LB:normal PG:bwa -@PG ID:bwa PN:bwa VN:0.7.17-r1198-dirty CL:./bwa mem -t 1 -M -R @RG\tID:normal\tSM:normal\tPL:illumina\tLB:normal\tPG:bwa /home/zzh/reference/human_g1k_v37_decoy.fasta /home/zzh/fastq/diff_r1.fq /home/zzh/fastq/diff_r2.fq -o ./out.sam -A00289:748:HLCH3DSX3:4:1102:24749:34929 97 16 75642166 60 150M 12 66451372 0 AAAGCCATTGGCAGTAACATCCAAAGGCTGCTCAGGAACTGCACTCCAGCTGATGGCTATGAAAAGATAAGATTCTAGGGTTAAAACTGCTCAATTTCTTGTGAAAGTCAGAAGTATTTTAATAAAAATATTTCTCTGACTCTACAGACC FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF: NM:i:0 MD:Z:150 MC:Z:17S91M42S AS:i:150 XS:i:0 RG:Z:normal -A00289:748:HLCH3DSX3:4:1102:24749:34929 145 12 66451372 0 17S91M42S 16 75642166 0 TTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATAAATAATTCTTTGTGAAACCCCT F,F,FFF,:FFF,,F,F,,F,F:,FFF:F:FFF:FFFFFFF:F,F,F,FFFFFF::,,FFFFFFF,:F,:,F,::,,F:FF:,FF:F::F::FFFFFFFFFF,F:F,FFFF,FFFFF,FF,F,,F,F,,,,,,:,,,,,,,,,,,,,,,F NM:i:0 MD:Z:91 MC:Z:150M AS:i:91 XS:i:90 RG:Z:normal SA:Z:12,49106363,+,117S33M,0,0; XA:Z:12,-66451373,35S90M25S,0;12,-66451373,28S90M32S,0;6,-160521757,23S83M44S,0;15,-77910871,23S79M48S,0; -A00289:748:HLCH3DSX3:4:1102:24749:34929 385 12 49106363 0 117H33M 16 75642166 0 AAAAAAAAAAAAAAATAAAAAAAAAAAAAAAAA FFF:F:FFF,:F,F,,F,F,,FFF:,FFF,F,F NM:i:0 MD:Z:33 MC:Z:150M AS:i:33 XS:i:32 RG:Z:normal SA:Z:12,66451372,-,17S91M42S,0,0; XA:Z:4,-19079503,32M118S,0;15,-55218248,32M118S,0;X,-149551159,32M118S,0; -A00289:748:HLCH3DSX3:4:1104:6234:8656 113 12 66451373 8 12S91M47S 22 18873826 0 TTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTTTGTTTTTTTTTTTTTTGGGGGGGGGGGGGGGTGAGGTGGTCTGTG ,,:FFF:,:,::,,,,,,FF,FFF,,FF,F,FFF:,,:FFFFFF:FFF,:,:FF:F:::::FFF::,::F:FFFFFF:,FFFF,,,:FFFFFFFF:,,::,,,,,:,,FFF:,,,FF:F,:,FFFF:F::,,FF:,:FFFFFF:FFFFFF NM:i:1 MD:Z:13T77 MC:Z:150M AS:i:86 XS:i:81 RG:Z:normal SA:Z:3,121668661,-,89S32M29S,0,0; XA:Z:12,-66451372,25S91M34S,2;6,-160521757,19S84M47S,1;15,-77910874,26S77M47S,0;7,-107410599,25M1I3M15D74M47S,16; -A00289:748:HLCH3DSX3:4:1104:6234:8656 369 3 121668661 0 89H32M29H 22 18873826 0 TTTTTTTTTTTTTGTTTGTTTTTTTTTTTTTT FFFFFF:,,::,,,,,:,,FFF:,,,FF:F,: NM:i:0 MD:Z:32 MC:Z:150M AS:i:32 XS:i:32 RG:Z:normal SA:Z:12,66451373,-,12S91M47S,8,1; -A00289:748:HLCH3DSX3:4:1104:6234:8656 177 22 18873826 22 150M 12 66451373 0 CTATACCCCACATCCTCATTCCCACCAGATGTCTTGGATCATGGAGGCTCTCCAGACAAAAGCCAGCAGTTAAGCTCCAGATTTCCTATAGAATCCTTTTCTAACAACCAGTGAGTGATTCCAGAATACGTACCATTGAATGTGCTCCCT FFF:FFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:12S91M47S AS:i:150 XS:i:140 RG:Z:normal -A00289:748:HLCH3DSX3:4:1104:1741:18129 97 11 113220686 60 150M 12 66451373 0 TATTTCACCCGGATGCTGACTCCTTTTTTGGTTTCCTTGCAAGGTTACCCTGTTGGGATGTTGTTAGTTGGAGGACAGCTACCTCTGAGGTTATTATGGTTTTTTGCAGATTATTGGAAGCGCTGGTGTCATTTCTTGATTTCTCGGATA FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFF,FFFFFFFFFFFFFFFFFF:FFFFFFF:FFFF NM:i:0 MD:Z:150 MC:Z:90M60S AS:i:150 XS:i:19 RG:Z:normal -A00289:748:HLCH3DSX3:4:1104:1741:18129 145 12 66451373 7 90M60S 11 113220686 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTCCCAC FF:FFFF,F,,FFFFFFFF:FFFF:,F,,:F:F::::,F::::FFF::F::,,:FFF,:::FFFFFFFFFFF:FFFFFFFF,FFFF:FFF,:,:,,::FFFFFFFFFFF,FF:FFF:F:F:F:,F,,::F,,F,,FFFFF:F:,,,,,,, NM:i:1 MD:Z:81T8 MC:Z:150M AS:i:85 XS:i:81 RG:Z:normal SA:Z:8,112168232,-,79S59M12S,0,1; XA:Z:6,-160521757,81M69S,0;4,-82515382,13S69M68S,0; -A00289:748:HLCH3DSX3:4:1104:1741:18129 401 8 112168232 0 79H59M12H 11 113220686 0 TTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTT FF,FFFF:FFF,:,:,,::FFFFFFFFFFF,FF:FFF:F:F:F:,F,,::F,,F,,FFF NM:i:1 MD:Z:46T12 MC:Z:150M AS:i:54 XS:i:53 RG:Z:normal SA:Z:12,66451373,-,90M60S,7,1; -A00289:748:HLCH3DSX3:4:1105:27281:29700 113 X 117117442 60 150M 12 66451372 0 CCCTGAAGTTGATTTTTTGAATATATCCTTGAAATCGGCTTGTTTTAATGACAATTGCCTCCGTCTCCTGAAAGTGGAGTGCAAAAAGAAACCTGGTTAATTGTAGTTTTATTATATTTAGTTCAAATTTTTCAAAGCATACCCTGACAG FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:12S89M49S AS:i:150 XS:i:0 RG:Z:normal -A00289:748:HLCH3DSX3:4:1105:27281:29700 177 12 66451372 0 12S89M49S X 117117442 0 TTTTTTAAAAATATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTATTTATTTTTTTATTTTTTTATTATTTTTTTTTTTTTTTTTTTTTTTTTGGTGCCGCCCCC ,,,,:,,F:F,,,,:FF,F:FF,::F:F:F,,F::FFF,FFFFFFFFF,FFFFFF::FFF:F:FFFFFFF,FFFFFFF::FFFF:,:FF:,,F,:,FFFFF,:,,:F,,,,:,FFFFF:F,,FFF:FFFFF:,F,F,,,:,,,,,:FFFF NM:i:3 MD:Z:73T3T3T7 MC:Z:150M AS:i:74 XS:i:72 RG:Z:normal SA:Z:15,85942734,+,20S48M82S,0,1; -A00289:748:HLCH3DSX3:4:1105:27281:29700 417 15 85942734 0 20H48M82H X 117117442 0 AAAAAAAAAAAAAAAAATAATAAAAAAATAAAAAAATAAATAAATAAA FFFF:FFF,,F:FFFFF,:,,,,F:,,:,FFFFF,:,F,,:FF:,:FF NM:i:1 MD:Z:17A30 MC:Z:150M AS:i:43 XS:i:36 RG:Z:normal SA:Z:12,66451372,-,12S89M49S,0,3; XA:Z:11,-118049564,69S35M7D34M12S,11;2,+204499190,12S31M1D21M86S,3; -A00289:748:HLCH3DSX3:4:1106:2880:19288 97 4 169630630 0 29M1I4M1I68M9D22M25S 12 66451373 0 CAGCCTGGGTGACAGAGTGAGACTCCAGCTAAGGCAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAAGGAATGAAGGAAGGAAGGAGGGAGGGAGGGAGGGAGGGAGAGATCGGAAGAGCACACGTCTGAAC FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFF:FFF:FFF:FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFF NM:i:14 MD:Z:18C8T59G13^AAGGAAGGA22 MC:Z:36S91M23S AS:i:73 XS:i:72 RG:Z:normal -A00289:748:HLCH3DSX3:4:1106:2880:19288 145 12 66451373 9 36S91M23S 4 169630630 0 TATATACTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTGGGTGGGTGGGTGGGTTGGCAG :,,,,,,,:FF:FFFF:,:FF:,F,F,FFF,,,FFF:FFFFFFFFF,:,,F:F,FFFF,:FFFFFFFFFFFFFFFFFFFF:FF:FFFF:FFFFFFF:FFF:FFFFFFF:FF,FFF:,:::,:F::,,:,,:,:,,,,,:,::,,,:F,FF NM:i:1 MD:Z:10T80 MC:Z:29M1I4M1I68M9D22M25S AS:i:86 XS:i:80 RG:Z:normal SA:Z:15,55218228,-,9S52M89S,0,0; XA:Z:15,-77910871,47S80M23S,0;6,-160521761,47S80M23S,0;6,-160521757,47S79M24S,0;7,-107410599,25S28M15D74M23S,16; -A00289:748:HLCH3DSX3:4:1106:2880:19288 401 15 55218228 0 9H52M89H 4 169630630 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTATTTTTTTTTTTTTT FF:FFFF:,:FF:,F,F,FFF,,,FFF:FFFFFFFFF,:,,F:F,FFFF,:F NM:i:0 MD:Z:52 MC:Z:29M1I4M1I68M9D22M25H AS:i:52 XS:i:50 RG:Z:normal SA:Z:12,66451373,-,36S91M23S,9,1; -A00289:748:HLCH3DSX3:4:1107:2365:20462 97 22 18737670 0 150M 12 66451372 0 GAGGACACCATACAAGCCGTCGCTAACAAGGACACTGTACACAACATCGCTAATGACGGCACCGTACAAGACATCACCAATGAGGGCGCTTTATACGACATTGCTAATGATACCGACAAGGCACGCTAACGTGGACGCTGTACACGACAT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:FFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:21S90M39S AS:i:150 XS:i:150 RG:Z:normal -A00289:748:HLCH3DSX3:4:1107:2365:20462 145 12 66451372 13 21S90M39S 22 18737670 0 TTTTATATATTTATATTTATTATTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGTTTGTTTTTTGTGGTGGGGGGGGGGTTTGGCATAGCAT F:F,,F,F,F:FFF,,,:,F:,:,,:,,,,:,F:,,FFFFFFFFFFFFF,F:FFFFFFFFF:FFFF,,FFFFF:FFF::FFFFF,F:F,,F:F,FFFFFFFF,:F:F::::,,:F,F,,F,:,,,,,::,,,,:,,:,,,,,,F,F::FF NM:i:0 MD:Z:90 MC:Z:150M AS:i:90 XS:i:84 RG:Z:normal XA:Z:6,-160521757,28S84M38S,0;15,-77910871,32S80M38S,0;7,-107410642,38S74M38S,0; -A00289:748:HLCH3DSX3:4:1107:26377:28385 113 GL000220.1 122227 60 150M 6 160521757 0 TGCTCTGTTGCTCACGCTGGTCTCAAACTCCTGGCCTTGACGCTTCTCCCGTCACATCCGCCGTCTGGTTGTTGAAATGAGCATCTCTCGTAAAATGGAAAAGATGAAAGAAATAAACACGAAGACGGAAAGCACGGTGTGAACGTTTCT FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF NM:i:0 MD:Z:150 MC:Z:85M65S AS:i:150 XS:i:113 RG:Z:normal -A00289:748:HLCH3DSX3:4:1107:26377:28385 177 6 160521757 0 85M65S GL000220.1 122227 0 TTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTGGTGTTTTTTTTTTGTTGGGGGTTGGGTGTTGGGGGGCGGGGGGGGGGGCGCCCCCCCCCAAGGAGA F:FF,F,,F,F,FFFFFF,:FF,:FF:F,:FF,FFFF:FFF:FF,FFFFFFFFFFFFFFFFFFFFFFFF:FF,F,F:F,:FF,,,FFFF,F,:,,,,,:::,,:,,,,::,,,,,,,,,F,,FF,:F:,,F,,,,,FF:FF:F,FFFF:: NM:i:0 MD:Z:85 MC:Z:150M AS:i:85 XS:i:84 RG:Z:normal XA:Z:12,-66451380,84M66S,0;12,-66451373,83M67S,0;15,-77910871,4S80M66S,0; diff --git a/run.sh b/run.sh index b4c21f7..2bca222 100755 --- a/run.sh +++ b/run.sh @@ -1,15 +1,15 @@ -thread=1 +thread=32 #n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq #n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq #n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq #n_r2=~/data/fastq/ZY2105177532213000/sn_r2.fq #reference=~/data/reference/human_g1k_v37_decoy.fasta -#n_r1=~/fastq/sn_r1.fq -#n_r2=~/fastq/sn_r2.fq +n_r1=~/fastq/sn_r1.fq +n_r2=~/fastq/sn_r2.fq #n_r1=~/fastq/ssn_r1.fq #n_r2=~/fastq/ssn_r2.fq -n_r1=~/fastq/ERR194147_1_120w.fastq -n_r2=~/fastq/ERR194147_2_120w.fastq +#n_r1=~/fastq/ERR194147_1_120w.fastq +#n_r2=~/fastq/ERR194147_2_120w.fastq #n_r1=~/fastq/tiny_n_r1.fq #n_r2=~/fastq/tiny_n_r2.fq #n_r1=~/fastq/diff_r1.fq @@ -18,9 +18,10 @@ n_r2=~/fastq/ERR194147_2_120w.fastq #n_r2=~/fastq/d_r2.fq reference=~/reference/human_g1k_v37_decoy.fasta #out=./all.sam -#out=./ssn.sam +out=./sn.sam #out=./out.sam -out=/dev/null +#out=/dev/null +#out=na12878.sam #time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ # /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ # /home/zzh/data/fastq/nm1.fq \ diff --git a/utils.h b/utils.h index 52107cc..764cc45 100644 --- a/utils.h +++ b/utils.h @@ -53,7 +53,7 @@ extern int64_t time_ksw_extend2, extern int64_t dn, n16, n17, n18, n19, nall, num_sa; extern int64_t s1n, s2n, s3n, s1l, s2l, s3l; extern int64_t g_num_smem2; -extern FILE *fp1; +extern FILE *gfp1, *gfp2, *gfp3; #endif