From d728ddbe2cd15c61ae2c3d087bf08edc3b5f0cde Mon Sep 17 00:00:00 2001 From: zzh Date: Sat, 23 Mar 2024 03:23:05 +0800 Subject: [PATCH] =?UTF-8?q?=E8=A7=A3=E5=86=B3=E4=BA=86bsw=E4=B8=8A?= =?UTF-8?q?=E8=BE=B9=E7=95=8C=E5=90=8C=E6=A0=B7=E7=9A=84=E9=97=AE=E9=A2=98?= =?UTF-8?q?=EF=BC=8C=E8=A7=A3=E5=86=B3=E4=BA=86seed3=E7=9A=84=E4=B8=80?= =?UTF-8?q?=E4=B8=AAbug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 5 +++-- bwamem.c | 43 ++++++++++++++++++++++++++++++++----------- debug.sh | 6 ++++-- fastmap.c | 4 +++- fmt_idx.c | 41 +++++++++++++++++++++++++++++++++-------- fmt_idx.h | 2 +- ksw_extend2_avx2.c | 37 +++++++++++++++++++++++++++++++------ ksw_extend2_avx2_u8.c | 6 +++--- run.sh | 20 +++++++++++++++----- seed1.py | 21 +++++++++++++++++++++ seed2.py | 22 ++++++++++++++++++++++ sw1.py | 23 +++++++++++++++++++++++ utils.h | 1 + 13 files changed, 192 insertions(+), 39 deletions(-) create mode 100755 seed1.py create mode 100755 seed2.py create mode 100755 sw1.py diff --git a/Makefile b/Makefile index b371b00..0b7ae7c 100644 --- a/Makefile +++ b/Makefile @@ -3,9 +3,10 @@ CC= gcc # CFLAGS= -g -Wall -Wno-unused-function -O2 CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS -SHOW_PERF= -DSHOW_PERF +SHOW_PERF= #-DSHOW_PERF +FILTER_FULL_MATCH=#-DFILTER_FULL_MATCH AR= ar -DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) -DUSE_AVX2 -DKSW_EQUAL #-DUSE_MT_READ #-DFILTER_FULL_MATCH +DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) $(FILTER_FULL_MATCH) -DUSE_AVX2 -DKSW_EQUAL -DUSE_MT_READ LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ diff --git a/bwamem.c b/bwamem.c index b287346..7143efb 100644 --- a/bwamem.c +++ b/bwamem.c @@ -133,7 +133,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, while (x < len) { if (seq[x] < 4) { start_flag = 0; - x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]); + x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); // seed length @@ -163,7 +163,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, bwtintv_t *p = &a->mem.a[k]; int start = p->info>>32, end = (int32_t)p->info; if (end - start < split_len || p->x[2] > opt->split_width) continue; - fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]); + fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); @@ -1384,10 +1384,13 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in int64_t tmp_time = realtime_msec(); #endif // 1. first pass: find all SMEMs + //fprintf(gfp1, "read\n"); while (x < len) { if (seq[x] < 4) { start_flag = 0; - x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]); + //int last_x = x; + x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]); + //fprintf(gfp1, "%d\n", x - last_x); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); // seed length @@ -1405,12 +1408,11 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in #ifdef SHOW_PERF a->time_seed_1 += realtime_msec() - tmp_time; tmp_time = realtime_msec(); -#endif - s2n += 1; if (max_seed_len == len - start_N_num) s1n += 1; - +#endif + #ifdef FILTER_FULL_MATCH if (max_seed_len == len - start_N_num) goto collect_intv_end; #endif @@ -1421,14 +1423,17 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in bwtintv_t *p = &smem->mem.a[k]; int start = p->info>>32, end = (int32_t)p->info; if (end - start < split_len || p->x[2] > opt->split_width) continue; - fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]); +// fprintf(gfp1, "start\n"); + fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]); for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info >> 32); +// fprintf(gfp1, "%d\n", slen); if (slen >= opt->min_seed_len) { kv_push(bwtintv_t, smem->mem, a->mem1.a[i]); } } +// fprintf(gfp1, "end\n"); } #ifdef SHOW_PERF a->time_seed_2 += realtime_msec() - tmp_time; @@ -1442,6 +1447,7 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in bwtintv_t m; x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); if (m.x[2] > 0) { + // fprintf(stderr, "3: %ld %ld %ld\n", m.info >> 32, m.info & ((1L << 32) - 1), m.x[2]); kv_push(bwtintv_t, smem->mem, m); } } else ++x; @@ -1456,6 +1462,10 @@ collect_intv_end: #endif // sort ks_introsort(mem_intv, smem->mem.n, smem->mem.a); + //for (i = 0; i < smem->mem.n; ++i) { + // fprintf(stderr, "seed %d: %ld %ld %ld\n", i, smem->mem.a[i].x[0], smem->mem.a[i].x[1], smem->mem.a[i].x[2]); + //} + //fprintf(stderr, "split\n"); } void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv) @@ -1642,10 +1652,10 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux); free(chnp->a[j].seeds); } -#ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_ksw_extend2, tmp_time); -#endif +//#ifdef SHOW_PERF +// tmp_time = realtime_msec() - tmp_time; +// __sync_fetch_and_add(&time_ksw_extend2, tmp_time); +//#endif free(chnp->a); chnp->m = 0; chnp->a = 0; regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a); if (bwa_verbose >= 4) { @@ -1660,6 +1670,11 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t if (p->rid >= 0 && bns->anns[p->rid].is_alt) p->is_alt = 1; } +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_time); +#endif + } #if 1 @@ -1747,11 +1762,15 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed w->isize_arr[i][j].n = 0; } #endif +#ifdef SHOW_PERF int64_t tmp_time = realtime_msec(); +#endif //kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions +#ifdef SHOW_PERF time_work1 += realtime_msec() - tmp_time; tmp_time = realtime_msec(); +#endif if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data @@ -1759,7 +1778,9 @@ void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed } kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment //kt_for(opt->n_threads, worker_sam, w, n_batch); +#ifdef SHOW_PERF time_work2 += realtime_msec() - tmp_time; +#endif //free(w.regs); if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); diff --git a/debug.sh b/debug.sh index 6ba3248..709d0e1 100755 --- a/debug.sh +++ b/debug.sh @@ -1,4 +1,6 @@ thread=1 +n_r1=~/data/fastq/dataset/na12878_wes_144/diff_r1.fq +n_r2=~/data/fastq/dataset/na12878_wes_144/diff_r2.fq #n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq #n_r2=~/data/fastq/ZY2105177532213000/n_r2.fq #n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq @@ -10,8 +12,8 @@ thread=1 #n_r2=~/fastq/ssn_r2.fq #n_r1=~/fastq/tiny_n_r1.fq #n_r2=~/fastq/tiny_n_r2.fq -n_r1=~/fastq/diff_r1.fq -n_r2=~/fastq/diff_r2.fq +#n_r1=~/fastq/diff_r1.fq +#n_r2=~/fastq/diff_r2.fq #n_r1=~/fastq/diff_all_r1.fq #n_r2=~/fastq/diff_all_r2.fq #n_r1=~/fastq/d_r1.fq diff --git a/fastmap.c b/fastmap.c index a14dbd7..625b89c 100644 --- a/fastmap.c +++ b/fastmap.c @@ -63,6 +63,7 @@ int64_t time_ksw_extend2 = 0, int64_t dn = 0, n16 = 0, n17 = 0, n18 = 0, n19 = 0, nall = 0, num_sa = 0; int64_t s1n = 0, s2n = 0, s3n = 0, s1l = 0, s2l = 0, s3l = 0; +int64_t gn[100] = {0}; int64_t g_num_smem2 = 0; FILE *gfp1, *gfp2, *gfp3; @@ -292,7 +293,7 @@ int main_mem(int argc, char *argv[]) } if (opt->n_threads < 1) opt->n_threads = 1; - if (opt->batch_size < 1) opt->batch_size = 512; + if (opt->batch_size < 1) opt->batch_size = 256; fprintf(stderr, "batch_size: %d\n", opt->batch_size); if (optind + 1 >= argc || optind + 3 < argc) { @@ -430,6 +431,7 @@ int main_mem(int argc, char *argv[]) bwa_print_sam_hdr(aux.idx->bns, hdr_line); aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads; kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3); + //kt_pipeline(3, process, &aux, 3); free(hdr_line); free(opt); bwa_idx_destroy(aux.idx); diff --git a/fmt_idx.c b/fmt_idx.c index c63b85a..d27d0f3 100644 --- a/fmt_idx.c +++ b/fmt_idx.c @@ -708,7 +708,10 @@ int fmt_smem_forward(const FMTIndex *fmt, int len, const uint8_t *q, int x, int #if 1 if (min_intv == 1 && ok2.x[2] == min_intv) { + // fprintf(gfp1, "%d\t", i + 2 - x); direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt); + //mt.x[0] = ok2.x[0]; + //fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]); kv_push(bwtintv_t, *mem, mt); ret = (uint32_t)mt.info; goto fmt_smem_forward_end; @@ -767,8 +770,8 @@ fmt_smem_forward_end: return ret; } -// 找smem(seed) -int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec) +// 找smem(seed)(lm: long_smem) +int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec) { if (x == 0 || q[x - 1] > 3) return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem); @@ -788,7 +791,7 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv qbit = build_forward_kmer(&q[x], len - x, HASH_KMER_LEN, &kmer_len); bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, 0); // 初始碱基位置 ik.info = x + 1; - + //int print_flag = 0; // check change of the interval size and whether the interval size is too small to be extended further #define CHECK_INTV_CHANGE(iv, ov, end_pos) \ if (ov.x[2] != iv.x[2]) \ @@ -826,18 +829,37 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2); __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); CHECK_INTV_CHANGE(ik, ok1, i + 1); +#if 0 + if (min_intv == 2 && ok1.x[2] == 2 && ok2.x[2] == 1 && lm && lm->num_match == 1) + { + bwtint_t bwt_mtx_row = ok1.x[0]; + if (bwt_mtx_row == ok2.x[0]) + bwt_mtx_row = ok1.x[0] + 1; + direct_extend(fmt, len, q, x, i + 1, bwt_mtx_row, &mt); + kv_push(bwtintv_t, *mem, mt); + //fprintf(gfp1, "cond-appear: %d %d %ld %ld %ld %ld\n", x, i + 2, ok1.x[0], ok1.x[1], ok2.x[0], ok2.x[1]); + //print_flag = 1; + } + else + { + CHECK_INTV_CHANGE(ik, ok2, i + 2); + } +#else CHECK_INTV_CHANGE(ik, ok2, i + 2); - // 在这里进行判断是否只有一个候选了 +#endif #if 1 + // 在这里进行判断是否只有一个候选了 if (min_intv == 1 && ok2.x[2] == min_intv) { direct_extend(fmt, len, q, x, i + 2, ok2.x[0], &mt); - kv_push(bwtintv_t, *mem, mt); + //mt.x[0] = ok2.x[0]; + //fprintf(gfp1, "mt %ld %ld\n", ok2.x[0], ok2.x[1]); + kv_push(bwtintv_t, *mem, mt); // 这里可以判断一下是否大于min_seed_len ret = (uint32_t)mt.info; if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3) goto fmt_smem_end; goto backward_search; - } + } // if (min_intv == 2 && ok2.x[2] == min_intv && !print_flag) #endif } else if (q[i] < 4) // q[i+1] >= 4 @@ -868,7 +890,6 @@ backward_search: fmt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first if (mt.num_match == 0) ret = curr->a[0].info; // this will be the returned value,扩展到的最远的位置 - // 按照种子进行遍历,反向扩展 #define CHECK_ADD_MEM(pos, intv, mem) \ if (((int)((intv).info) - (pos) >= min_seed_len) && (mem->n == 0 || (pos) < mem->a[mem->n - 1].info >> 32)) \ @@ -966,6 +987,7 @@ backward_search: } fmt_smem_end: + //if (mem->n == 0 && min_intv > 1 && print_flag) fprintf(gfp1, "\n"); fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate return ret; } @@ -1029,10 +1051,12 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); COND_SET_RETURN(ok2, *mem, x, i + 1, max_intv, min_len); ik = ok2; + //fprintf(stderr, "d: %d %ld\n", i, ok2.x[2]); } else if (q[i] < 4) // q[i+1] >= 4 { fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); + //fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]); COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); return i + 2; } @@ -1041,9 +1065,10 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in return i + 1; } } - if (i == len - 1) + if (i == len - 1 && q[i] < 4) { fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]); + //fprintf(stderr, "d: %d %ld\n", i, ok1.x[2]); COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); } return len; diff --git a/fmt_idx.h b/fmt_idx.h index a102ce0..0d6a5f9 100644 --- a/fmt_idx.h +++ b/fmt_idx.h @@ -107,7 +107,7 @@ void kmer_setval_at(uint8_t *mem_addr, bwtintv_t ik, int pos); void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos); void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos); // 找smem(seed) -int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_v *mem, bwtintv_v *tmpvec); +int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec); int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem); diff --git a/ksw_extend2_avx2.c b/ksw_extend2_avx2.c index 6684129..4279e26 100644 --- a/ksw_extend2_avx2.c +++ b/ksw_extend2_avx2.c @@ -6,7 +6,10 @@ #include #include +#ifdef SHOW_PERF extern FILE *gfp1, *gfp2, *gfp3; +#endif + //#define DEBUG_SW_EXTEND 1 #define NO_VAL -1 @@ -148,8 +151,8 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = { max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \ max_vec = _mm256_max_epu16(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \ int16_t *maxVal = (int16_t*)&max_vec; \ - m = maxVal[0]; \ - if (m > 0) { \ + m = MAX(m,maxVal[0]); \ + if (maxVal[0] > 0) { \ for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \ __m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \ __m256i vcmp = _mm256_cmpeq_epi16(h2_vec, max_vec); \ @@ -197,7 +200,9 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 buf_t *buf) // 之前已经开辟过的缓存 { //return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off); - + + //fprintf(gfp1, "%d\n", qlen); + if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf); int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M int16_t *seq, *ref; @@ -335,7 +340,7 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度 //} // 要处理边界 // 左边界 处理f (insert) - if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } + if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end];} // 上边界 if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } else if (D & 1) { @@ -616,14 +621,26 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 } ins[0][0] = del[0][0] = score[0][0] = h0; #endif - +#ifdef SHOW_PERF + // fprintf(gfp1, "start\n"); + int bsw_cal_num = 0; + int real_cal_num = 0; + for (i = 0; i < tlen; ++i) + { + int beg = MAX(0, i - w); + int end = MIN(qlen, i + w + 1); + if (beg >= end) break; + bsw_cal_num += end - beg; + } + // fprintf(gfp1, "%d\n", bsw_cal_num); +#endif for (i = 0; LIKELY(i < tlen); ++i) { int t, f = 0, h1, m = 0, mj = -1; int8_t *q = &qp[ref[i] * qlen]; // apply the band and the constraint (if provided) if (beg < i - w) beg = i - w; if (end > i + w + 1) end = i + w + 1; - //if (end > qlen) end = qlen; 没用 + if (end > qlen) end = qlen; // 没用 // compute the first column if (beg == 0) { h1 = h0 - (o_del + e_del * (i + 1)); @@ -631,6 +648,11 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 } else h1 = 0; //m = h1; for (j = beg; LIKELY(j < end); ++j) { + +#ifdef SHOW_PERF + real_cal_num++; +#endif + #ifdef DEBUG_SW_EXTEND ins[i+1][j+1] = f; #endif @@ -737,6 +759,9 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长 fprintf(gfp3, "\n"); } #endif +#ifdef SHOW_PERF + //fprintf(gfp1, "%d\nend\n", real_cal_num); +#endif free(eh); free(qp); free(qmem); if (_qle) *_qle = max_j + 1; diff --git a/ksw_extend2_avx2_u8.c b/ksw_extend2_avx2_u8.c index e0c277d..19b7dd7 100644 --- a/ksw_extend2_avx2_u8.c +++ b/ksw_extend2_avx2_u8.c @@ -162,8 +162,8 @@ static const uint8_t reverse_mask[SIMD_WIDTH] = {7,6,5,4,3,2,1,0,15,14,13,12,11, max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 7)); \ max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8)); \ max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01)); \ - m = maxVal[0]; \ - if (m > 0) { \ + m = MAX(m, maxVal[0]); \ + if (maxVal[0] > 0) { \ for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \ __m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \ __m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec); \ @@ -313,7 +313,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 // 要处理边界 // 左边界 处理f (insert) - if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); } + if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end]; } // 上边界 if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); } else if (D & 1) { diff --git a/run.sh b/run.sh index b7a899f..7d755eb 100755 --- a/run.sh +++ b/run.sh @@ -1,4 +1,12 @@ -thread=1 +thread=64 +#n_r1=~/data/fastq/dataset/na12878_wgs_150/bn1.fq +#n_r2=~/data/fastq/dataset/na12878_wgs_150/bn2.fq +n_r1=~/data/fastq/dataset/na12878_wes_144/SRR25735653_1.fastq +n_r2=~/data/fastq/dataset/na12878_wes_144/SRR25735653_2.fastq +#n_r1=~/data/fastq/dataset/na12878_wgs_150/n1.fq +#n_r2=~/data/fastq/dataset/na12878_wgs_150/n2.fq +#n_r1=~/data/fastq/dataset/na12878_wgs_101/na_1.fq +#n_r2=~/data/fastq/dataset/na12878_wgs_101/na_2.fq #n_r1=~/data/fastq/zy/wgs/n1.fq #n_r2=~/data/fastq/zy/wgs/n2.fq #n_r1=~/data/fastq/ds/n1.fq @@ -7,10 +15,12 @@ thread=1 #n_r2=~/data/fastq/platinum/n2.fq #n_r1=~/data/fastq/zy/t1.fq #n_r2=~/data/fastq/zy/t2.fq -n_r1=~/data/fastq/zy/n1.fq -n_r2=~/data/fastq/zy/n2.fq +#n_r1=~/data/fastq/zy/n1.fq +#n_r2=~/data/fastq/zy/n2.fq #n_r1=~/data/fastq/ds/d1_1.fq #n_r2=~/data/fastq/ds/d1_2.fq +#n_r1=~/data/fastq/ds/d2_1.fq +#n_r2=~/data/fastq/ds/d2_2.fq #n_r1=~/data/fastq/ds/wes/n1.fq #n_r2=~/data/fastq/ds/wes/n2.fq #n_r1=~/fastq/ZY2105177532213010_L4_1.fq @@ -44,8 +54,8 @@ reference=~/data/reference/human_g1k_v37_decoy.fasta #out=./all.sam #out=./sn.sam #out=./ssn-x1.sam -#out=./out.sam -out=/dev/null +out=./out.sam +#out=/dev/null #out=./na12878.sam #time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ # /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ diff --git a/seed1.py b/seed1.py new file mode 100755 index 0000000..467467d --- /dev/null +++ b/seed1.py @@ -0,0 +1,21 @@ +#!/bin/env python3 +import sys +import os + + +fn = sys.argv[1] +n_all = 0 +n_2 = 0 +n_intv1_len = 0 +n_seed_len = 0 + +with open(fn, 'r') as f: + for line in f: + dat = line.strip().split('\t') + n_all += 1 + if (len(dat) == 2): + n_2 += 1 + n_intv1_len += int(dat[0]) + n_seed_len += int(dat[1]) + +print(n_all, n_2, n_2 / n_all, n_intv1_len / n_2, n_seed_len / n_2) diff --git a/seed2.py b/seed2.py new file mode 100755 index 0000000..b8f967d --- /dev/null +++ b/seed2.py @@ -0,0 +1,22 @@ +#!/bin/env python3 +import sys +import os + + +fn = sys.argv[1] +n_all = 0 +n_direct = 0 +n_direct_start_len = 0 +n_seed_len = 0 + +with open(fn, 'r') as f: + for line in f: + dat = line.strip().split('\t') + if dat[0] == "start": + n_all += 1 + if (len(dat) == 2): + n_direct += 1 + n_direct_start_len += int(dat[0]) + n_seed_len += int(dat[1]) + +print(n_all, n_direct, n_direct / n_all, n_direct_start_len / n_direct, n_seed_len / n_direct) diff --git a/sw1.py b/sw1.py new file mode 100755 index 0000000..8020eff --- /dev/null +++ b/sw1.py @@ -0,0 +1,23 @@ +#!/bin/env python3 +import sys +import os + + +fn = sys.argv[1] +n_bsw = 0 +n_prun = 0 + +with open(fn, 'r') as f: + offset = 0 + for line in f: + dat = line.strip().split('\t') + if dat[0] == "start": + offset = 1 + elif offset == 1: + n_bsw += int(dat[0]) + offset = 2 + elif offset == 2: + n_prun += int(dat[0]) + offset = 3 + +print(n_prun / n_bsw * 100, 100) diff --git a/utils.h b/utils.h index 6f1cb4e..6441a6a 100644 --- a/utils.h +++ b/utils.h @@ -50,6 +50,7 @@ time_work1, time_work2, time_flt_chain; +extern int64_t gn[100]; extern int64_t dn, n16, n17, n18, n19, nall, num_sa; extern int64_t s1n, s2n, s3n, s1l, s2l, s3l; extern int64_t g_num_smem2;