From bf678f4dae4ee3e8287bdc24d5da241231339e1b Mon Sep 17 00:00:00 2001 From: zzh Date: Wed, 27 Dec 2023 10:42:12 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=9E=E7=8E=B0=E4=BA=86=E7=94=A833bit?= =?UTF-8?q?=E8=A1=A8=E7=A4=BAsa=EF=BC=8C=E9=97=B4=E9=9A=94=E4=B8=BA4?= =?UTF-8?q?=EF=BC=8C=E9=87=8A=E6=94=BE=E5=86=85=E5=AD=98=E7=9A=84=E6=97=B6?= =?UTF-8?q?=E5=80=99=E4=BC=9A=E5=B4=A9=E6=BA=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .vscode/launch.json | 2 +- .vscode/settings.json | 10 +----- bwa.c | 6 ++-- bwamem.c | 35 +++++++++++++++++++++ bwt.c | 71 +++++++++++++++++++++++++++++++++++-------- bwt.h | 7 ++++- fastmap.c | 6 +++- run.sh | 35 +++++++++------------ utils.h | 11 ++++--- 9 files changed, 129 insertions(+), 54 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 791fe75..54cb5f4 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -13,7 +13,7 @@ "args": [ "mem", "-t", - "1", + "12", "-M", "-R", "'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'", diff --git a/.vscode/settings.json b/.vscode/settings.json index 433979a..1728226 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,13 +1,5 @@ { "files.associations": { - "bwt.h": "c", - "bwa.h": "c", - "malloc_wrap.h": "c", - "bntseq.h": "c", - "utils.h": "c", - "rle.h": "c", - "rope.h": "c", - "random": "c", - "kseq.h": "c" + "random": "c" } } \ No newline at end of file diff --git a/bwa.c b/bwa.c index 104c95c..347e8a9 100644 --- a/bwa.c +++ b/bwa.c @@ -280,7 +280,7 @@ bwt_t *bwa_idx_load_bwt(const char *hint) tmp = calloc(strlen(prefix) + 5, 1); strcat(strcpy(tmp, prefix), ".bwt"); // FM-index bwt = bwt_restore_bwt(tmp); - strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) + strcat(strcpy(tmp, prefix), ".33.4.sa"); // partial suffix array (SA) bwt_restore_sa(tmp, bwt); free(tmp); free(prefix); return bwt; @@ -342,7 +342,7 @@ int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx) // generate idx->bwt x = sizeof(bwt_t); idx->bwt = malloc(x); memcpy(idx->bwt, mem + k, x); k += x; x = idx->bwt->bwt_size * 4; idx->bwt->bwt = (uint32_t*)(mem + k); k += x; - x = idx->bwt->n_sa * sizeof(bwtint_t); idx->bwt->sa = (bwtint_t*)(mem + k); k += x; + x = SA_BYTES(idx->bwt->n_sa); idx->bwt->sa = (uint8_t*)(mem + k); k += x; // generate idx->bns and idx->pac x = sizeof(bntseq_t); idx->bns = malloc(x); memcpy(idx->bns, mem + k, x); k += x; @@ -370,7 +370,7 @@ int bwa_idx2mem(bwaidx_t *idx) mem = realloc(idx->bwt->bwt, sizeof(bwt_t) + x); idx->bwt->bwt = 0; memmove(mem + sizeof(bwt_t), mem, x); memcpy(mem, idx->bwt, sizeof(bwt_t)); k = sizeof(bwt_t) + x; - x = idx->bwt->n_sa * sizeof(bwtint_t); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x; + x = SA_BYTES(idx->bwt->n_sa); mem = realloc(mem, k + x); memcpy(mem + k, idx->bwt->sa, x); k += x; free(idx->bwt->sa); free(idx->bwt); idx->bwt = 0; diff --git a/bwamem.c b/bwamem.c index 74b5d53..80e801b 100644 --- a/bwamem.c +++ b/bwamem.c @@ -146,7 +146,14 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co // first pass: find all SMEMs while (x < len) { if (seq[x] < 4) { +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_bwt_smem1a, tmp_time); +#endif for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; int slen = (uint32_t)p->info - (p->info>>32); // seed length @@ -161,7 +168,14 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co bwtintv_t *p = &a->mem.a[k]; int start = p->info>>32, end = (int32_t)p->info; if (end - start < split_len || p->x[2] > opt->split_width) continue; +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_bwt_smem1a, tmp_time); +#endif for (i = 0; i < a->mem1.n; ++i) if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len) kv_push(bwtintv_t, a->mem, a->mem1.a[i]); @@ -176,7 +190,14 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m); } else { // for now, we never come to this block which is slower +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_bwt_smem1a, tmp_time); +#endif for (i = 0; i < a->mem1.n; ++i) kv_push(bwtintv_t, a->mem, a->mem1.a[i]); } @@ -761,7 +782,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rs[j]]); putchar('\n'); printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n'); } +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_time); +#endif if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); } if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; } @@ -789,7 +817,14 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n'); printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n'); } +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_time); +#endif if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; } diff --git a/bwt.c b/bwt.c index 6fa38fd..aa41749 100644 --- a/bwt.c +++ b/bwt.c @@ -65,10 +65,33 @@ static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inver return k == bwt->primary? 0 : x; } +// 设置某一行的排序值,sa的索引有效值从1开始,(0设置为-1, 小端模式) +void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val) +{ + const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组,共享33个字节 + const int val_idx_in_block = k & 7; + const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); + bwtint_t *sa_addr = (bwtint_t *)(sa_arr + start_byte_idx); + // *sa_addr &= (1 << val_idx_in_block) - 1; // 如果开辟内存的时候清零了,这一步可以省略,会清除后面的数据,只适合按递增顺序赋值 + *sa_addr |= val << val_idx_in_block; +} + +// 获取某一行的排序值(小端模式) +bwtint_t bwt_get_sa(uint8_t *sa_arr, bwtint_t k) +{ + const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组,共享33个字节 + const int val_idx_in_block = k & 7; + const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); + bwtint_t val = *(bwtint_t *)(sa_arr + start_byte_idx); + val = (val >> val_idx_in_block) & 8589934591; + return val; +} + // bwt->bwt and bwt->occ must be precalculated void bwt_cal_sa(bwt_t *bwt, int intv) { - bwtint_t isa, sa, i; // S(isa) = sa isa是后缀数组的索引,sa表示位置 + bwtint_t isa, sa, i, block_size; // S(isa) = sa isa是后缀数组的索引,sa表示位置 + double tmp_time, elapsed_time; int intv_round = intv; // 间隔多少来保存一个位置信息 kv_roundup32(intv_round); @@ -78,16 +101,31 @@ void bwt_cal_sa(bwt_t *bwt, int intv) if (bwt->sa) free(bwt->sa); bwt->sa_intv = intv; bwt->n_sa = (bwt->seq_len + intv) / intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); // 用64位无符号整数来保存位置信息,其实用33位就够了 + bwt->sa = (uint8_t *)calloc(SA_BYTES(bwt->n_sa), 1); // 用33位表示位置 + fprintf(stderr, "bytes: %ld, sa size: %ld\n", SA_BYTES(bwt->n_sa), bwt->n_sa); // calculate SA value isa = 0; sa = bwt->seq_len; - for (i = 0; i < bwt->seq_len; ++i) { - if (isa % intv == 0) bwt->sa[isa/intv] = sa; // 第一个位置是$,所以位置就是序列长度 + block_size = bwt->seq_len / 100; + tmp_time = realtime(); + for (i = 0; i < bwt->seq_len; ++i) + { + if (i % block_size == 0) { + elapsed_time = realtime() - tmp_time; + fprintf(stderr, "%ld%% percent complished. %f s elapsed.\n", i / block_size, elapsed_time); + } + if (isa % intv == 0) { + bwt_set_sa(bwt->sa, isa / intv, sa); // 第一个位置是$,所以位置就是序列长度 + if (i % (block_size / 2) == 0) + { + fprintf(stderr, "%ld %ld\n", sa, bwt_get_sa(bwt->sa, isa / intv)); + } + } --sa; // 从后往前,一个位置一个位置的找对应的后缀数组,isa就是与sa对应的后缀数组排序后在sa数组中的相对位置 isa = bwt_invPsi(bwt, isa); // 下一个后缀数组的相对位置 } - if (isa % intv == 0) bwt->sa[isa/intv] = sa; - bwt->sa[0] = (bwtint_t)-1; // before this line, bwt->sa[0] = bwt->seq_len + if (isa % intv == 0) + bwt_set_sa(bwt->sa, isa / intv, sa); + bwt_set_sa(bwt->sa, 0, (bwtint_t)-1); // before this line, bwt->sa[0] = bwt->seq_len } bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k) @@ -99,7 +137,15 @@ bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k) } /* without setting bwt->sa[0] = -1, the following line should be changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */ - return sa + bwt->sa[k/bwt->sa_intv]; +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif + sa += bwt_get_sa(bwt->sa, k / bwt->sa_intv); +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_bwt_sa_read, tmp_time); +#endif + return sa; } static inline int __occ_aux(uint64_t y, int c) @@ -409,7 +455,7 @@ void bwt_dump_sa(const char *fn, const bwt_t *bwt) err_fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); err_fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); err_fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); - err_fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + err_fwrite(bwt->sa, sizeof(bwtint_t), SA_BYTES(bwt->n_sa) >> 3, fp); err_fflush(fp); err_fclose(fp); } @@ -441,10 +487,9 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); - bwt->sa[0] = -1; + bwt->sa = (uint8_t*)malloc(SA_BYTES(bwt->n_sa)); - fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1); + fread_fix(fp, SA_BYTES(bwt->n_sa), bwt->sa); err_fclose(fp); } @@ -472,6 +517,6 @@ bwt_t *bwt_restore_bwt(const char *fn) void bwt_destroy(bwt_t *bwt) { if (bwt == 0) return; - free(bwt->sa); free(bwt->bwt); - free(bwt); + //free(bwt->sa); free(bwt->bwt); + //free(bwt); } diff --git a/bwt.h b/bwt.h index daba87f..158288e 100644 --- a/bwt.h +++ b/bwt.h @@ -56,7 +56,7 @@ typedef struct { // suffix array int sa_intv; bwtint_t n_sa; - bwtint_t *sa; + uint8_t *sa; } bwt_t; typedef struct { @@ -65,6 +65,8 @@ typedef struct { typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v; +#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8) + /* For general OCC_INTERVAL, the following is correct: #define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) #define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) @@ -103,6 +105,9 @@ extern "C" { void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]); bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k); + void bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val); + bwtint_t bwt_get_sa(uint8_t *sa_arr, bwtint_t k); + // more efficient version of bwt_occ/bwt_occ4 for retrieving two close Occ values void bwt_gen_cnt_table(bwt_t *bwt); void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol); diff --git a/fastmap.c b/fastmap.c index a3fa4ad..1bf0b61 100644 --- a/fastmap.c +++ b/fastmap.c @@ -49,7 +49,8 @@ int64_t time_ksw_extend2 = 0, time_ksw_align2 = 0, time_bwt_smem1a = 0, time_bwt_occ4 = 0, - time_bwt_sa = 0; + time_bwt_sa = 0, + time_bwt_sa_read = 0; #endif @@ -421,7 +422,10 @@ int main_mem(int argc, char *argv[]) #ifdef SHOW_PERF fprintf(stderr, "\n"); + fprintf(stderr, "time_bwt_smem1a: %f s\n", time_bwt_smem1a / 1000.0 / opt->n_threads); fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads); + fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_bwt_sa_read: %f s\n", time_bwt_sa_read / 1000.0 / opt->n_threads); fprintf(stderr, "\n"); #endif diff --git a/run.sh b/run.sh index 4724618..690b563 100755 --- a/run.sh +++ b/run.sh @@ -1,25 +1,18 @@ +#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ +# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ +# /home/zzh/data/fastq/nm1.fq \ +# /home/zzh/data/fastq/nm2.fq -o /dev/null + +#time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ +# /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ +# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_1.fq.gz \ +# /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \ +# -o /dev/null + time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ - /home/zzh/data/fastq/nm1.fq \ - /home/zzh/data/fastq/nm2.fq -o /dev/null - -# time ./bwa mem -t 1 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ -# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \ -# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \ -# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \ -# -o /dev/null - -# time ./bwa mem -t 64 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ -# /public/home/zzh/data/reference/human_g1k_v37_decoy.fasta \ -# /public/home/zzh/data/fastq/ZY2003109152013000/nm1.fq \ -# /public/home/zzh/data/fastq/ZY2003109152013000/nm2.fq \ -# -o /dev/null - -#/public/home/zzh/data/fastq/n1.fq \ -#/public/home/zzh/data/fastq/n2.fq \ -#/share_nas3/zyseq-release-v1.1.3/zyseq/wes/resource/reference/human_g1k_v37_decoy.fasta \ -#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n1.fq \ -#/share_nas3/zyseq-release-v1.1.3/zyseq/data/n2.fq \ -#-o reads_mapping.sam + /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213030_L3_1.fq.gz \ + /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213030_L3_2.fq.gz \ + -o /dev/null diff --git a/utils.h b/utils.h index 1a92542..1b88b4e 100644 --- a/utils.h +++ b/utils.h @@ -34,11 +34,12 @@ #ifdef SHOW_PERF extern int64_t time_ksw_extend2, - time_ksw_global2, - time_ksw_align2, - time_bwt_smem1a, - time_bwt_occ4, - time_bwt_sa; + time_ksw_global2, + time_ksw_align2, + time_bwt_smem1a, + time_bwt_occ4, + time_bwt_sa, + time_bwt_sa_read; #endif