From e335e79543241d3afcc05612f6a66aa86e244b17 Mon Sep 17 00:00:00 2001 From: zzh Date: Sat, 17 Jan 2026 01:00:43 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=93=E6=9E=9C=E6=AD=A3=E7=A1=AE=E4=BA=86?= =?UTF-8?q?=EF=BC=8Cgen-sam=E8=BF=87=E7=A8=8B=E5=BA=94=E8=AF=A5=E8=BF=98?= =?UTF-8?q?=E6=9C=89=E6=80=A7=E8=83=BD=E6=8F=90=E5=8D=87=E7=A9=BA=E9=97=B4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Makefile | 2 +- paired_sam.c | 72 ++++++++++++++++++++++++++++++++++------------------ run.sh | 14 +++++----- 3 files changed, 56 insertions(+), 32 deletions(-) diff --git a/Makefile b/Makefile index 5351642..33673f8 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC= gcc -CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw #-O3 +CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS SHOW_PERF= -DSHOW_PERF diff --git a/paired_sam.c b/paired_sam.c index b37893d..bbed6de 100644 --- a/paired_sam.c +++ b/paired_sam.c @@ -363,11 +363,11 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) { static void worker_calc_matesw_avx512_i16(void* data, int i, int tid) {} // 检测添加新的align -static int check_add_align(kswr_avx_t aln, int min_seed_len, int is_rev, int64_t l_pac, mem_alnreg_t* a, int l_ms, uint8_t* ms, mem_alnreg_v* ma, - int64_t rb) { +static int check_add_align(const mem_opt_t* opt, kswr_avx_t aln, int is_rev, int64_t l_pac, mem_alnreg_t* a, int l_ms, uint8_t* ms, + mem_alnreg_v* ma, int64_t rb) { int res = 0; - if (aln.score >= min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 - // int i, tmp; + if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 + int i, tmp; mem_alnreg_t b; memset(&b, 0, sizeof(mem_alnreg_t)); b.rid = a->rid; @@ -383,14 +383,15 @@ static int check_add_align(kswr_avx_t aln, int min_seed_len, int is_rev, int64_t // printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev, // is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re); kv_push(mem_alnreg_t, *ma, b); // make room for a new element -// // move b s.t. ma is sorted -// for (i = 0; i < ma->n - 1; ++i) // find the insertion point -// if (ma->a[i].score < b.score) -// break; -// tmp = i; -// for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i - 1]; -// ma->a[i] = b; + // move b s.t. ma is sorted + for (i = 0; i < ma->n - 1; ++i) // find the insertion point + if (ma->a[i].score < b.score) + break; + tmp = i; + for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i - 1]; + ma->a[i] = b; res = 1; + ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a); } return res; } @@ -572,33 +573,48 @@ static void workder_gen_sam(void* data, int idx, int tid) { endIdx = w->n_reads; int id = 0, i = 0, j = 0, k = 0; - + mem_alnreg_v bb[2]; + kv_init(bb[0]); kv_init(bb[1]); for (id = startIdx; id < endIdx; id += 2) { // 初始化变量 bseq1_t* s = &w->seqs[id]; mem_alnreg_v* a = &w->regs[id]; seq_sam_t* ss = &w->sams[id]; - int cmp = strcmp("ERR194147.42331121", s[0].name) == 0; +#if 1 + int cmp = strcmp("ERR194147.1629456", s[0].name) == 0; // int cmp = (id + w->n_processed) == 202924; if (cmp == 1) { fprintf(stderr, "%s\n", s[0].name); fflush(stderr); //exit(0); } +#endif + + int n[2] = {0}; + // , nn[2] = {0}; + // + _clear_vec(bb[0]); + _clear_vec(bb[1]); + + for (i = 0; i < 2; ++i) + for (j = 0; j < s[!i].msw.n; ++j) { + msw_task_t* t = s[!i].msw.a[j]; + kv_push(mem_alnreg_t, bb[i], a[i].a[t->aj]); + } - int n[2] = {0}, nn[2] = {0}; for (i = 0; i < 2; ++i) { int si = !i; int origin_n = a[si].n; // 这里应该先给task排序,因为u8和i16是分开排序的,需要合在一起 for (k = 0; k < s[si].msw.n; ++k) { msw_task_t* t = s[si].msw.a[k]; - mem_alnreg_t* b = &a[i].a[t->aj]; + // mem_alnreg_t* b = &a[i].a[t->aj]; + mem_alnreg_t* b = &bb[i].a[k]; mem_alnreg_v* ma = &a[si]; uint8_t skip = 0; if (n[si]) { // 添加了新的aln,需要检测skip - for (j = origin_n; j < ma->n; ++j) { // check which orinentation has been found + for (j = 0; j < ma->n; ++j) { // check which orinentation has been found int64_t dist; int r; r = mem_infer_dir(bns->l_pac, b->rb, ma->a[j].rb, &dist); @@ -606,18 +622,25 @@ static void workder_gen_sam(void* data, int idx, int tid) { skip |= 1 << r; } } - //if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了 - kswr_avx_t aln = t->aln; // fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); - if (cmp) { - fprintf(stderr, "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); - fflush(stderr); - } - nn[si] += 1; - n[si] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, b, s[si].l_seq, (uint8_t*)s[si].seq, ma, t->rb); + // if (skip == 15) + if ((t->skip | skip) == 15) + continue; + // if ((t->skip | skip) != 15) { // 检查新添加的aln是不是把当前的任务skip掉了 +#if 1 + kswr_avx_t aln = t->aln; // fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + + // w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); + if (cmp) { + fprintf(stderr, "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, + aln.qe, aln.score2, aln.te2, aln.tb, aln.qb); fflush(stderr); + } +#endif + // nn[si] += 1; + n[si] += check_add_align(opt, t->aln, t->is_rev, bns->l_pac, b, s[si].l_seq, (uint8_t*)s[si].seq, ma, t->rb); //} } } // 处理完2个pair read之后再排序,因为上面操作有插入,排序后之前记录的索引就无效了,所以上面不能排序,要两个pair read处理完之后再排序 +#if 0 for (i = 0; i < 2; ++i) { int tmp; if (nn[i] > 0) { @@ -645,6 +668,7 @@ static void workder_gen_sam(void* data, int idx, int tid) { // a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a); } } +#endif //////////////////////////////////////////////////////////// // 这里需要传入全局id diff --git a/run.sh b/run.sh index 77b8263..289b6ad 100755 --- a/run.sh +++ b/run.sh @@ -1,11 +1,11 @@ -thread=1 +thread=64 make clean; make -j 32 -#n1=~/data/dataset/D2/n1.fq.gz -#n2=~/data/dataset/D2/n2.fq.gz -n1=~/data/dataset/D2/d1.fq -n2=~/data/dataset/D2/d2.fq +n1=~/data/dataset/D2/n1.fq.gz +n2=~/data/dataset/D2/n2.fq.gz +#n1=~/data/dataset/D2/d1.fq +#n2=~/data/dataset/D2/d2.fq #n1=/file-data/zzh/dataset/D1/45mr1.fq.gz #n2=/file-data/zzh/dataset/D1/45mr2.fq.gz @@ -14,8 +14,8 @@ reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta #out=/dev/null #out=./oldsam-D2-out.sam -#out=./D2-1.sam -out=./d.sam +out=./D2-1.sam +#out=./d.sam prog=./fastalign #prog=/home/zzh/fastbwa