修正了一个bug,是xtra赋值搞错了,第二阶段应该赋值给xtras2
This commit is contained in:
parent
4c5e1e2fa1
commit
e997699a47
|
|
@ -1,5 +1,6 @@
|
|||
*.log
|
||||
*.sam
|
||||
output/
|
||||
|
||||
# Prerequisites
|
||||
*.d
|
||||
|
|
|
|||
|
|
@ -13,13 +13,13 @@
|
|||
"args": [
|
||||
"mem",
|
||||
"-t",
|
||||
"32",
|
||||
"1",
|
||||
"-M",
|
||||
"-R",
|
||||
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
|
||||
"~/data/reference/fmt/human_g1k_v37_decoy.fasta",
|
||||
"~/data/dataset/D1/n1.fq.gz",
|
||||
"~/data/dataset/D1/n2.fq.gz",
|
||||
"~/data/dataset/D2/n1.fq.gz",
|
||||
"~/data/dataset/D2/n2.fq.gz",
|
||||
"-o",
|
||||
"D1-out.sam",
|
||||
],
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
{
|
||||
"label": "Build",
|
||||
"type": "shell",
|
||||
"command": "make clean; make -j 16",
|
||||
"command": "make clean; make -j 32",
|
||||
"problemMatcher": [],
|
||||
"group": {
|
||||
"kind": "build",
|
||||
|
|
|
|||
4
Makefile
4
Makefile
|
|
@ -1,9 +1,9 @@
|
|||
CC= gcc
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw #-O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
|
||||
SHOW_PERF= -DSHOW_PERF
|
||||
SHOW_DATA_PERF= #-DSHOW_DATA_PERF
|
||||
SHOW_DATA_PERF= -DSHOW_DATA_PERF
|
||||
FILTER_FULL_MATCH= #-DFILTER_FULL_MATCH
|
||||
USE_MT_READ= -DUSE_MT_READ
|
||||
|
||||
|
|
|
|||
76
paired_sam.c
76
paired_sam.c
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include "ksw_align_avx.h"
|
||||
#include "kvec.h"
|
||||
#include "debug.h"
|
||||
|
||||
// 原版生成sam函数
|
||||
static void worker_sam(void* data, int i, int tid) {
|
||||
|
|
@ -76,7 +77,7 @@ static inline void revseq(int l, uint8_t* s) {
|
|||
|
||||
// 计算当前seq是否需要做matesw,需要的话保存必要的数据
|
||||
static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, const mem_pestat_t pes[4], const mem_alnreg_t* a, int a_j,
|
||||
bseq1_t* seq, mem_alnreg_v* ma, int64_t sid, msw_stats_t* stats, msw_task_v* msw8, msw_task_v* msw16) {
|
||||
bseq1_t* seq, mem_alnreg_v* ma, int64_t sid, msw_stats_t* stats, msw_task_v* msw8, msw_task_v* msw16, int64_t n_processed, int tid) {
|
||||
int64_t l_pac = bns->l_pac;
|
||||
int l_ms = seq->l_seq;
|
||||
int i, r, skip[4], rid;
|
||||
|
|
@ -112,6 +113,8 @@ static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const ui
|
|||
re = l_pac << 1;
|
||||
rid = get_rid_range(bns, pac, &rb, (rb + re) >> 1, &re); // 计算ref对应的染色体id和区间起始终止位置
|
||||
if (a->rid == rid && re - rb >= opt->min_seed_len) { // no funny things happening
|
||||
tdat[TD_MSW_CNT][tid] += 1;
|
||||
|
||||
stats->max_ref_len = max_(stats->max_ref_len, re - rb);
|
||||
// fprintf(stderr, "zzh here\n");
|
||||
int xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250 ? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a);
|
||||
|
|
@ -169,7 +172,7 @@ static void worker_matesw_tasks(void* data, int idx, int tid) {
|
|||
for (i = 0; i < 2; ++i)
|
||||
for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
|
||||
get_matesw_tasks(opt, w->bns, w->pac, w->pes, &b[i].a[j], aj[i].a[j], &s[!i], &a[!i], i_s + !i, &w->msw->t_msw_stats[tid],
|
||||
&w->msw->t_msw_tasks_u8[tid], &w->msw->t_msw_tasks_i16[tid]);
|
||||
&w->msw->t_msw_tasks_u8[tid], &w->msw->t_msw_tasks_i16[tid], w->n_processed, tid);
|
||||
}
|
||||
free(b[0].a);
|
||||
free(b[1].a);
|
||||
|
|
@ -178,7 +181,7 @@ static void worker_matesw_tasks(void* data, int idx, int tid) {
|
|||
}
|
||||
|
||||
static void msw_avx512_u8(const mem_opt_t* opt, int num_tasks, msw_buf_t* msw_buf, msw_ref_v* refs, msw_seq_v* seqs, int_v* xtras,
|
||||
msw_kswr_v* kswrs, int tid) {
|
||||
msw_kswr_v* kswrs, int phase, int tid) {
|
||||
int i = 0, j = 0, k = 0;
|
||||
int refLen[SIMD512_WIDTH8] = {0};
|
||||
|
||||
|
|
@ -239,9 +242,14 @@ static void msw_avx512_u8(const mem_opt_t* opt, int num_tasks, msw_buf_t* msw_bu
|
|||
}
|
||||
PROF_END(tprof[T_MSW_PACK_SEQ][tid], msw_pack_seq);
|
||||
|
||||
if (phase == 0)
|
||||
tdat[TD_ALIGN_1_CNT][tid] += 1;
|
||||
else
|
||||
tdat[TD_ALIGN_2_CNT][tid] += 1;
|
||||
|
||||
// 利用smid指令计算
|
||||
ksw_align_avx512_u8(opt->a, -1 * opt->b, opt->o_ins, opt->e_ins, opt->o_del, opt->e_del, msw_buf, mySeq1SoA, mySeq2SoA, maxRefLen, maxSeqLen,
|
||||
&xtras->a[i], refLen, &kswrs->a[i], 0);
|
||||
&xtras->a[i], refLen, &kswrs->a[i], phase);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -292,7 +300,7 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
|||
PROF_END(tprof[T_MSW_GET_REF][tid], msw_get_ref);
|
||||
|
||||
PROF_START(msw_1);
|
||||
msw_avx512_u8(opt, endIdx - startIdx, msw_buf, refs, seqs, xtras, kswrs, tid);
|
||||
msw_avx512_u8(opt, endIdx - startIdx, msw_buf, refs, seqs, xtras, kswrs, 0, tid); // 第一阶段
|
||||
PROF_END(tprof[T_MSW_1][tid], msw_1);
|
||||
|
||||
// 第二阶段计算
|
||||
|
|
@ -307,19 +315,25 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
|||
msw_task_t* task = w->msw->p_msw_tasks_u8.a[i];
|
||||
kswr_avx_t r = kswrs->a[j];
|
||||
task->aln = r;
|
||||
// 打印测试
|
||||
// kswr_avx_t aln = r;
|
||||
//fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", task->seq_id + w->n_processed, aln.score, aln.te, aln.qe,
|
||||
// aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
|
||||
if ((task->xtra & KSW_XSTART) == 0 || ((task->xtra & KSW_XSUBO) && r.score < (task->xtra & 0xffff)))
|
||||
continue;
|
||||
task->xtra = KSW_XSTOP | task->aln.score;
|
||||
task->xtra = KSW_XSTOP | r.score;
|
||||
kv_push(int, msw_task_ids, i);
|
||||
// 1. 获取对应的ref
|
||||
refs2->a[k] = refs->a[j];
|
||||
revseq(r.te + 1, refs2->a[k].ref);
|
||||
// refs2->a[k].l_ref = r.te + 1; // zzh add, for test
|
||||
// 2. 获取对应的seq
|
||||
seqs2->a[k] = seqs->a[j];
|
||||
revseq(r.qe + 1, seqs2->a[k].seq);
|
||||
seqs2->a[k].l_seq = r.qe + 1;
|
||||
// 3. 其他数据
|
||||
xtras->a[k] = task->xtra;
|
||||
xtras2->a[k] = task->xtra;
|
||||
kswrs2->a[k] = r;
|
||||
++k;
|
||||
}
|
||||
|
|
@ -327,10 +341,9 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
|||
for (; k < roundEndIdx; ++k) {
|
||||
refs2->a[k].l_ref = 0;
|
||||
seqs2->a[k].l_seq = 0;
|
||||
xtras2->a[k] = 0;
|
||||
}
|
||||
PROF_START(msw_2);
|
||||
msw_avx512_u8(opt, msw_task_ids.n, msw_buf, refs2, seqs2, xtras2, kswrs2, tid);
|
||||
msw_avx512_u8(opt, msw_task_ids.n, msw_buf, refs2, seqs2, xtras2, kswrs2, 1, tid); // 第二阶段
|
||||
PROF_END(tprof[T_MSW_2][tid], msw_2);
|
||||
|
||||
// 结果赋值
|
||||
|
|
@ -338,6 +351,9 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
|||
i = msw_task_ids.a[k];
|
||||
msw_task_t* task = w->msw->p_msw_tasks_u8.a[i];
|
||||
task->aln = kswrs2->a[k];
|
||||
// kswr_avx_t aln = task->aln;
|
||||
//fprintf(gf[1], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", task->seq_id + w->n_processed, aln.score, aln.te, aln.qe,
|
||||
// aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
}
|
||||
|
||||
kv_destroy(msw_task_ids);
|
||||
|
|
@ -350,7 +366,7 @@ static int check_add_align(kswr_avx_t aln, int min_seed_len, int is_rev, int64_t
|
|||
int64_t rb) {
|
||||
int res = 0;
|
||||
if (aln.score >= min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0
|
||||
int i, tmp;
|
||||
// int i, tmp;
|
||||
mem_alnreg_t b;
|
||||
memset(&b, 0, sizeof(mem_alnreg_t));
|
||||
b.rid = a->rid;
|
||||
|
|
@ -366,13 +382,13 @@ static int check_add_align(kswr_avx_t aln, int min_seed_len, int is_rev, int64_t
|
|||
// printf("*** %d, [%lld,%lld], %d:%d, (%lld,%lld), (%lld,%lld) == (%lld,%lld)\n", aln.score, rb, re, is_rev,
|
||||
// is_larger, a->rb, a->re, ma->a[0].rb, ma->a[0].re, b.rb, b.re);
|
||||
kv_push(mem_alnreg_t, *ma, b); // make room for a new element
|
||||
// move b s.t. ma is sorted
|
||||
for (i = 0; i < ma->n - 1; ++i) // find the insertion point
|
||||
if (ma->a[i].score < b.score)
|
||||
break;
|
||||
tmp = i;
|
||||
for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i - 1];
|
||||
ma->a[i] = b;
|
||||
// // move b s.t. ma is sorted
|
||||
// for (i = 0; i < ma->n - 1; ++i) // find the insertion point
|
||||
// if (ma->a[i].score < b.score)
|
||||
// break;
|
||||
// tmp = i;
|
||||
// for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i - 1];
|
||||
// ma->a[i] = b;
|
||||
res = 1;
|
||||
}
|
||||
return res;
|
||||
|
|
@ -406,16 +422,34 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
memset(g, 0, sizeof(mem_aln_t) * 2);
|
||||
n_aa[0] = n_aa[1] = 0;
|
||||
|
||||
int n[2] = {0};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int si = !i;
|
||||
int n = 0;
|
||||
// 这里应该先给task排序,因为u8和i16是分开排序的,需要合在一起
|
||||
for (k = 0; k < s[si].msw.n; ++k) {
|
||||
msw_task_t* t = s[si].msw.a[k];
|
||||
n += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, &a[!si].a[t->aj], s[si].l_seq, (uint8_t*)s[si].seq, &a[si], t->rb);
|
||||
// kswr_avx_t aln = t->aln; fprintf(gf[0], "id-%ld score-%d te-%d qe-%d score2-%d te2-%d tb-%d qb-%d\n", s[si].id + w->n_processed, aln.score, aln.te, aln.qe, aln.score2, aln.te2, aln.tb, aln.qb);
|
||||
|
||||
n[i] += check_add_align(t->aln, opt->min_seed_len, t->is_rev, bns->l_pac, &a[!si].a[t->aj], s[si].l_seq, (uint8_t*)s[si].seq, &a[si], t->rb);
|
||||
}
|
||||
if (n > 0) {
|
||||
a[si].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[si].n, a[si].a);
|
||||
}
|
||||
// 处理完2个pair read之后再排序,因为上面操作有插入,排序后之前记录的索引就无效了,所以上面不能排序,要两个pair read处理完之后再排序
|
||||
for (i = 0; i < 2; ++i) {
|
||||
int tmp;
|
||||
if (n[i] > 0) {
|
||||
for (j = 0; j < n[i]; ++j) { // 在这里排序
|
||||
// move b s.t. ma is sorted
|
||||
int nidx = a[i].n - n[i] + j;
|
||||
mem_alnreg_t* b = &a[i].a[nidx];
|
||||
for (k = 0; k < nidx; ++k) // find the insertion point
|
||||
if (a[i].a[k].score < b->score)
|
||||
break;
|
||||
tmp = k;
|
||||
for (k = nidx; k > tmp; --k) a[i].a[k] = a[i].a[k - 1];
|
||||
a[i].a[k] = *b;
|
||||
}
|
||||
|
||||
a[i].n = mem_sort_dedup_patch(opt, 0, 0, 0, a[i].n, a[i].a);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
17
profiling.c
17
profiling.c
|
|
@ -21,7 +21,7 @@ uint64_t proc_freq = 1000;
|
|||
uint64_t gprof[LIM_GLOBAL_PROF_TYPE] = {0};
|
||||
#endif
|
||||
|
||||
#ifdef SHOW_DATA_PERF
|
||||
//#ifdef SHOW_DATA_PERF
|
||||
/*
|
||||
tdat[0]: read nums
|
||||
tdat[1]: seed-1 full match nums
|
||||
|
|
@ -29,7 +29,7 @@ tdat[1]: seed-1 full match nums
|
|||
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
|
||||
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
|
||||
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
|
||||
{
|
||||
|
|
@ -47,6 +47,15 @@ int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
|
|||
return 1;
|
||||
}
|
||||
|
||||
int64_t get_sum(int64_t* a, int len) {
|
||||
int i = 0;
|
||||
int64_t all = 0;
|
||||
for (i = 0; i < len; i++) {
|
||||
all += a[i];
|
||||
}
|
||||
return all;
|
||||
}
|
||||
|
||||
int display_stats(int nthreads)
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
|
|
@ -146,5 +155,9 @@ int display_stats(int nthreads)
|
|||
fprintf(stderr, "\n");
|
||||
#endif
|
||||
|
||||
fprintf(stderr, "all msw cnt: %ld\n", get_sum(tdat[TD_MSW_CNT], nthreads));
|
||||
fprintf(stderr, "align 1 cnt: %ld\n", get_sum(tdat[TD_ALIGN_1_CNT], nthreads));
|
||||
fprintf(stderr, "align 2 cnt: %ld\n", get_sum(tdat[TD_ALIGN_2_CNT], nthreads));
|
||||
|
||||
return 1;
|
||||
}
|
||||
10
profiling.h
10
profiling.h
|
|
@ -25,10 +25,10 @@ extern uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD];
|
|||
extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE];
|
||||
#endif
|
||||
|
||||
#ifdef SHOW_DATA_PERF
|
||||
//#ifdef SHOW_DATA_PERF
|
||||
extern int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD];
|
||||
extern int64_t gdat[LIM_GLOBAL_DATA_TYPE];
|
||||
#endif
|
||||
//#endif
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
#if USE_RDTSC
|
||||
|
|
@ -47,6 +47,12 @@ extern uint64_t calc_num;
|
|||
extern uint64_t more_num;
|
||||
extern uint64_t not_more_num;
|
||||
|
||||
enum {
|
||||
TD_ALIGN_1_CNT,
|
||||
TD_ALIGN_2_CNT,
|
||||
TD_MSW_CNT,
|
||||
};
|
||||
|
||||
// GLOBAL
|
||||
enum {
|
||||
G_ALL = 0,
|
||||
|
|
|
|||
9
run.sh
9
run.sh
|
|
@ -1,14 +1,17 @@
|
|||
thread=32
|
||||
thread=64
|
||||
|
||||
make -j 16
|
||||
|
||||
n1=~/data/dataset/D2/n1.fq.gz
|
||||
n2=~/data/dataset/D2/n2.fq.gz
|
||||
|
||||
#n1=/file-data/zzh/dataset/D1/45mr1.fq.gz
|
||||
#n2=/file-data/zzh/dataset/D1/45mr2.fq.gz
|
||||
|
||||
reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta
|
||||
|
||||
#out=/dev/null
|
||||
out=./D2-out.sam
|
||||
out=/dev/null
|
||||
#out=~/data/D2-out.sam
|
||||
prog=./fastalign
|
||||
#prog=/home/zzh/fastbwa
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue