改变了seq与task关联的方式,在线程中将task信息与对应的seq关联
This commit is contained in:
parent
2c1f58fb50
commit
56c687d23d
2
Makefile
2
Makefile
|
|
@ -2,7 +2,7 @@ CC= gcc
|
||||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
||||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||||
|
|
||||||
SAM_EXACT= -DSAM_EXACT
|
SAM_EXACT= #-DSAM_EXACT
|
||||||
SHOW_PERF= -DSHOW_PERF
|
SHOW_PERF= -DSHOW_PERF
|
||||||
SHOW_DATA_PERF= -DSHOW_DATA_PERF
|
SHOW_DATA_PERF= -DSHOW_DATA_PERF
|
||||||
FILTER_FULL_MATCH= #-DFILTER_FULL_MATCH
|
FILTER_FULL_MATCH= #-DFILTER_FULL_MATCH
|
||||||
|
|
|
||||||
1
bwa.h
1
bwa.h
|
|
@ -86,6 +86,7 @@ typedef struct {
|
||||||
int m_name, m_comment, m_seq, m_qual;
|
int m_name, m_comment, m_seq, m_qual;
|
||||||
char *name, *comment, *seq, *qual;
|
char *name, *comment, *seq, *qual;
|
||||||
msw_task_ptr_v msw;
|
msw_task_ptr_v msw;
|
||||||
|
msw_seq_task_v msw_task;
|
||||||
// int_v msw;
|
// int_v msw;
|
||||||
// kstring_t sam;
|
// kstring_t sam;
|
||||||
} bseq1_t;
|
} bseq1_t;
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,15 @@ typedef kvec_t(msw_task_t) msw_task_v;
|
||||||
// mate sw task的指针数组
|
// mate sw task的指针数组
|
||||||
typedef kvec_t(msw_task_t*) msw_task_ptr_v;
|
typedef kvec_t(msw_task_t*) msw_task_ptr_v;
|
||||||
|
|
||||||
|
// Locator that ties a read (seq) to one of its mate-SW tasks.
// Each read keeps the thread that owns the task plus indices into that
// thread's task array, instead of a pointer to the task, because the
// task arrays can move in memory as they grow.
|
||||||
|
typedef struct {
|
||||||
|
int arr_idx; // which task array holds the task: u8 or i16 (the visible call site passes 0 for u8; value used for i16 is not shown here -- TODO confirm)
|
||||||
|
int thread_idx; // index of the thread whose task array holds the task
|
||||||
|
int task_idx; // index of the task inside that thread's task array
|
||||||
|
} msw_seq_task_t;
|
||||||
|
|
||||||
|
// Growable kvec array of msw_seq_task_t locators; bseq1_t stores one as its
// msw_task field and entries are appended with kv_push.
typedef kvec_t(msw_seq_task_t) msw_seq_task_v;
|
||||||
|
|
||||||
//////////////////////////////////////////////
|
//////////////////////////////////////////////
|
||||||
|
|
||||||
// 用来存放获取的mate sw阶段的ref序列
|
// 用来存放获取的mate sw阶段的ref序列
|
||||||
|
|
|
||||||
34
paired_sam.c
34
paired_sam.c
|
|
@ -130,7 +130,10 @@ static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const ui
|
||||||
p->re = re;
|
p->re = re;
|
||||||
p->seq_id = sid;
|
p->seq_id = sid;
|
||||||
p->aj = a_j;
|
p->aj = a_j;
|
||||||
p->to = task_order++;
|
p->to = task_order++; // 有啥用? 应该是用来合并u8和i16的时候排序用
|
||||||
|
|
||||||
|
msw_seq_task_t seq_task = {0, tid, msw8->n - 1};
|
||||||
|
kv_push(msw_seq_task_t, seq->msw_task, seq_task);
|
||||||
// kv_push(msw_task_t*, seq->msw, p); // 将matesw任务和对应的seq关联起来,这里放指针是不行的,因为指针位置会变,要保存offset才行
|
// kv_push(msw_task_t*, seq->msw, p); // 将matesw任务和对应的seq关联起来,这里放指针是不行的,因为指针位置会变,要保存offset才行
|
||||||
// kv_push(int, seq->msw, msw8->n - 1); // 这里需要考虑i16, 本线程的msw的offset
|
// kv_push(int, seq->msw, msw8->n - 1); // 这里需要考虑i16, 本线程的msw的offset
|
||||||
}
|
}
|
||||||
|
|
@ -556,8 +559,11 @@ no_pairing:
|
||||||
free(h[1].cigar);
|
free(h[1].cigar);
|
||||||
|
|
||||||
end_clear:
|
end_clear:
|
||||||
_destory_clear_vec(s[0].msw);
|
//_destory_clear_vec(s[0].msw);
|
||||||
_destory_clear_vec(s[1].msw);
|
//_destory_clear_vec(s[1].msw);
|
||||||
|
|
||||||
|
_destory_clear_vec(s[0].msw_task);
|
||||||
|
_destory_clear_vec(s[1].msw_task);
|
||||||
|
|
||||||
free(a[0].a);
|
free(a[0].a);
|
||||||
free(a[1].a);
|
free(a[1].a);
|
||||||
|
|
@ -607,8 +613,11 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
||||||
_clear_vec(bb[0]);
|
_clear_vec(bb[0]);
|
||||||
_clear_vec(bb[1]);
|
_clear_vec(bb[1]);
|
||||||
for (i = 0; i < 2; ++i)
|
for (i = 0; i < 2; ++i)
|
||||||
for (j = 0; j < s[!i].msw.n; ++j) {
|
//for (j = 0; j < s[!i].msw.n; ++j) {
|
||||||
msw_task_t* t = s[!i].msw.a[j];
|
// msw_task_t* t = s[!i].msw.a[j];
|
||||||
|
for (k = 0; k < s[!i].msw_task.n; ++k) {
|
||||||
|
msw_seq_task_t st = s[!i].msw_task.a[k];
|
||||||
|
msw_task_t* t = &w->msw->t_msw_tasks_u8[st.thread_idx].a[st.task_idx];
|
||||||
kv_push(mem_alnreg_t, bb[i], a[i].a[t->aj]);
|
kv_push(mem_alnreg_t, bb[i], a[i].a[t->aj]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -619,8 +628,11 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
||||||
int origin_n = a[si].n;
|
int origin_n = a[si].n;
|
||||||
#endif
|
#endif
|
||||||
// 这里应该先给task排序,因为u8和i16是分开排序的,需要合在一起
|
// 这里应该先给task排序,因为u8和i16是分开排序的,需要合在一起
|
||||||
for (k = 0; k < s[si].msw.n; ++k) {
|
//for (k = 0; k < s[si].msw.n; ++k) {
|
||||||
msw_task_t* t = s[si].msw.a[k];
|
// msw_task_t* t = s[si].msw.a[k];
|
||||||
|
for (k = 0; k < s[si].msw_task.n; ++k) {
|
||||||
|
msw_seq_task_t st = s[si].msw_task.a[k];
|
||||||
|
msw_task_t* t = &w->msw->t_msw_tasks_u8[st.thread_idx].a[st.task_idx];
|
||||||
#ifdef SAM_EXACT
|
#ifdef SAM_EXACT
|
||||||
mem_alnreg_t* b = &bb[i].a[k];
|
mem_alnreg_t* b = &bb[i].a[k];
|
||||||
#else
|
#else
|
||||||
|
|
@ -686,7 +698,7 @@ static void gather_matesw_task(mem_worker_t* w, msw_task_v* thread_tasks, msw_ta
|
||||||
for (j = 0; j < thread_tasks[i].n; ++j) {
|
for (j = 0; j < thread_tasks[i].n; ++j) {
|
||||||
msw_task_t* tp = &thread_tasks[i].a[j];
|
msw_task_t* tp = &thread_tasks[i].a[j];
|
||||||
kv_push(msw_task_t*, *tasks, tp);
|
kv_push(msw_task_t*, *tasks, tp);
|
||||||
kv_push(msw_task_t*, w->seqs[tp->seq_id].msw, tp);
|
// kv_push(msw_task_t*, w->seqs[tp->seq_id].msw, tp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -755,7 +767,6 @@ void gen_paired_sam(mem_worker_t* w) {
|
||||||
// 更新stats
|
// 更新stats
|
||||||
PROF_START(update_stats_cache);
|
PROF_START(update_stats_cache);
|
||||||
update_msw_stats(w);
|
update_msw_stats(w);
|
||||||
|
|
||||||
// 开辟缓冲区
|
// 开辟缓冲区
|
||||||
alloc_update_cache_avx512(w);
|
alloc_update_cache_avx512(w);
|
||||||
PROF_END(gprof[G_update_stats_cache], update_stats_cache);
|
PROF_END(gprof[G_update_stats_cache], update_stats_cache);
|
||||||
|
|
@ -765,10 +776,9 @@ void gen_paired_sam(mem_worker_t* w) {
|
||||||
gather_matesw_task(w, w->msw->t_msw_tasks_u8, &w->msw->p_msw_tasks_u8);
|
gather_matesw_task(w, w->msw->t_msw_tasks_u8, &w->msw->p_msw_tasks_u8);
|
||||||
gather_matesw_task(w, w->msw->t_msw_tasks_i16, &w->msw->p_msw_tasks_i16);
|
gather_matesw_task(w, w->msw->t_msw_tasks_i16, &w->msw->p_msw_tasks_i16);
|
||||||
PROF_END(gprof[G_gather_matesw_task], gather_matesw_task);
|
PROF_END(gprof[G_gather_matesw_task], gather_matesw_task);
|
||||||
|
|
||||||
PROF_START(calc_matesw);
|
PROF_START(calc_matesw);
|
||||||
|
|
||||||
int msw_batch_n = (w->msw->p_msw_tasks_u8.n + w->opt->msw_batch_size - 1) / w->opt->msw_batch_size;
|
int msw_batch_n = (w->msw->p_msw_tasks_u8.n + w->opt->msw_batch_size - 1) / w->opt->msw_batch_size;
|
||||||
|
|
||||||
// 3. 处理msw任务
|
// 3. 处理msw任务
|
||||||
if (w->msw->p_msw_tasks_u8.n > 0)
|
if (w->msw->p_msw_tasks_u8.n > 0)
|
||||||
kt_for(w->opt->n_threads, worker_calc_matesw_avx512_u8, w, msw_batch_n);
|
kt_for(w->opt->n_threads, worker_calc_matesw_avx512_u8, w, msw_batch_n);
|
||||||
|
|
@ -777,7 +787,9 @@ void gen_paired_sam(mem_worker_t* w) {
|
||||||
PROF_END(gprof[G_calc_matesw], calc_matesw);
|
PROF_END(gprof[G_calc_matesw], calc_matesw);
|
||||||
|
|
||||||
// 4. 生成sam
|
// 4. 生成sam
|
||||||
|
PROF_START(gen_sam);
|
||||||
kt_for(w->opt->n_threads, workder_gen_sam, w, batch_n);
|
kt_for(w->opt->n_threads, workder_gen_sam, w, batch_n);
|
||||||
|
PROF_END(gprof[G_gen_sam], gen_sam);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "zzh %d : 8: %ld 16: %ld\n", i, w->msw->p_msw_tasks_u8.n, w->msw->p_msw_tasks_i16.n);
|
fprintf(stderr, "zzh %d : 8: %ld 16: %ld\n", i, w->msw->p_msw_tasks_u8.n, w->msw->p_msw_tasks_i16.n);
|
||||||
}
|
}
|
||||||
14
run.sh
14
run.sh
|
|
@ -1,11 +1,11 @@
|
||||||
thread=1
|
thread=64
|
||||||
|
|
||||||
make clean; make -j 32
|
make clean; make -j 32
|
||||||
|
|
||||||
#n1=~/data/dataset/D2/n1.fq.gz
|
n1=~/data/dataset/D2/n1.fq.gz
|
||||||
#n2=~/data/dataset/D2/n2.fq.gz
|
n2=~/data/dataset/D2/n2.fq.gz
|
||||||
n1=~/data/dataset/D2/d1.fq
|
#n1=~/data/dataset/D2/d1.fq
|
||||||
n2=~/data/dataset/D2/d2.fq
|
#n2=~/data/dataset/D2/d2.fq
|
||||||
|
|
||||||
#n1=~/data/SRR25735656_1.fastq.gz
|
#n1=~/data/SRR25735656_1.fastq.gz
|
||||||
#n2=~/data/SRR25735656_2.fastq.gz
|
#n2=~/data/SRR25735656_2.fastq.gz
|
||||||
|
|
@ -17,8 +17,8 @@ reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta
|
||||||
|
|
||||||
#out=/dev/null
|
#out=/dev/null
|
||||||
#out=./oldsam-D2-out.sam
|
#out=./oldsam-D2-out.sam
|
||||||
#out=./D2-1.sam
|
out=./D2-1.sam
|
||||||
out=./d.sam
|
#out=./d.sam
|
||||||
prog=./fastalign
|
prog=./fastalign
|
||||||
#prog=/home/zzh/fastbwa
|
#prog=/home/zzh/fastbwa
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue