处理read seq的潜在的bug
This commit is contained in:
parent
fd21021e6a
commit
e701805337
2
Makefile
2
Makefile
|
|
@ -2,7 +2,7 @@ CC= gcc
|
|||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -mavx512bw -O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
|
||||
SAM_EXACT= #-DSAM_EXACT
|
||||
SAM_EXACT= -DSAM_EXACT
|
||||
SHOW_PERF= -DSHOW_PERF
|
||||
SHOW_DATA_PERF= -DSHOW_DATA_PERF
|
||||
FILTER_FULL_MATCH= #-DFILTER_FULL_MATCH
|
||||
|
|
|
|||
5
bwa.c
5
bwa.c
|
|
@ -164,7 +164,10 @@ void bseq_read_pe_mt(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_c
|
|||
*n_ = n; *seqs_ptr = seqs; *size_ = size; *m_ = n;
|
||||
return;
|
||||
}
|
||||
m = (chunk_size + size / init_n_reads - 1) / (size / init_n_reads);
|
||||
float bases_per_read = size * 1.0 / init_n_reads;
|
||||
m = (chunk_size + bases_per_read * 100) / (bases_per_read); // 多开辟100个reads的冗余空间
|
||||
|
||||
// m = (chunk_size + size / init_n_reads - 1) / (size / init_n_reads);
|
||||
#else
|
||||
m = 50000;
|
||||
#endif
|
||||
|
|
|
|||
2
bwa.h
2
bwa.h
|
|
@ -85,7 +85,7 @@ typedef struct {
|
|||
int l_seq, id;
|
||||
int m_name, m_comment, m_seq, m_qual;
|
||||
char *name, *comment, *seq, *qual;
|
||||
msw_task_ptr_v msw;
|
||||
// msw_task_ptr_v msw;
|
||||
msw_seq_task_v msw_task;
|
||||
// int_v msw;
|
||||
// kstring_t sam;
|
||||
|
|
|
|||
5
bwamem.c
5
bwamem.c
|
|
@ -1248,7 +1248,7 @@ static void worker_sam(void *data, int i, int tid)
|
|||
}
|
||||
}
|
||||
|
||||
static void worker_sam_single_end(void *data, int i, int tid)
|
||||
static void worker_sam_single_end(void *data, long i, int tid)
|
||||
{
|
||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], seq_sam_t ss[2], int tid);
|
||||
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
|
||||
|
|
@ -1641,7 +1641,7 @@ void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t
|
|||
}
|
||||
|
||||
// smem,chain,align
|
||||
static void worker_smem_align(void *data, int i, int tid)
|
||||
static void worker_smem_align(void *data, long i, int tid)
|
||||
{
|
||||
mem_worker_t *w = (mem_worker_t *)data;
|
||||
int start = i * w->opt->batch_size;
|
||||
|
|
@ -1651,7 +1651,6 @@ static void worker_smem_align(void *data, int i, int tid)
|
|||
|
||||
void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0, seq_sam_t *sams)
|
||||
{
|
||||
extern void kt_for(int n_threads, void (*func)(void *, int, int), void *data, int n);
|
||||
PROF_START(mem_prepare);
|
||||
mem_pestat_t pes[4];
|
||||
int batch_size = opt->batch_size; // pair-end,2,pair-end
|
||||
|
|
|
|||
|
|
@ -376,7 +376,7 @@ int main_mem(int argc, char *argv[])
|
|||
PROF_START(all);
|
||||
PROF_START(prepare);
|
||||
mem_opt_t *opt, opt0;
|
||||
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 0;
|
||||
int fd, fd2, i, c, ignore_alt = 0, no_mt_io = 2;
|
||||
int fixed_chunk_size = -1;
|
||||
gzFile fp, fp2 = 0;
|
||||
char *p, *rg_line = 0, *hdr_line = 0;
|
||||
|
|
|
|||
28
paired_sam.c
28
paired_sam.c
|
|
@ -7,9 +7,11 @@
|
|||
#include "ksw_align_avx.h"
|
||||
#include "kvec.h"
|
||||
#include "debug.h"
|
||||
#include "mate_sw.h"
|
||||
#include "profiling.h"
|
||||
|
||||
// 原版生成sam函数
|
||||
static void worker_sam(void* data, int i, int tid) {
|
||||
static void worker_sam(void* data, long i, int tid) {
|
||||
mem_worker_t* w = (mem_worker_t*)data;
|
||||
if (bwa_verbose >= 4)
|
||||
printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i << 1 | 0].name);
|
||||
|
|
@ -140,7 +142,7 @@ static void get_matesw_tasks(const mem_opt_t* opt, const bntseq_t* bns, const ui
|
|||
}
|
||||
}
|
||||
// 先计算哪些read需要做matesw
|
||||
static void worker_matesw_tasks(void* data, int idx, int tid) {
|
||||
static void worker_matesw_tasks(void* data, long idx, int tid) {
|
||||
mem_worker_t* w = (mem_worker_t*)data;
|
||||
const mem_opt_t* opt = w->opt;
|
||||
int startIdx = idx * opt->batch_size;
|
||||
|
|
@ -160,7 +162,7 @@ static void worker_matesw_tasks(void* data, int idx, int tid) {
|
|||
int64_t i_s = id;
|
||||
bseq1_t* s = &w->seqs[i_s];
|
||||
mem_alnreg_v* a = &w->regs[i_s];
|
||||
s->msw.n = 0; // 清空之前的matesw数据
|
||||
//s->msw.n = 0; // 清空之前的matesw数据
|
||||
int i, j;
|
||||
|
||||
_clear_vec(b[0]);
|
||||
|
|
@ -259,7 +261,7 @@ static void msw_avx512_u8(const mem_opt_t* opt, int num_tasks, msw_buf_t* msw_bu
|
|||
}
|
||||
|
||||
// 再多线程计算matesw,利用inter-query的simd并行
|
||||
static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
||||
static void worker_calc_matesw_avx512_u8(void* data, long idx, int tid) {
|
||||
mem_worker_t* w = (mem_worker_t*)data;
|
||||
const mem_opt_t* opt = w->opt;
|
||||
int startIdx = idx * w->opt->msw_batch_size;
|
||||
|
|
@ -364,7 +366,7 @@ static void worker_calc_matesw_avx512_u8(void* data, int idx, int tid) {
|
|||
kv_destroy(msw_task_ids);
|
||||
}
|
||||
|
||||
static void worker_calc_matesw_avx512_i16(void* data, int i, int tid) {}
|
||||
static void worker_calc_matesw_avx512_i16(void* data, long idx, int tid) {}
|
||||
|
||||
// 检测添加新的align
|
||||
static int check_add_align(const mem_opt_t* opt, kswr_avx_t aln, int is_rev, int64_t l_pac, mem_alnreg_t* a, int l_ms, uint8_t* ms,
|
||||
|
|
@ -570,7 +572,7 @@ end_clear:
|
|||
}
|
||||
|
||||
// 最后再计算并生成sam数据
|
||||
static void workder_gen_sam(void* data, int idx, int tid) {
|
||||
static void workder_gen_sam(void* data, long idx, int tid) {
|
||||
mem_worker_t* w = (mem_worker_t*)data;
|
||||
const mem_opt_t* opt = w->opt;
|
||||
const bntseq_t* bns = w->bns;
|
||||
|
|
@ -689,8 +691,10 @@ static void workder_gen_sam(void* data, int idx, int tid) {
|
|||
|
||||
// 划分matesw任务
|
||||
static void gather_matesw_task(mem_worker_t* w, msw_task_v** thread_tasks) {
|
||||
_clear_vec(w->msw->p_msw_tasks_u8);
|
||||
_clear_vec(w->msw->p_msw_tasks_i16);
|
||||
//_clear_vec(w->msw->p_msw_tasks_u8);
|
||||
//_clear_vec(w->msw->p_msw_tasks_i16);
|
||||
_destory_clear_vec(w->msw->p_msw_tasks_u8);
|
||||
_destory_clear_vec(w->msw->p_msw_tasks_i16);
|
||||
int i = 0, j = 0;
|
||||
for (i = 0; i < w->opt->n_threads; ++i) {
|
||||
for (j = 0; j < thread_tasks[i][0].n; ++j) {
|
||||
|
|
@ -756,8 +760,10 @@ void gen_paired_sam(mem_worker_t* w) {
|
|||
// 清空一下任务数组,不能放到线程里去做
|
||||
int i = 0;
|
||||
for (i = 0; i < w->opt->n_threads; ++i) {
|
||||
_clear_vec(w->msw->t_msw_tasks[i][0]);
|
||||
_clear_vec(w->msw->t_msw_tasks[i][1]);
|
||||
//_clear_vec(w->msw->t_msw_tasks[i][0]);
|
||||
//_clear_vec(w->msw->t_msw_tasks[i][1]);
|
||||
_destory_clear_vec(w->msw->t_msw_tasks[i][0]);
|
||||
_destory_clear_vec(w->msw->t_msw_tasks[i][1]);
|
||||
}
|
||||
int batch_n = (w->n_reads + w->opt->batch_size - 1) / w->opt->batch_size;
|
||||
|
||||
|
|
@ -797,4 +803,4 @@ void gen_paired_sam(mem_worker_t* w) {
|
|||
PROF_END(gprof[G_gen_sam], gen_sam);
|
||||
}
|
||||
fprintf(stderr, "zzh : u8: %ld i16: %ld\n", w->msw->p_msw_tasks_u8.n, w->msw->p_msw_tasks_i16.n);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ extern int mem_sam_pe(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t*
|
|||
|
||||
extern void mem_reg2ovlp(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, bseq1_t* s, mem_alnreg_v* a);
|
||||
|
||||
extern void kt_for(int n_threads, void (*func)(void*, int, int), void* data, int n);
|
||||
extern void kt_for(int n_threads, void (*func)(void*, long, int), void* data, int n);
|
||||
|
||||
extern int mem_sort_dedup_patch(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, uint8_t* query, int n, mem_alnreg_t* a);
|
||||
|
||||
|
|
@ -41,4 +41,4 @@ extern char** mem_gen_alt(const mem_opt_t* opt, const bntseq_t* bns, const uint8
|
|||
extern void mem_reorder_primary5(int T, mem_alnreg_v* a);
|
||||
|
||||
extern int mem_pair(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id,
|
||||
int* sub, int* n_sub, int z[2], int n_pri[2]);
|
||||
int* sub, int* n_sub, int z[2], int n_pri[2]);
|
||||
|
|
|
|||
16
run.sh
16
run.sh
|
|
@ -1,11 +1,11 @@
|
|||
thread=1
|
||||
thread=64
|
||||
|
||||
make clean; make -j 32
|
||||
|
||||
#n1=~/data/dataset/D2/n1.fq.gz
|
||||
#n2=~/data/dataset/D2/n2.fq.gz
|
||||
n1=~/data/dataset/D2/d1.fq
|
||||
n2=~/data/dataset/D2/d2.fq
|
||||
n1=~/data/dataset/D2/n1.fq.gz
|
||||
n2=~/data/dataset/D2/n2.fq.gz
|
||||
#n1=~/data/dataset/D2/d1.fq
|
||||
#n2=~/data/dataset/D2/d2.fq
|
||||
|
||||
#n1=~/data/SRR25735656_1.fastq.gz
|
||||
#n2=~/data/SRR25735656_2.fastq.gz
|
||||
|
|
@ -15,10 +15,10 @@ n2=~/data/dataset/D2/d2.fq
|
|||
|
||||
reference=~/data/reference/fmt/human_g1k_v37_decoy.fasta
|
||||
|
||||
#out=/dev/null
|
||||
out=/dev/null
|
||||
#out=./oldsam-D2-out.sam
|
||||
#out=./D2-1.sam
|
||||
out=./d.sam
|
||||
#out=./d.sam
|
||||
prog=./fastalign
|
||||
#prog=/home/zzh/fastbwa
|
||||
|
||||
|
|
@ -26,4 +26,4 @@ time $prog mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:n
|
|||
$reference \
|
||||
$n1 \
|
||||
$n2 \
|
||||
-o $out -2
|
||||
-o $out
|
||||
|
|
|
|||
Loading…
Reference in New Issue