From 856a0e0c0140050feae396d88c2a6b8efce09df1 Mon Sep 17 00:00:00 2001 From: zzh Date: Sat, 9 Mar 2024 11:39:40 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E6=88=90=E4=BA=86batch=E6=A8=A1?= =?UTF-8?q?=E5=BC=8F=EF=BC=8C=E5=AF=B9na12878=E6=9C=89=E6=95=88=E6=9E=9C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 1 + Makefile | 2 +- bwa.sh | 12 ++ bwamem.c | 418 +++++++++++++++++++++++++++++++++++++++--- bwamem.h | 20 +- bwamem_pair.c | 276 +++++++++++++++++++++++++++- ert.sh | 14 ++ fastmap.c | 31 +++- fmt_idx.c | 21 +++ fmt_idx.h | 1 + ksw_extend2_avx2_u8.c | 2 +- mem2.sh | 12 ++ na.sh | 4 + run.sh | 14 +- sbwa.sh | 12 ++ utils.h | 2 +- 16 files changed, 792 insertions(+), 50 deletions(-) create mode 100755 bwa.sh create mode 100755 ert.sh create mode 100755 mem2.sh create mode 100755 na.sh create mode 100755 sbwa.sh diff --git a/.gitignore b/.gitignore index cd6e820..9ca8a2b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.[oa] *.txt *.sam +*.log bwa test test64 diff --git a/Makefile b/Makefile index fbf2faa..b371b00 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ CC= gcc # CFLAGS= -g -Wall -Wno-unused-function -O2 CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2 WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS -SHOW_PERF= #-DSHOW_PERF +SHOW_PERF= -DSHOW_PERF AR= ar DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) $(SHOW_PERF) -DUSE_AVX2 -DKSW_EQUAL #-DUSE_MT_READ #-DFILTER_FULL_MATCH LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \ diff --git a/bwa.sh b/bwa.sh new file mode 100755 index 0000000..35b9088 --- /dev/null +++ b/bwa.sh @@ -0,0 +1,12 @@ +thread=32 +n_r1=~/fastq/na12878/na12878_r1.fq +n_r2=~/fastq/na12878/na12878_r2.fq +reference=~/reference/human_g1k_v37_decoy.fasta +out=/dev/null +time /home/zzh/work/bwa/bwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ + $reference \ + $n_r1 \ + $n_r2 \ + -o $out + + diff --git a/bwamem.c b/bwamem.c index 41f38ea..621786d 100644 --- a/bwamem.c +++ b/bwamem.c @@ -149,8 +149,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, } } #ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_seed_1, tmp_time); + a->time_seed_1 += realtime_msec() - tmp_time; tmp_time = realtime_msec(); #endif @@ -174,8 +173,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, } } #ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_seed_2, tmp_time); + a->time_seed_2 = realtime_msec() - tmp_time; tmp_time = realtime_msec(); #endif // 3. third pass: LAST-like @@ -192,8 +190,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const FMTIndex *fmt, int len, } } #ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_seed_3, tmp_time); + a->time_seed_3 += realtime_msec() - tmp_time; #endif #ifdef FILTER_FULL_MATCH @@ -847,8 +844,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0], aux->sw_buf); #endif #ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_ksw_extend2, tmp_time); + aux->time_bsw += realtime_msec() - tmp_time; #endif if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); } if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; @@ -888,8 +884,7 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1], aux->sw_buf); #endif #ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_ksw_extend2, tmp_time); + aux->time_bsw += realtime_msec() - tmp_time; #endif if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; @@ -1338,6 +1333,7 @@ static smem_aux_t *smem_aux_init() a->tmpv[1] = calloc(1, sizeof(bwtintv_v)); a->sw_buf = calloc(1, sizeof(buf_t)); a->seq_buf = calloc(1, sizeof(buf_t)); + a->time_seed_1 = a->time_seed_2 = a->time_seed_3 = a->time_bsw = a->time_sa = 0; return a; } @@ -1352,50 +1348,414 @@ static void smem_aux_destroy(smem_aux_t *a) // 初始化线程需要的数据 mem_worker_t *init_mem_worker(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac) { - int i; + int i = opt->n_threads, j; mem_worker_t *w = calloc(1, sizeof(mem_worker_t)); w->opt = opt; w->bns = bns; w->pac = pac; w->fmt = fmt; - w->aux = malloc(opt->n_threads * sizeof(smem_aux_t)); - for (i = 0; i < opt->n_threads; ++i) + w->calc_isize = 0; w->n = 0; w->regs = 0; + w->aux = malloc(i * sizeof(smem_aux_t)); + w->smem_arr = malloc(i * sizeof(smem_v *)); + w->chain_arr = malloc(i * sizeof(mem_chain_v *)); + w->isize_arr = malloc(i * sizeof(uint64_v *)); + + for (i = 0; i < opt->n_threads; ++i) { w->aux[i] = smem_aux_init(); + w->smem_arr[i] = malloc(opt->batch_size * sizeof(smem_v)); + w->chain_arr[i] = malloc(opt->batch_size * sizeof(mem_chain_v)); + w->isize_arr[i] = calloc(4, sizeof(uint64_v)); + for (j = 0; j < opt->batch_size; ++j) { + kv_init(w->smem_arr[i][j].mem); + kv_init(w->smem_arr[i][j].pos_arr); + kv_init(w->chain_arr[i][j]); + } + } return w; } +static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_v *smem, smem_aux_t *a) +{ + int i, k, x = 0, old_n; + int start_width = 1; + int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); + int max_seed_len = 0; + int start_N_num = 0, start_flag = 1; + smem->mem.n = 0; + +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif + // 1. first pass: find all SMEMs + while (x < len) { + if (seq[x] < 4) { + start_flag = 0; + x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, &a->mem1, a->tmpv[0]); + for (i = 0; i < a->mem1.n; ++i) { + bwtintv_t *p = &a->mem1.a[i]; + int slen = (uint32_t)p->info - (p->info >> 32); // seed length + max_seed_len = MAX(max_seed_len, slen); + if (slen >= opt->min_seed_len) { + kv_push(bwtintv_t, smem->mem, *p); + } + } + //break; // for test full match time + } else { + ++x; + if (start_flag) ++start_N_num; + } + } +#ifdef SHOW_PERF + a->time_seed_1 += realtime_msec() - tmp_time; + tmp_time = realtime_msec(); +#endif + +#ifdef FILTER_FULL_MATCH + if (max_seed_len == len - start_N_num) goto collect_intv_end; +#endif + + // 2. second pass: find MEMs inside a long SMEM + old_n = smem->mem.n; + for (k = 0; k < old_n; ++k) { + bwtintv_t *p = &smem->mem.a[k]; + int start = p->info>>32, end = (int32_t)p->info; + if (end - start < split_len || p->x[2] > opt->split_width) continue; + fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, &a->mem1, a->tmpv[0]); + for (i = 0; i < a->mem1.n; ++i) { + bwtintv_t *p = &a->mem1.a[i]; + int slen = (uint32_t)p->info - (p->info >> 32); + if (slen >= opt->min_seed_len) { + kv_push(bwtintv_t, smem->mem, a->mem1.a[i]); + } + } + } +#ifdef SHOW_PERF + a->time_seed_2 += realtime_msec() - tmp_time; + tmp_time = realtime_msec(); +#endif + // 3. third pass: LAST-like + if (opt->max_mem_intv > 0) { + x = 0; + while (x < len) { + if (seq[x] < 4) { + bwtintv_t m; + x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m); + if (m.x[2] > 0) { + kv_push(bwtintv_t, smem->mem, m); + } + } else ++x; + } + } +#ifdef SHOW_PERF + a->time_seed_3 += realtime_msec() - tmp_time; +#endif + +#ifdef FILTER_FULL_MATCH +collect_intv_end: +#endif + // sort + ks_introsort(mem_intv, smem->mem.n, smem->mem.a); +} + +void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv) +{ + int i; + + if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match + mem_collect_intv_batch(opt, fmt, len, seq, smemv, aux); + smemv->pos_arr.n = 0; + for (i=0; imem.n; ++i) { + bwtintv_t *p = &smemv->mem.a[i]; + int step, count; // seed length + int64_t k; + if (p->num_match > 0) { + uint64_t pos = p->rm[0].rs; + if (p->rm[0].reverse) + pos = (fmt->l_pac << 1) - 1 - pos; + //pos |= 1LL << 63; + kv_push(uint64_t, smemv->pos_arr, pos); + } else { + step = p->x[2] > opt->max_occ ? p->x[2] / opt->max_occ : 1; + for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) + { +#ifdef SHOW_PERF + int64_t tmp_time = realtime_msec(); +#endif + uint64_t pos = fmt_sa_offset(fmt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference +#ifdef SHOW_PERF + aux->time_sa += realtime_msec() - tmp_time; +#endif + kv_push(uint64_t, smemv->pos_arr, pos); + } + } + } +} + + +void generate_chain(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, int len, const uint8_t *seq, smem_v *smemv, mem_chain_v *chain) +{ + int i, j, b, e, l_rep; + int64_t l_pac = bns->l_pac; + kbtree_t(chn) * tree; + chain->n = 0; + + if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match + tree = kb_init(chn, KB_DEFAULT_SIZE); + +#define CHECK_ADD_CHAIN(tmp, lower, upper) \ + int rid, to_add = 0; \ + mem_chain_t tmp, *lower, *upper; \ + tmp.pos = s.rbeg; \ + s.qbeg = p->info >> 32; \ + s.score = s.len = slen; \ + rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len); \ + if (rid < 0) \ + continue; \ + if (kb_size(tree)) \ + { \ + kb_intervalp(chn, tree, &tmp, &lower, &upper); \ + if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid)) \ + to_add = 1; \ + } \ + else \ + to_add = 1; \ + if (to_add) \ + { \ + tmp.n = 1; \ + tmp.m = 4; \ + tmp.seeds = calloc(tmp.m, sizeof(mem_seed_t)); \ + tmp.seeds[0] = s; \ + tmp.rid = rid; \ + tmp.is_alt = !!bns->anns[rid].is_alt; \ + kb_putp(chn, tree, &tmp); \ + } + + for (i = 0, b = e = l_rep = 0; i < smemv->mem.n; ++i) { // compute frac_rep + bwtintv_t *p = &smemv->mem.a[i]; + int sb = (p->info >> 32), se = (uint32_t)p->info; + if (p->x[2] <= opt->max_occ) continue; + if (sb > e) l_rep += e - b, b = sb, e = se; + else e = e > se ? e : se; + } + l_rep += e - b; + for (i = 0, j = 0; i < smemv->mem.n; ++i) { + bwtintv_t *p = &smemv->mem.a[i]; + int step, count, slen = (uint32_t)p->info - (p->info >> 32); // seed length + int64_t k; + if (p->num_match > 0) { + mem_seed_t s; + s.rbeg = smemv->pos_arr.a[j++]; + CHECK_ADD_CHAIN(tmp, lower, upper); + } else { + step = p->x[2] > opt->max_occ ? p->x[2] / opt->max_occ : 1; + for (k = count = 0; k < p->x[2] && count < opt->max_occ; k += step, ++count) + { + mem_seed_t s; + uint64_t sa = smemv->pos_arr.a[j++]; + uint64_t sa_idx = sa << 16 >> 16; + s.rbeg = (sa >> 48) + bwt_get_sa((uint8_t *)fmt->sa, sa_idx); + CHECK_ADD_CHAIN(tmp, lower, upper); + } + } + } + if (chain->m < kb_size(tree)) { + kv_resize(mem_chain_t, *chain, kb_size(tree)); + } + +#define traverse_func(p_) (chain->a[chain->n++] = *(p_)) + __kb_traverse(mem_chain_t, tree, traverse_func); +#undef traverse_func + + for (i = 0; i < chain->n; ++i) chain->a[i].frac_rep = (float)l_rep / len; + if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len); + kb_destroy(chn, tree); +} + +static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *dist) +{ + int64_t p2; + int r1 = (b1 >= l_pac), r2 = (b2 >= l_pac); + p2 = r1 == r2 ? b2 : (l_pac << 1) - 1 - b2; // p2 is the coordinate of read 2 on the read 1 strand + *dist = p2 > b1 ? p2 - b1 : b1 - p2; + return (r1 == r2 ? 0 : 1) ^ (p2 > b1 ? 0 : 3); +} + +static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r) +{ + int j; + for (j = 1; j < r->n; ++j) { // choose unique alignment + int b_max = r->a[j].qb > r->a[0].qb? r->a[j].qb : r->a[0].qb; + int e_min = r->a[j].qe < r->a[0].qe? r->a[j].qe : r->a[0].qe; + if (e_min > b_max) { // have overlap + int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb; + if (e_min - b_max >= min_l * opt->mask_level) break; // significant overlap + } + } + return j < r->n? r->a[j].score : opt->min_seed_len * opt->a; +} +// mem主要流程 +void mem_core_process(const mem_opt_t *opt, const FMTIndex *fmt, const bntseq_t *bns, const uint8_t *pac, + bseq1_t *seq_arr, int nseq, smem_aux_t *aux, smem_v *smem_arr, mem_chain_v *chain_arr, mem_alnreg_v *reg_arr, + int calc_isize, int64_t l_pac, uint64_v *isize) +{ + + int i, j, l_seq; + mem_chain_v *chnp; + mem_alnreg_v *regp; + char *seq; + + // 1. seeding + for (i = 0; i < nseq; ++i) { + seq = seq_arr[i].seq; + l_seq = seq_arr[i].l_seq; + for (j = 0; j < l_seq; ++j) { + seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]]; + } + find_smem(opt, fmt, l_seq, (uint8_t *)seq, aux, &smem_arr[i]); + } + + // 2. chain + for (i=0; in = mem_chain_flt(opt, chnp->n, chnp->a); + mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a, aux->seq_buf); + if (bwa_verbose >= 4) mem_print_chain(bns, chnp); + } + + // 3. align + for (i=0; in; ++j) { + mem_chain_t *p = &chnp->a[j]; + if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", j); + mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux); + free(chnp->a[j].seeds); + } +#ifdef SHOW_PERF + tmp_time = realtime_msec() - tmp_time; + __sync_fetch_and_add(&time_ksw_extend2, tmp_time); +#endif + free(chnp->a); chnp->m = 0; chnp->a = 0; + regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t *)seq, regp->n, regp->a); + if (bwa_verbose >= 4) { + err_printf("* %ld chains remain after removing duplicated chains\n", regp->n); + for (j = 0; j < regp->n; ++j) { + mem_alnreg_t *p = ®p->a[j]; + printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re); + } + } + for (j = 0; j < regp->n; ++j) { + mem_alnreg_t *p = ®p->a[j]; + if (p->rid >= 0 && bns->anns[p->rid].is_alt) + p->is_alt = 1; + } + } + +#if 1 + // 4. calc insert size +#define MIN_RATIO 0.8 + if (calc_isize) { + for (i = 0; i < nseq>>1; ++i) { + int dir; + int64_t is; + mem_alnreg_v *r[2]; + r[0] = (mem_alnreg_v*)®_arr[i<<1|0]; + r[1] = (mem_alnreg_v*)®_arr[i<<1|1]; + if (r[0]->n == 0 || r[1]->n == 0) continue; + if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue; + if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue; + if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr + dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is); + if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is); + } + } +#endif +} +// 找smem,生成chain,然后align +static void worker_smem_align(void *data, int i, int tid) +{ + mem_worker_t *w = (mem_worker_t *)data; + int start = i * w->opt->batch_size; + int end = MIN(start + w->opt->batch_size, w->n_reads); + mem_core_process(w->opt, w->fmt, w->bns, w->pac, w->seqs + start, end - start, w->aux[tid], w->smem_arr[tid], w->chain_arr[tid], w->regs + start, w->calc_isize, w->bns->l_pac, w->isize_arr[tid]); +} + +static void worker_sam(void *data, int i, int tid) +{ + extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]); + extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a); + extern void mem_matesw_batch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t *s, mem_alnreg_v *a, int data_size); + mem_worker_t *w = (mem_worker_t*)data; + int start = i*w->opt->batch_size; + int end = MIN(start + w->opt->batch_size, w->n_reads); + + if (!(w->opt->flag&MEM_F_PE)) { + for (i=start; i= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name); + mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i); + if (w->opt->flag & MEM_F_PRIMARY5) mem_reorder_primary5(w->opt->T, &w->regs[i]); + mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0); + free(w->regs[i].a); + } + } else { + if (!(w->opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment + mem_matesw_batch(w->opt, w->bns, w->pac, w->pes, &w->seqs[start], &w->regs[start], end-start); + } + + for (i=start; i= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i].name); + mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed+i)>>1, &w->seqs[i], &w->regs[i]); + free(w->regs[i].a); free(w->regs[i+1].a); + } + } +} + void mem_process_seqs(const mem_opt_t *opt, mem_worker_t *w, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0) { extern void kt_for(int n_threads, void (*func)(void*,int,int), void *data, int n); mem_pestat_t pes[4]; + int batch_size = opt->batch_size; // 对于pair-end数据,必须是2的整数倍,因为要保证pair-end数据在同一个线程里 + int n_batch = (n + batch_size - 1) / batch_size; double ctime, rtime; - ctime = cputime(); rtime = realtime(); global_bns = w->bns; + w->opt = opt; if (w->n < n) { w->n = n; w->regs = realloc(w->regs, n * sizeof(mem_alnreg_v)); } w->seqs = seqs; w->n_processed = n_processed; - w->pes = &pes[0]; - -#ifdef SHOW_PERF + w->n_reads = n; w->pes = &pes[0]; +#if 1 + if ((opt->flag & MEM_F_PE) && !pes0) { // infer insert sizes if not provided + int i, j; + w->calc_isize = 1; + for (i = 0; i < opt->n_threads; ++i) + for (j = 0; j < 4; ++j) + w->isize_arr[i][j].n = 0; + } +#endif int64_t tmp_time = realtime_msec(); -#endif - kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions -#ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_work1, tmp_time); + //kt_for(opt->n_threads, worker1, w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions + kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions + time_work1 += realtime_msec() - tmp_time; tmp_time = realtime_msec(); -#endif - if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0 - else mem_pestat(opt, w->bns->l_pac, n, w->regs, pes); // otherwise, infer the insert size distribution from data + else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data + //else mem_pestat_old(opt, w->bns->l_pac, n, w->regs, pes); } kt_for(opt->n_threads, worker2, w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment -#ifdef SHOW_PERF - tmp_time = realtime_msec() - tmp_time; - __sync_fetch_and_add(&time_work2, tmp_time); -#endif + //kt_for(opt->n_threads, worker_sam, w, n_batch); + time_work2 += realtime_msec() - tmp_time; //free(w.regs); if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime); diff --git a/bwamem.h b/bwamem.h index 4a615c0..cd5c93b 100644 --- a/bwamem.h +++ b/bwamem.h @@ -72,6 +72,7 @@ typedef struct { int max_occ; // skip a seed if its occurence is larger than this value int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed int n_threads; // number of threads + int batch_size; // batch size of seqs to process at one time int chunk_size; // process chunk_size-bp sequences in a batch float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits float drop_ratio; // drop a chain if its seed coverage is below drop_ratio times the seed coverage of a better chain overlapping with the small chain @@ -144,9 +145,20 @@ typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v; typedef struct { bwtintv_v mem, mem1, *tmpv[2]; buf_t *sw_buf, *seq_buf; + int64_t time_seed_1; + int64_t time_seed_2; + int64_t time_seed_3; + int64_t time_sa; + int64_t time_bsw; } smem_aux_t; typedef struct { + bwtintv_v mem; + uint64_v pos_arr; +} smem_v; + +typedef struct { + int calc_isize; const mem_opt_t *opt; const bwt_t *bwt; const FMTIndex *fmt; @@ -155,12 +167,15 @@ typedef struct { const mem_pestat_t *pes; smem_aux_t **aux; bseq1_t *seqs; + smem_v **smem_arr; + mem_chain_v **chain_arr; mem_alnreg_v *regs; + uint64_v **isize_arr; int64_t n_processed; int64_t n; + int64_t n_reads; } mem_worker_t; - #ifdef __cplusplus extern "C" { #endif @@ -243,7 +258,8 @@ extern "C" { * @param regs region array of size $n; 2i-th and (2i+1)-th elements constitute a pair * @param pes inferred insert size distribution (output) */ - void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]); + void mem_pestat_old(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]); + void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, uint64_v **isize_arr, mem_pestat_t pes[4]); #ifdef __cplusplus } diff --git a/bwamem_pair.c b/bwamem_pair.c index bba5c2b..5855779 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -46,6 +46,29 @@ #define MAPPING_BOUND 3.0 #define MAX_STDDEV 4.0 +typedef struct { + int is_rev; + int qlen; + int tlen; + int xtra; + int64_t rb; + uint8_t *seq; + uint8_t *ref; + const mem_alnreg_t *a; + mem_alnreg_v *ma; +} align_arg_t; + +typedef kvec_t(align_arg_t) align_arg_v; + +typedef struct _matesw_node { + int l_seq; + int b_offset; + uint8_t *seq; + mem_alnreg_v *b; + mem_alnreg_v *a; + struct _matesw_node *next; +} matesw_node; + static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *dist) { int64_t p2; @@ -69,7 +92,66 @@ static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r) return j < r->n? r->a[j].score : opt->min_seed_len * opt->a; } -void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]) +void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, uint64_v **isize_arr, mem_pestat_t pes[4]) +{ + int i, j, d, max; + uint64_v isize[4]; + memset(pes, 0, 4 * sizeof(mem_pestat_t)); + memset(isize, 0, sizeof(kvec_t(int)) * 4); + for (i=0; in_threads; ++i) { + for (d=0; d<4; ++d) { + for (j=0; j= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n); + for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two. + mem_pestat_t *r = &pes[d]; + uint64_v *q = &isize[d]; + int p25, p50, p75, x; + if (q->n < MIN_DIR_CNT) { + fprintf(stderr, "[M::%s] skip orientation %c%c as there are not enough pairs\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + r->failed = 1; + free(q->a); + continue; + } else fprintf(stderr, "[M::%s] analyzing insert size distribution for orientation %c%c...\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + ks_introsort_64(q->n, q->a); + p25 = q->a[(int)(.25 * q->n + .499)]; + p50 = q->a[(int)(.50 * q->n + .499)]; + p75 = q->a[(int)(.75 * q->n + .499)]; + r->low = (int)(p25 - OUTLIER_BOUND * (p75 - p25) + .499); + if (r->low < 1) r->low = 1; + r->high = (int)(p75 + OUTLIER_BOUND * (p75 - p25) + .499); + fprintf(stderr, "[M::%s] (25, 50, 75) percentile: (%d, %d, %d)\n", __func__, p25, p50, p75); + fprintf(stderr, "[M::%s] low and high boundaries for computing mean and std.dev: (%d, %d)\n", __func__, r->low, r->high); + for (i = x = 0, r->avg = 0; i < q->n; ++i) + if (q->a[i] >= r->low && q->a[i] <= r->high) + r->avg += q->a[i], ++x; + r->avg /= x; + for (i = 0, r->std = 0; i < q->n; ++i) + if (q->a[i] >= r->low && q->a[i] <= r->high) + r->std += (q->a[i] - r->avg) * (q->a[i] - r->avg); + r->std = sqrt(r->std / x); + fprintf(stderr, "[M::%s] mean and std.dev: (%.2f, %.2f)\n", __func__, r->avg, r->std); + r->low = (int)(p25 - MAPPING_BOUND * (p75 - p25) + .499); + r->high = (int)(p75 + MAPPING_BOUND * (p75 - p25) + .499); + if (r->low > r->avg - MAX_STDDEV * r->std) r->low = (int)(r->avg - MAX_STDDEV * r->std + .499); + if (r->high < r->avg + MAX_STDDEV * r->std) r->high = (int)(r->avg + MAX_STDDEV * r->std + .499); + if (r->low < 1) r->low = 1; + fprintf(stderr, "[M::%s] low and high boundaries for proper pairs: (%d, %d)\n", __func__, r->low, r->high); + free(q->a); + } + for (d = 0, max = 0; d < 4; ++d) + max = max > isize[d].n? max : isize[d].n; + for (d = 0; d < 4; ++d) + if (pes[d].failed == 0 && isize[d].n < max * MIN_DIR_RATIO) { + pes[d].failed = 1; + fprintf(stderr, "[M::%s] skip orientation %c%c\n", __func__, "FR"[d>>1&1], "FR"[d&1]); + } +} + +void mem_pestat_old(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]) { int i, d, max; uint64_v isize[4]; @@ -205,6 +287,123 @@ int mem_matesw(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co return n; } +static inline void add_aln(const mem_opt_t *opt, const bntseq_t *bns, align_arg_t *arg, kswr_t *aln) +{ + extern int mem_sort_dedup_patch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, uint8_t *query, int n, mem_alnreg_t *a); + + mem_alnreg_t b; + int is_rev = arg->is_rev; + const mem_alnreg_t *a = arg->a; + int64_t l_pac = bns->l_pac; + int64_t rb = arg->rb; + mem_alnreg_v *ma = arg->ma; + int i, tmp; + + memset(&b, 0, sizeof(mem_alnreg_t)); + b.rid = a->rid; + b.is_alt = a->is_alt; + b.qb = is_rev? arg->qlen - (aln->qe + 1) : aln->qb; + b.qe = is_rev? arg->qlen - aln->qb : aln->qe + 1; + b.rb = is_rev? (l_pac<<1) - (rb + aln->te + 1) : rb + aln->tb; + b.re = is_rev? (l_pac<<1) - (rb + aln->tb) : rb + aln->te + 1; + b.score = aln->score; + b.csub = aln->score2; + b.secondary = -1; + b.seedcov = (b.re - b.rb < b.qe - b.qb? b.re - b.rb : b.qe - b.qb) >> 1; + kv_push(mem_alnreg_t, *ma, b); // make room for a new element + // move b s.t. ma is sorted + for (i = 0; i < ma->n - 1; ++i) // find the insertion point + if (ma->a[i].score < b.score) break; + tmp = i; + for (i = ma->n - 1; i > tmp; --i) ma->a[i] = ma->a[i-1]; + ma->a[i] = b; + ma->n = mem_sort_dedup_patch(opt, 0, 0, 0, ma->n, ma->a); +} + +void ksw_align2_wrap(const mem_opt_t *opt, const bntseq_t *bns, align_arg_t *arg) +{ + kswr_t aln; + aln = ksw_align2(arg->qlen, arg->seq, arg->tlen, arg->ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, arg->xtra, 0); + if (aln.score >= opt->min_seed_len && aln.qb >= 0) { // something goes wrong if aln.qb < 0 + add_aln(opt, bns, arg, &aln); + } + if (arg->is_rev) free(arg->seq); + free(arg->ref); +} + +void ksw_align2_wrap2(const mem_opt_t *opt, const bntseq_t *bns, align_arg_t *arg1, align_arg_t *arg2) +{ + kswr_t aln1, aln2; + aln1 = ksw_align2(arg1->qlen, arg1->seq, arg1->tlen, arg1->ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, arg1->xtra, 0); + aln2 = ksw_align2(arg2->qlen, arg2->seq, arg2->tlen, arg2->ref, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, arg2->xtra, 0); + if (aln1.score >= opt->min_seed_len && aln1.qb >= 0) { // something goes wrong if aln.qb < 0 + add_aln(opt, bns, arg1, &aln1); + } + if (aln2.score >= opt->min_seed_len && aln2.qb >= 0) { + add_aln(opt, bns, arg2, &aln2); + } + if (arg1->is_rev) free(arg1->seq); + if (arg2->is_rev) free(arg2->seq); + free(arg1->ref); + free(arg2->ref); +} + +int mem_matesw_new(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], const mem_alnreg_t *a, int l_ms, const uint8_t *ms, mem_alnreg_v *ma, align_arg_v *argv) +{ + int64_t l_pac = bns->l_pac; + int i, r, skip[4], n = 0, rid; + for (r = 0; r < 4; ++r) + skip[r] = pes[r].failed? 1 : 0; + for (i = 0; i < ma->n; ++i) { // check which orinentation has been found + int64_t dist; + r = mem_infer_dir(l_pac, a->rb, ma->a[i].rb, &dist); + if (dist >= pes[r].low && dist <= pes[r].high) + skip[r] = 1; + } + if (skip[0] + skip[1] + skip[2] + skip[3] == 4) return 0; // consistent pair exist; no need to perform SW + for (r = 0; r < 4; ++r) { + int is_rev, is_larger; + uint8_t *seq, *rev = 0, *ref = 0; + int64_t rb, re; + if (skip[r]) continue; + is_rev = (r>>1 != (r&1)); // whether to reverse complement the mate + is_larger = !(r>>1); // whether the mate has larger coordinate + if (!is_rev) { + rb = is_larger? a->rb + pes[r].low : a->rb - pes[r].high; + re = (is_larger? a->rb + pes[r].high: a->rb - pes[r].low) + l_ms; // if on the same strand, end position should be larger to make room for the seq length + } else { + rb = (is_larger? a->rb + pes[r].low : a->rb - pes[r].high) - l_ms; // similarly on opposite strands + re = is_larger? a->rb + pes[r].high: a->rb - pes[r].low; + } + if (rb < 0) rb = 0; + if (re > l_pac<<1) re = l_pac<<1; + if (rb < re) ref = bns_fetch_seq(bns, pac, &rb, (rb+re)>>1, &re, &rid); + if (a->rid == rid && re - rb >= opt->min_seed_len) { // no funny things happening + if (is_rev) { + rev = malloc(l_ms); // this is the reverse complement of $ms + for (i = 0; i < l_ms; ++i) rev[l_ms - 1 - i] = ms[i] < 4? 3 - ms[i] : 4; + seq = rev; + } else seq = (uint8_t*)ms; + int xtra = KSW_XSUBO | KSW_XSTART | (l_ms * opt->a < 250? KSW_XBYTE : 0) | (opt->min_seed_len * opt->a); + align_arg_t *arg = kv_pushp(align_arg_t, *argv); + + arg->is_rev = is_rev; + arg->qlen = l_ms; + arg->tlen = re - rb; + arg->xtra = xtra; + arg->rb = rb; + arg->seq = seq; + arg->ref = ref; + arg->a = a; + arg->ma = ma; + ++n; + } else { + free(ref); + } + } + return n; +} + int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2], int n_pri[2]) { pair64_v v, u; @@ -268,6 +467,81 @@ int mem_pair(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, cons return ret; } +#define NEW_NODE(bidx, aidx) \ + if (barr[bidx].n) { \ + matesw_node *node; \ + tail->next = malloc(sizeof(matesw_node)); \ + node = tail->next; \ + node->l_seq = s[aidx].l_seq; \ + node->b_offset = 0; \ + node->seq = (uint8_t*)s[aidx].seq; \ + node->b = barr + bidx; \ + node->a = a + aidx; \ + node->next = 0; \ + tail = node; \ + } + +void mem_matesw_batch(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t *s, mem_alnreg_v *a, int data_size) +{ + int i, j; + align_arg_v argv; + mem_alnreg_v *barr = calloc(data_size, sizeof(mem_alnreg_v)); + matesw_node head; + matesw_node *tail = &head, *pre = &head; + tail->next = 0; + kv_init(argv); + for (i = 0; i < data_size; i+=2) { + for (j = 0; j < a[i].n; ++j) { + if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired && j < opt->max_matesw) { + kv_push(mem_alnreg_t, barr[i], a[i].a[j]); + } + } + NEW_NODE(i, i+1); + for (j = 0; j < a[i+1].n; ++j) { + if (a[i+1].a[j].score >= a[i+1].a[0].score - opt->pen_unpaired && j < opt->max_matesw) { + kv_push(mem_alnreg_t, barr[i+1], a[i+1].a[j]); + } + } + NEW_NODE(i+1, i); + } + + tail = head.next; + while(tail) { + argv.n = 0; + while (tail) { + int nn = 0; + while (tail->b_offset < tail->b->n && !nn) { + nn = mem_matesw_new(opt, bns, pac, pes, &tail->b->a[tail->b_offset], tail->l_seq, tail->seq, tail->a, &argv); + ++ tail->b_offset; + } + if (!nn || tail->b_offset == tail->b->n) { // delete node + tail = tail->next; + free(pre->next); + pre->next = tail; + } else { + pre = tail; + tail = tail->next; + } + } + if (argv.n) { + for (i=0; i= 0) { + while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:b:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; else if (c == '1') no_mt_io = 1; else if (c == 'x') mode = optarg; @@ -193,6 +193,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1; else if (c == 'U') opt->pen_unpaired = atoi(optarg), opt0.pen_unpaired = 1; else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1; + else if (c == 'b') opt->batch_size = atoi(optarg) >> 1 << 1, opt->batch_size = opt->batch_size > 1? opt->batch_size : 512; else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; else if (c == 'a') opt->flag |= MEM_F_ALL; else if (c == 'p') opt->flag |= MEM_F_PE | MEM_F_SMARTPE; @@ -291,11 +292,15 @@ int main_mem(int argc, char *argv[]) } if (opt->n_threads < 1) opt->n_threads = 1; + if (opt->batch_size < 1) opt->batch_size = 512; + fprintf(stderr, "batch_size: %d\n", opt->batch_size); + if (optind + 1 >= argc || optind + 3 < argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); fprintf(stderr, "Algorithm options:\n\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); + fprintf(stderr, " -b INT batch size of reads to process at one time [%d]\n", opt->batch_size); fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); fprintf(stderr, " -d INT off-diagonal X-dropoff [%d]\n", opt->zdrop); @@ -436,15 +441,23 @@ int main_mem(int argc, char *argv[]) } #ifdef SHOW_PERF + int64_t avg_seed_1 = 0, avg_seed_2 = 0, avg_seed_3 = 0, avg_sa = 0, avg_bsw = 0; + for (i = 0; i < opt->n_threads; ++i) { + avg_seed_1 += aux.w->aux[i]->time_seed_1; + avg_seed_2 += aux.w->aux[i]->time_seed_2; + avg_seed_3 += aux.w->aux[i]->time_seed_3; + avg_sa += aux.w->aux[i]->time_sa; + avg_bsw += aux.w->aux[i]->time_bsw; + } fprintf(stderr, "\n"); - fprintf(stderr, "time_work1: %f s\n", time_work1 / 1000.0 / 1); - fprintf(stderr, "time_work2: %f s\n", time_work2 / 1000.0 / 1); - fprintf(stderr, "time_flt_chain: %f s\n", time_flt_chain / 1000.0 / opt->n_threads); - fprintf(stderr, "time_seed_1: %f s\n", time_seed_1 / 1000.0 / opt->n_threads); - fprintf(stderr, "time_seed_2: %f s\n", time_seed_2 / 1000.0 / opt->n_threads); - fprintf(stderr, "time_seed_3: %f s\n", time_seed_3 / 1000.0 / opt->n_threads); - fprintf(stderr, "time_bwt_sa: %f s\n", time_bwt_sa / 1000.0 / opt->n_threads); - fprintf(stderr, "time_ksw_extend2: %f s\n", time_ksw_extend2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_work1: %f s\n", time_work1 / 1000.0); + fprintf(stderr, "time_work2: %f s\n", time_work2 / 1000.0); + fprintf(stderr, "time_process_seq: %f s\n", (time_work1 + time_work2) / 1000.0); + fprintf(stderr, "time_seed_1: %f s\n", avg_seed_1 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_seed_2: %f s\n", avg_seed_2 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_seed_3: %f s\n", avg_seed_3 / 1000.0 / opt->n_threads); + fprintf(stderr, "time_bwt_sa: %f s\n", avg_sa / 1000.0 / opt->n_threads); + fprintf(stderr, "time_bsw: %f s\n", avg_bsw / 1000.0 / opt->n_threads); fprintf(stderr, "\n"); fclose(gfp1); diff --git a/fmt_idx.c b/fmt_idx.c index b00e385..4bc5eb1 100644 --- a/fmt_idx.c +++ b/fmt_idx.c @@ -1005,4 +1005,25 @@ bwtint_t fmt_sa(const FMTIndex *fmt, bwtint_t k) } sa += bwt_get_sa(fmt->sa, k / fmt->sa_intv); return sa; +} + +bwtint_t fmt_sa_offset(const FMTIndex *fmt, bwtint_t k) +{ + bwtint_t sa = 0, mask = fmt->sa_intv - 1; + bwtint_t k1, k2; + while (k & mask) + { + ++sa; + fmt_previous_line(fmt, k, &k1, &k2); + __builtin_prefetch(fmt_occ_intv(fmt, k2), 0, 2); + if (!(k1 & mask)) + { + k = k1; + break; + } + ++sa; + k = k2; + } + sa = (sa << 48) | (k / fmt->sa_intv); + return sa; } \ No newline at end of file diff --git a/fmt_idx.h b/fmt_idx.h index 6774bc9..a102ce0 100644 --- a/fmt_idx.h +++ b/fmt_idx.h @@ -112,5 +112,6 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem); bwtint_t fmt_sa(const FMTIndex *fmt, bwtint_t k); +bwtint_t fmt_sa_offset(const FMTIndex *fmt, bwtint_t k); #endif \ No newline at end of file diff --git a/ksw_extend2_avx2_u8.c b/ksw_extend2_avx2_u8.c index 5aada2a..e0c277d 100644 --- a/ksw_extend2_avx2_u8.c +++ b/ksw_extend2_avx2_u8.c @@ -436,7 +436,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长 SWAP_DATA_POINTER; } -// free(mem); + //free(mem); if (_qle) *_qle = max_j + 1; if (_tle) *_tle = max_i + 1; if (_gtle) *_gtle = max_ie + 1; diff --git a/mem2.sh b/mem2.sh new file mode 100755 index 0000000..038fdd6 --- /dev/null +++ b/mem2.sh @@ -0,0 +1,12 @@ +thread=32 +n_r1=~/fastq/na12878/na12878_r1.fq +n_r2=~/fastq/na12878/na12878_r2.fq +reference=~/reference/mem2/human_g1k_v37_decoy.fasta +out=/dev/null +time /home/zzh/work/bwa-mem2/bwa-mem2 mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ + $reference \ + $n_r1 \ + $n_r2 \ + -o $out + + diff --git a/na.sh b/na.sh new file mode 100755 index 0000000..2c6ed5f --- /dev/null +++ b/na.sh @@ -0,0 +1,4 @@ +time ./bwa.sh &> bwa-na12878.log +time ./sbwa.sh &> sbwa-na12878.log +time ./mem2.sh &> mem2-na12878.log +time ./ert.sh &> ert-na12878.log diff --git a/run.sh b/run.sh index 8adaa60..7b75b29 100755 --- a/run.sh +++ b/run.sh @@ -1,10 +1,12 @@ thread=32 -n_r1=~/fastq/ZY2105177532213010_L4_1.fq -n_r2=~/fastq/ZY2105177532213010_L4_2.fq +#n_r1=~/fastq/ZY2105177532213010_L4_1.fq +#n_r2=~/fastq/ZY2105177532213010_L4_2.fq #n_r1=~/fastq/na12878/nas_1.fq #n_r2=~/fastq/na12878/nas_2.fq -#n_r1=~/fastq/na12878/na_1.fq -#n_r2=~/fastq/na12878/na_2.fq +n_r1=~/data/fastq/na12878/na_1.fq +n_r2=~/data/fastq/na12878/na_2.fq +#n_r1=~/fastq/na12878/na12878_r1.fq +#n_r2=~/fastq/na12878/na12878_r2.fq #n_r1=~/fastq/n_r1.fq #n_r2=~/fastq/n_r2.fq #n_r1=~/data/fastq/ZY2105177532213000/n_r1.fq @@ -24,7 +26,7 @@ n_r2=~/fastq/ZY2105177532213010_L4_2.fq #n_r2=~/fastq/diff_r2.fq #n_r1=~/fastq/d_r1.fq #n_r2=~/fastq/d_r2.fq -reference=~/reference/human_g1k_v37_decoy.fasta +reference=~/data/reference/human_g1k_v37_decoy.fasta #out=./all.sam #out=./sn.sam #out=./ssn-x1.sam @@ -42,7 +44,7 @@ out=/dev/null # /mnt/d/data/fastq/ZY2105177532213000/ZY2105177532213010_L4_2.fq.gz \ # -o /dev/null -time ./bwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ +time ./bwa mem -t $thread -b 256 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ $reference \ $n_r1 \ $n_r2 \ diff --git a/sbwa.sh b/sbwa.sh new file mode 100755 index 0000000..2ad7e73 --- /dev/null +++ b/sbwa.sh @@ -0,0 +1,12 @@ +thread=32 +n_r1=~/fastq/na12878/na12878_r1.fq +n_r2=~/fastq/na12878/na12878_r2.fq +reference=~/reference/human_g1k_v37_decoy.fasta +out=/dev/null +time /home/zzh/work/sbwa/bwa mem -t $thread -b 256 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ + $reference \ + $n_r1 \ + $n_r2 \ + -o $out + + diff --git a/utils.h b/utils.h index 764cc45..6f1cb4e 100644 --- a/utils.h +++ b/utils.h @@ -46,7 +46,7 @@ extern int64_t time_ksw_extend2, time_bwt_sa, time_bwt_sa_read, time_bns, - time_work1, +time_work1, time_work2, time_flt_chain;