diff --git a/Makefile b/Makefile index 3c4abeb..263ee60 100644 --- a/Makefile +++ b/Makefile @@ -15,7 +15,7 @@ AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ fmt_idx.o PROG= bwa INCLUDES= -LIBS= -lm -lz -lpthread +LIBS= -lm -lz -lpthread -ldl SUBDIRS= . ifeq ($(shell uname -s),Linux) @@ -32,6 +32,9 @@ all:$(PROG) bwa:libbwa.a $(AOBJS) main.o $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) main.o -o $@ -L. -lbwa $(LIBS) +#bwa:libbwa.a $(AOBJS) main.o +# $(CC) $(CFLAGS) $(LDFLAGS) $(AOBJS) ../sbwa/jemalloc/lib/libjemalloc.a main.o -o $@ -L. -lbwa $(LIBS) + bwamem-lite:libbwa.a example.o $(CC) $(CFLAGS) $(LDFLAGS) example.o -o $@ -L. -lbwa $(LIBS) diff --git a/bwa.c b/bwa.c index 0b28afb..066edd0 100644 --- a/bwa.c +++ b/bwa.c @@ -279,14 +279,12 @@ bwt_t *bwa_idx_load_bwt(const char *hint) if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); return 0; } - fprintf(stderr, "zzh-1\n"); tmp = calloc(strlen(prefix) + 5, 1); strcat(strcpy(tmp, prefix), ".bwt"); // FM-index bwt = bwt_restore_bwt(tmp); - fprintf(stderr, "zzh-1\n"); - strcat(strcpy(tmp, prefix), ".33.2.sa"); // partial suffix array (SA) + //strcat(strcpy(tmp, prefix), ".33.4.sa"); // partial suffix array (SA) + strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) bwt_restore_sa(tmp, bwt); - fprintf(stderr, "zzh-after-sa\n"); free(tmp); free(prefix); return bwt; } @@ -330,7 +328,7 @@ FMTIndex *bwa_idx_load_fmt(const char *hint) strcpy(sa_fn, hint); sprintf(suffix, ".33.%d.sa", SA_INTV); strcpy(sa_fn + l_hint, suffix); // partial suffix array (SA) -// fmt_restore_sa(sa_fn, fmt); + fmt_restore_sa(sa_fn, fmt); free(fmt_idx_fn); free(kmer_idx_fn); @@ -353,9 +351,9 @@ bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which) if (which & BWA_IDX_BWT) { idx->fmt = bwa_idx_load_fmt(hint); // 先和bwt共用sa - idx->fmt->sa = idx->bwt->sa; - idx->fmt->n_sa = idx->bwt->n_sa; - idx->fmt->sa_intv = idx->bwt->sa_intv; + //idx->fmt->sa = idx->bwt->sa; + //idx->fmt->n_sa = idx->bwt->n_sa; + //idx->fmt->sa_intv = idx->bwt->sa_intv; } if (which & BWA_IDX_BNS) { diff --git a/bwamem.c b/bwamem.c index 597651a..d1025c1 100644 --- a/bwamem.c +++ b/bwamem.c @@ -138,6 +138,8 @@ static void smem_aux_destroy(smem_aux_t *a) free(a); } +#define USE_FMT 1 + static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *a) { int i, k, x = 0, old_n; @@ -155,11 +157,9 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn // fprintf(stderr, "\n"); // first pass: find all SMEMs - //fprintf(fp1, "seq: %ld\n", dn++); + fprintf(fp1, "seq: %ld\n", dn++); //fprintf(stderr, "seq: %ld\n", dn++); //dn ++; - -#define USE_FMT 1 // goto third_seed; while (x < len) { @@ -186,7 +186,7 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn int slen = (uint32_t)p->info - (p->info >> 32); // seed length max_seed_len = fmax(max_seed_len, slen); if (slen >= opt->min_seed_len) { - //fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); + fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); kv_push(bwtintv_t, a->mem, *p); } } @@ -220,11 +220,12 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn #endif for (i = 0; i < a->mem1.n; ++i) { bwtintv_t *p = &a->mem1.a[i]; - //fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info); - //fprintf(stderr, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info); + //fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); + // fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info); + // fprintf(stderr, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info); int slen = (uint32_t)p->info - (p->info >> 32); if (slen >= opt->min_seed_len) { - //fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); + fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); kv_push(bwtintv_t, a->mem, a->mem1.a[i]); } } @@ -258,7 +259,8 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, const FMTIn if (m.x[2] > 0) { kv_push(bwtintv_t, a->mem, m); - //bwtintv_t *p = &m; + bwtintv_t *p = &m; + fprintf(fp1, "%ld %ld %d\n", p->x[2], p->info >> 32, (uint32_t)p->info); //fprintf(fp1, "%ld %ld %ld %ld %d\n", p->x[0], p->x[1], p->x[2], p->info >> 32, (uint32_t)p->info); } } else { // for now, we never come to this block which is slower @@ -434,8 +436,15 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const FMTIndex *fm #ifdef SHOW_PERF int64_t tmp_time = realtime_msec(); #endif -// s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference +#if USE_FMT s.rbeg = tmp.pos = fmt_sa(fmt, p->x[0] + k); + uint64_t tpos = bwt_sa(bwt, p->x[0] + k); + if (s.rbeg != tpos) { + fprintf(stderr, "diff: %ld, %ld %ld\n", p->x[0] + k, tmp.pos, tpos); + } +#else + s.rbeg = tmp.pos = bwt_sa(bwt, p->x[0] + k); // this is the base coordinate in the forward-reverse reference +#endif #ifdef SHOW_PERF tmp_time = realtime_msec() - tmp_time; __sync_fetch_and_add(&time_bwt_sa, tmp_time); @@ -1087,7 +1096,7 @@ void mem_aln2sam(const mem_opt_t *opt, const bntseq_t *bns, kstring_t *str, bseq kputsn("\tMD:Z:", 6, str); kputs((char*)(p->cigar + p->n_cigar), str); } if (m && m->n_cigar) { kputsn("\tMC:Z:", 6, str); add_cigar(opt, m, str, which); } - if (m) { kputsn("\tMQ:i:", 6, str); kputw(m->mapq, str);} +// if (m) { kputsn("\tMQ:i:", 6, str); kputw(m->mapq, str);} if (p->score >= 0) { kputsn("\tAS:i:", 6, str); kputw(p->score, str); } if (p->sub >= 0) { kputsn("\tXS:i:", 6, str); kputw(p->sub, str); } if (bwa_rg_id[0]) { kputsn("\tRG:Z:", 6, str); kputs(bwa_rg_id, str); } diff --git a/bwt.c b/bwt.c index 3255ae4..0797f02 100644 --- a/bwt.c +++ b/bwt.c @@ -77,7 +77,7 @@ void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val) const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2); bwtint_t *sa_addr = (bwtint_t *)(sa_arr + start_byte_idx); // *sa_addr &= (1 << val_idx_in_block) - 1; // 如果开辟内存的时候清零了,这一步可以省略,会清除后面的数据,只适合按递增顺序赋值 - *sa_addr |= val << val_idx_in_block; + *sa_addr |= (val & ((1L << 33) - 1)) << val_idx_in_block; } // 获取某一行的排序值(小端模式) @@ -129,7 +129,8 @@ void bwt_cal_sa(bwt_t *bwt, int intv) } if (isa % intv == 0) bwt_set_sa(bwt->sa, isa / intv, sa); - bwt_set_sa(bwt->sa, 0, (bwtint_t)-1); // before this line, bwt->sa[0] = bwt->seq_len + // bwt_set_sa(bwt->sa, 0, (bwtint_t)-1); // 赋值成-1也没问题,set_sa那里已经修正了 + bwt_set_sa(bwt->sa, 0, 8589934591); // before this line, bwt->sa[0] = bwt->seq_len } bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k) @@ -139,6 +140,7 @@ bwtint_t bwt_sa(const bwt_t *bwt, bwtint_t k) ++sa; k = bwt_invPsi(bwt, k); } + return sa + bwt->sa[k / bwt->sa_intv]; /* without setting bwt->sa[0] = -1, the following line should be changed to (sa + bwt->sa[k/bwt->sa_intv]) % (bwt->seq_len + 1) */ //#ifdef SHOW_PERF @@ -527,9 +529,15 @@ void bwt_restore_sa(const char *fn, bwt_t *bwt) xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; - bwt->sa = (uint8_t*)malloc(SA_BYTES(bwt->n_sa)); - fprintf(stderr, "zzh-read-sa %ld\n", SA_BYTES(bwt->n_sa)); - fread_fix(fp, SA_BYTES(bwt->n_sa), bwt->sa); + + //bwt->sa = (uint8_t*)malloc(SA_BYTES(bwt->n_sa)); + //fprintf(stderr, "zzh-read-sa %ld\n", SA_BYTES(bwt->n_sa)); + //fread_fix(fp, SA_BYTES(bwt->n_sa), bwt->sa); + + bwt->sa = (bwtint_t *)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa[0] = -1; + fread_fix(fp, sizeof(bwtint_t) * (bwt->n_sa - 1), bwt->sa + 1); + err_fclose(fp); } diff --git a/bwt.h b/bwt.h index 341a1c8..7c71380 100644 --- a/bwt.h +++ b/bwt.h @@ -56,7 +56,8 @@ typedef struct { // suffix array int sa_intv; bwtint_t n_sa; - uint8_t *sa; + //uint8_t *sa; + bwtint_t *sa; } bwt_t; typedef struct { diff --git a/fmt_idx.c b/fmt_idx.c index 70e05ad..2e16625 100644 --- a/fmt_idx.c +++ b/fmt_idx.c @@ -657,7 +657,12 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv do { kv_push(bwtintv_t, *curr, iv); goto backward_search; } while(0) // 处理kmer对应的匹配信息 - for (j = 1, curr->n = 0; j < kmer_len; ++j) { + if (only_forward) + j = kmer_len - 1; + else + j = 1; + for (curr->n = 0; j < kmer_len; ++j) + { fmt_kmer_get(fmt, &ok1, qbit, j); CHECK_INTV_CHANGE(ik, ok1, x + j + 1); } @@ -667,11 +672,15 @@ int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv #define PAC_BASE(pac, l) ((pac)[(l) >> 2] >> ((~(l) & 3) << 1) & 3) // 扩展kmer之后的碱基 + __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2); for (i = (int)ik.info; i + 1 < len; i += 2) { // forward search if (q[i] < 4 && q[i + 1] < 4) { fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); CHECK_INTV_CHANGE(ik, ok1, i + 1); CHECK_INTV_CHANGE(ik, ok2, i + 2); // 在这里进行判断是否只有一个候选了 @@ -806,6 +815,8 @@ backward_search: for (j = 0; j < curr->n; ++j) { bwtintv_t *p = &curr->a[j]; // 前向扩展的种子 + __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, p->x[0] - 1 + p->x[2]), 0, 2); uint64_t qbit = 0; if (!only_forward && p->info - x < HASH_KMER_LEN) { qbit = build_backward_kmer(q, p->info - 1, HASH_KMER_LEN, &kmer_len); // 创建反向kmer @@ -822,6 +833,8 @@ backward_search: if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展 { fmt_extend2(fmt, p, &ok1, &ok2, 1, q[i], q[i - 1]); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1 + ok2.x[2]), 0, 2); CHECK_INTV_ADD_MEM(ok1, i + 1, *p, mem); ok1.info = p->info; CHECK_INTV_ADD_MEM(ok2, i, ok1, mem); @@ -880,6 +893,8 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in fmt_kmer_get(fmt, &ik, qbit, kmer_len-1); // 初始碱基位置 ik.info = x + kmer_len; + __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ik.x[1] - 1 + ik.x[2]), 0, 2); #define COND_SET_RETURN(iv, ov, start_pos, end_pos, max_intv, min_len) \ if (iv.x[2] < max_intv && end_pos - start_pos >= min_len) \ @@ -894,6 +909,8 @@ int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, in if (q[i] < 4 && q[i + 1] < 4) { fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2); + __builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2); COND_SET_RETURN(ok1, *mem, x, i, max_intv, min_len); COND_SET_RETURN(ok2, *mem, x, i + 1, max_intv, min_len); ik = ok2; diff --git a/run.sh b/run.sh index 241f1a6..b906d4e 100755 --- a/run.sh +++ b/run.sh @@ -4,19 +4,20 @@ thread=1 #n_r1=~/data/fastq/ZY2105177532213000/sn_r1.fq #n_r2=~/data/fastq/ZY2105177532213000/sn_r2.fq #reference=~/data/reference/human_g1k_v37_decoy.fasta -n_r1=~/fastq/sn_r1.fq -n_r2=~/fastq/sn_r2.fq +#n_r1=~/fastq/sn_r1.fq +#n_r2=~/fastq/sn_r2.fq #n_r1=~/fastq/ssn_r1.fq #n_r2=~/fastq/ssn_r2.fq #n_r1=~/fastq/tiny_n_r1.fq #n_r2=~/fastq/tiny_n_r2.fq -#n_r1=~/fastq/diff_r1.fq -#n_r2=~/fastq/diff_r2.fq +n_r1=~/fastq/diff_r1.fq +n_r2=~/fastq/diff_r2.fq #n_r1=~/fastq/d_r1.fq #n_r2=~/fastq/d_r2.fq reference=~/reference/human_g1k_v37_decoy.fasta -#out=./out.sam -out=/dev/null +#out=./ssn.sam +out=./out.sam +#out=/dev/null #time ./bwa mem -t 12 -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \ # /home/zzh/data/reference/human_g1k_v37_decoy.fasta \ # /home/zzh/data/fastq/nm1.fq \