diff --git a/bwamem.c b/bwamem.c index 52c4a18..2a35fe7 100644 --- a/bwamem.c +++ b/bwamem.c @@ -81,7 +81,7 @@ static int test_and_merge(const memopt_t *opt, memchain1_t *c, const memseed_t * return 0; } -void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr) +static void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr) { while (smem_next(itr) > 0) { int i; @@ -89,21 +89,22 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr) bwtintv_t *p = &itr->matches->a[i]; int slen = (uint32_t)p->info - (p->info>>32); // seed length int64_t k; - if (slen >= opt->min_seed_len || p->x[2] > opt->max_occ) continue; + if (slen < opt->min_seed_len || p->x[2] > opt->max_occ) continue; for (k = 0; k < p->x[2]; ++k) { memchain1_t tmp, *lower, *upper; - memseed_t c1; + memseed_t s; int to_add = 0; - c1.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k); - c1.qbeg = p->info>>32; - c1.len = slen; + s.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k); + s.qbeg = p->info>>32; + s.len = slen; if (kb_size(tree)) { kb_intervalp(chn, tree, &tmp, &lower, &upper); - if (!test_and_merge(opt, lower, &c1)) to_add = 1; - } to_add = 1; + if (!lower || !test_and_merge(opt, lower, &s)) to_add = 1; + } else to_add = 1; if (to_add) { tmp.n = 1; tmp.m = 4; tmp.seeds = calloc(tmp.m, sizeof(memseed_t)); + tmp.seeds[0] = s; kb_putp(chn, tree, &tmp); } } @@ -111,7 +112,7 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr) } } -memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq) +memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq) { memchain_t chain; smem_i *itr; @@ -122,6 +123,14 @@ memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, cons tree = kb_init(chn, KB_DEFAULT_SIZE); itr = smem_itr_init(bwt); smem_set_query(itr, 1, len, seq); + mem_insert_seed(opt, tree, itr); + + chain.m = kb_size(tree); chain.n = 0; + chain.chains = malloc(chain.m * sizeof(memchain1_t)); + + #define traverse_func(p_) (chain.chains[chain.n++] = *(p_)) + __kb_traverse(memchain1_t, tree, traverse_func); + #undef traverse_func smem_itr_destroy(itr); kb_destroy(chn, tree); diff --git a/bwamem.h b/bwamem.h index cc86ef2..72d9557 100644 --- a/bwamem.h +++ b/bwamem.h @@ -11,7 +11,8 @@ typedef struct { } smem_i; typedef struct { - int64_t qbeg, rbeg, len; + int64_t rbeg; + int32_t qbeg, len; } memseed_t; typedef struct { @@ -41,6 +42,8 @@ int smem_next(smem_i *itr); memopt_t *mem_opt_init(void); +memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq); + #ifdef __cplusplus } #endif diff --git a/fastmap.c b/fastmap.c index 6a41aeb..85ccd5c 100644 --- a/fastmap.c +++ b/fastmap.c @@ -16,7 +16,9 @@ int main_mem(int argc, char *argv[]) memopt_t *opt; bwt_t *bwt; bntseq_t *bns; - int c; + int i, j, c; + gzFile *fp; + kseq_t *seq; opt = mem_opt_init(); while ((c = getopt(argc, argv, "")) >= 0) { @@ -28,6 +30,38 @@ int main_mem(int argc, char *argv[]) free(opt); return 1; } + fp = gzopen(argv[optind + 1], "r"); + seq = kseq_init(fp); + { // load the packed sequences, BWT and SA + char *tmp = calloc(strlen(argv[optind]) + 5, 1); + strcat(strcpy(tmp, argv[optind]), ".bwt"); + bwt = bwt_restore_bwt(tmp); + strcat(strcpy(tmp, argv[optind]), ".sa"); + bwt_restore_sa(tmp, bwt); + free(tmp); + bns = bns_restore(argv[optind]); + } + while (kseq_read(seq) >= 0) { + memchain_t chain; + printf(">%s\n", seq->name.s); + for (i = 0; i < seq->seq.l; ++i) + seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; + chain = mem_chain(opt, bwt, seq->seq.l, (uint8_t*)seq->seq.s); + for (i = 0; i < chain.n; ++i) { + memchain1_t *p = &chain.chains[i]; + printf("%d\t%d", i, p->n); + for (j = 0; j < p->n; ++j) { + bwtint_t pos; + int is_rev, ref_id; + pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev); + if (is_rev) pos -= p->seeds[j].len - 1; + bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id); + printf("\t%d,%d,%s:%c%ld", p->seeds[j].len, p->seeds[j].qbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1); + } + putchar('\n'); + } + puts("//"); + } free(opt); return 0;