basic chaining working
Definitely suboptimal in a lot of corner cases...
This commit is contained in:
parent
6c19c9640c
commit
8977737460
27
bwamem.c
27
bwamem.c
|
|
@ -81,7 +81,7 @@ static int test_and_merge(const memopt_t *opt, memchain1_t *c, const memseed_t *
|
|||
return 0;
|
||||
}
|
||||
|
||||
void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||
static void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||
{
|
||||
while (smem_next(itr) > 0) {
|
||||
int i;
|
||||
|
|
@ -89,21 +89,22 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
|||
bwtintv_t *p = &itr->matches->a[i];
|
||||
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||
int64_t k;
|
||||
if (slen >= opt->min_seed_len || p->x[2] > opt->max_occ) continue;
|
||||
if (slen < opt->min_seed_len || p->x[2] > opt->max_occ) continue;
|
||||
for (k = 0; k < p->x[2]; ++k) {
|
||||
memchain1_t tmp, *lower, *upper;
|
||||
memseed_t c1;
|
||||
memseed_t s;
|
||||
int to_add = 0;
|
||||
c1.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k);
|
||||
c1.qbeg = p->info>>32;
|
||||
c1.len = slen;
|
||||
s.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k);
|
||||
s.qbeg = p->info>>32;
|
||||
s.len = slen;
|
||||
if (kb_size(tree)) {
|
||||
kb_intervalp(chn, tree, &tmp, &lower, &upper);
|
||||
if (!test_and_merge(opt, lower, &c1)) to_add = 1;
|
||||
} to_add = 1;
|
||||
if (!lower || !test_and_merge(opt, lower, &s)) to_add = 1;
|
||||
} else to_add = 1;
|
||||
if (to_add) {
|
||||
tmp.n = 1; tmp.m = 4;
|
||||
tmp.seeds = calloc(tmp.m, sizeof(memseed_t));
|
||||
tmp.seeds[0] = s;
|
||||
kb_putp(chn, tree, &tmp);
|
||||
}
|
||||
}
|
||||
|
|
@ -111,7 +112,7 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
|||
}
|
||||
}
|
||||
|
||||
memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq)
|
||||
memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq)
|
||||
{
|
||||
memchain_t chain;
|
||||
smem_i *itr;
|
||||
|
|
@ -122,6 +123,14 @@ memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, cons
|
|||
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
||||
itr = smem_itr_init(bwt);
|
||||
smem_set_query(itr, 1, len, seq);
|
||||
mem_insert_seed(opt, tree, itr);
|
||||
|
||||
chain.m = kb_size(tree); chain.n = 0;
|
||||
chain.chains = malloc(chain.m * sizeof(memchain1_t));
|
||||
|
||||
#define traverse_func(p_) (chain.chains[chain.n++] = *(p_))
|
||||
__kb_traverse(memchain1_t, tree, traverse_func);
|
||||
#undef traverse_func
|
||||
|
||||
smem_itr_destroy(itr);
|
||||
kb_destroy(chn, tree);
|
||||
|
|
|
|||
5
bwamem.h
5
bwamem.h
|
|
@ -11,7 +11,8 @@ typedef struct {
|
|||
} smem_i;
|
||||
|
||||
typedef struct {
|
||||
int64_t qbeg, rbeg, len;
|
||||
int64_t rbeg;
|
||||
int32_t qbeg, len;
|
||||
} memseed_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -41,6 +42,8 @@ int smem_next(smem_i *itr);
|
|||
|
||||
memopt_t *mem_opt_init(void);
|
||||
|
||||
memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
36
fastmap.c
36
fastmap.c
|
|
@ -16,7 +16,9 @@ int main_mem(int argc, char *argv[])
|
|||
memopt_t *opt;
|
||||
bwt_t *bwt;
|
||||
bntseq_t *bns;
|
||||
int c;
|
||||
int i, j, c;
|
||||
gzFile *fp;
|
||||
kseq_t *seq;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "")) >= 0) {
|
||||
|
|
@ -28,6 +30,38 @@ int main_mem(int argc, char *argv[])
|
|||
free(opt);
|
||||
return 1;
|
||||
}
|
||||
fp = gzopen(argv[optind + 1], "r");
|
||||
seq = kseq_init(fp);
|
||||
{ // load the packed sequences, BWT and SA
|
||||
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
||||
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
||||
bwt = bwt_restore_bwt(tmp);
|
||||
strcat(strcpy(tmp, argv[optind]), ".sa");
|
||||
bwt_restore_sa(tmp, bwt);
|
||||
free(tmp);
|
||||
bns = bns_restore(argv[optind]);
|
||||
}
|
||||
while (kseq_read(seq) >= 0) {
|
||||
memchain_t chain;
|
||||
printf(">%s\n", seq->name.s);
|
||||
for (i = 0; i < seq->seq.l; ++i)
|
||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||
chain = mem_chain(opt, bwt, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||
for (i = 0; i < chain.n; ++i) {
|
||||
memchain1_t *p = &chain.chains[i];
|
||||
printf("%d\t%d", i, p->n);
|
||||
for (j = 0; j < p->n; ++j) {
|
||||
bwtint_t pos;
|
||||
int is_rev, ref_id;
|
||||
pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
|
||||
if (is_rev) pos -= p->seeds[j].len - 1;
|
||||
bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
|
||||
printf("\t%d,%d,%s:%c%ld", p->seeds[j].len, p->seeds[j].qbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||
}
|
||||
putchar('\n');
|
||||
}
|
||||
puts("//");
|
||||
}
|
||||
|
||||
free(opt);
|
||||
return 0;
|
||||
|
|
|
|||
Loading…
Reference in New Issue