basic chaining working
Definitely suboptimal in a lot of corner cases...
This commit is contained in:
parent
6c19c9640c
commit
8977737460
27
bwamem.c
27
bwamem.c
|
|
@ -81,7 +81,7 @@ static int test_and_merge(const memopt_t *opt, memchain1_t *c, const memseed_t *
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
static void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||||
{
|
{
|
||||||
while (smem_next(itr) > 0) {
|
while (smem_next(itr) > 0) {
|
||||||
int i;
|
int i;
|
||||||
|
|
@ -89,21 +89,22 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||||
bwtintv_t *p = &itr->matches->a[i];
|
bwtintv_t *p = &itr->matches->a[i];
|
||||||
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||||
int64_t k;
|
int64_t k;
|
||||||
if (slen >= opt->min_seed_len || p->x[2] > opt->max_occ) continue;
|
if (slen < opt->min_seed_len || p->x[2] > opt->max_occ) continue;
|
||||||
for (k = 0; k < p->x[2]; ++k) {
|
for (k = 0; k < p->x[2]; ++k) {
|
||||||
memchain1_t tmp, *lower, *upper;
|
memchain1_t tmp, *lower, *upper;
|
||||||
memseed_t c1;
|
memseed_t s;
|
||||||
int to_add = 0;
|
int to_add = 0;
|
||||||
c1.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k);
|
s.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k);
|
||||||
c1.qbeg = p->info>>32;
|
s.qbeg = p->info>>32;
|
||||||
c1.len = slen;
|
s.len = slen;
|
||||||
if (kb_size(tree)) {
|
if (kb_size(tree)) {
|
||||||
kb_intervalp(chn, tree, &tmp, &lower, &upper);
|
kb_intervalp(chn, tree, &tmp, &lower, &upper);
|
||||||
if (!test_and_merge(opt, lower, &c1)) to_add = 1;
|
if (!lower || !test_and_merge(opt, lower, &s)) to_add = 1;
|
||||||
} to_add = 1;
|
} else to_add = 1;
|
||||||
if (to_add) {
|
if (to_add) {
|
||||||
tmp.n = 1; tmp.m = 4;
|
tmp.n = 1; tmp.m = 4;
|
||||||
tmp.seeds = calloc(tmp.m, sizeof(memseed_t));
|
tmp.seeds = calloc(tmp.m, sizeof(memseed_t));
|
||||||
|
tmp.seeds[0] = s;
|
||||||
kb_putp(chn, tree, &tmp);
|
kb_putp(chn, tree, &tmp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -111,7 +112,7 @@ void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq)
|
memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq)
|
||||||
{
|
{
|
||||||
memchain_t chain;
|
memchain_t chain;
|
||||||
smem_i *itr;
|
smem_i *itr;
|
||||||
|
|
@ -122,6 +123,14 @@ memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, cons
|
||||||
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
||||||
itr = smem_itr_init(bwt);
|
itr = smem_itr_init(bwt);
|
||||||
smem_set_query(itr, 1, len, seq);
|
smem_set_query(itr, 1, len, seq);
|
||||||
|
mem_insert_seed(opt, tree, itr);
|
||||||
|
|
||||||
|
chain.m = kb_size(tree); chain.n = 0;
|
||||||
|
chain.chains = malloc(chain.m * sizeof(memchain1_t));
|
||||||
|
|
||||||
|
#define traverse_func(p_) (chain.chains[chain.n++] = *(p_))
|
||||||
|
__kb_traverse(memchain1_t, tree, traverse_func);
|
||||||
|
#undef traverse_func
|
||||||
|
|
||||||
smem_itr_destroy(itr);
|
smem_itr_destroy(itr);
|
||||||
kb_destroy(chn, tree);
|
kb_destroy(chn, tree);
|
||||||
|
|
|
||||||
5
bwamem.h
5
bwamem.h
|
|
@ -11,7 +11,8 @@ typedef struct {
|
||||||
} smem_i;
|
} smem_i;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int64_t qbeg, rbeg, len;
|
int64_t rbeg;
|
||||||
|
int32_t qbeg, len;
|
||||||
} memseed_t;
|
} memseed_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
|
|
@ -41,6 +42,8 @@ int smem_next(smem_i *itr);
|
||||||
|
|
||||||
memopt_t *mem_opt_init(void);
|
memopt_t *mem_opt_init(void);
|
||||||
|
|
||||||
|
memchain_t mem_chain(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
36
fastmap.c
36
fastmap.c
|
|
@ -16,7 +16,9 @@ int main_mem(int argc, char *argv[])
|
||||||
memopt_t *opt;
|
memopt_t *opt;
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
int c;
|
int i, j, c;
|
||||||
|
gzFile *fp;
|
||||||
|
kseq_t *seq;
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
while ((c = getopt(argc, argv, "")) >= 0) {
|
while ((c = getopt(argc, argv, "")) >= 0) {
|
||||||
|
|
@ -28,6 +30,38 @@ int main_mem(int argc, char *argv[])
|
||||||
free(opt);
|
free(opt);
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
fp = gzopen(argv[optind + 1], "r");
|
||||||
|
seq = kseq_init(fp);
|
||||||
|
{ // load the packed sequences, BWT and SA
|
||||||
|
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
||||||
|
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
||||||
|
bwt = bwt_restore_bwt(tmp);
|
||||||
|
strcat(strcpy(tmp, argv[optind]), ".sa");
|
||||||
|
bwt_restore_sa(tmp, bwt);
|
||||||
|
free(tmp);
|
||||||
|
bns = bns_restore(argv[optind]);
|
||||||
|
}
|
||||||
|
while (kseq_read(seq) >= 0) {
|
||||||
|
memchain_t chain;
|
||||||
|
printf(">%s\n", seq->name.s);
|
||||||
|
for (i = 0; i < seq->seq.l; ++i)
|
||||||
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
|
chain = mem_chain(opt, bwt, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||||
|
for (i = 0; i < chain.n; ++i) {
|
||||||
|
memchain1_t *p = &chain.chains[i];
|
||||||
|
printf("%d\t%d", i, p->n);
|
||||||
|
for (j = 0; j < p->n; ++j) {
|
||||||
|
bwtint_t pos;
|
||||||
|
int is_rev, ref_id;
|
||||||
|
pos = bns_depos(bns, p->seeds[j].rbeg, &is_rev);
|
||||||
|
if (is_rev) pos -= p->seeds[j].len - 1;
|
||||||
|
bns_cnt_ambi(bns, pos, p->seeds[j].len, &ref_id);
|
||||||
|
printf("\t%d,%d,%s:%c%ld", p->seeds[j].len, p->seeds[j].qbeg, bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||||
|
}
|
||||||
|
putchar('\n');
|
||||||
|
}
|
||||||
|
puts("//");
|
||||||
|
}
|
||||||
|
|
||||||
free(opt);
|
free(opt);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue