fixed a deadlock; SMEM iterator
This commit is contained in:
parent
b5170e0efa
commit
150bfbdef4
36
bwt.c
36
bwt.c
|
|
@ -292,13 +292,17 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
|
||||||
ik.info = x + 1;
|
ik.info = x + 1;
|
||||||
|
|
||||||
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
|
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
|
||||||
if (q[i] > 3) break;
|
if (q[i] < 4) {
|
||||||
c = 3 - q[i];
|
c = 3 - q[i];
|
||||||
bwt_extend(bwt, &ik, ok, 0);
|
bwt_extend(bwt, &ik, ok, 0);
|
||||||
if (ok[c].x[2] != ik.x[2]) // change of the interval size
|
if (ok[c].x[2] != ik.x[2]) // change of the interval size
|
||||||
|
kv_push(bwtintv_t, *curr, ik);
|
||||||
|
if (ok[c].x[2] == 0) break; // cannot be extended
|
||||||
|
ik = ok[c]; ik.info = i + 1;
|
||||||
|
} else { // an ambiguous base
|
||||||
kv_push(bwtintv_t, *curr, ik);
|
kv_push(bwtintv_t, *curr, ik);
|
||||||
if (ok[c].x[2] == 0) break; // cannot be extended
|
break; // cannot be extended; in this case, i<len always stands
|
||||||
ik = ok[c]; ik.info = i + 1;
|
}
|
||||||
}
|
}
|
||||||
if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
|
if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
|
||||||
bwt_reverse_intvs(curr); // s.t. smaller intervals visited first
|
bwt_reverse_intvs(curr); // s.t. smaller intervals visited first
|
||||||
|
|
@ -333,23 +337,3 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
|
||||||
if (tmpvec[1] == 0) free(a[1].a);
|
if (tmpvec[1] == 0) free(a[1].a);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bwt_smem(const bwt_t *bwt, int len, const uint8_t *q, bwtintv_v *mem, bwtintv_v *tmpvec[3])
|
|
||||||
{
|
|
||||||
int x = 0, i;
|
|
||||||
bwtintv_v a[3], *tvec[2], *mem1;
|
|
||||||
kv_init(a[0]); kv_init(a[1]); kv_init(a[2]); // no memory allocation here
|
|
||||||
tvec[0] = tmpvec[0]? tmpvec[0] : &a[0];
|
|
||||||
tvec[1] = tmpvec[1]? tmpvec[1] : &a[1];
|
|
||||||
mem1 = tmpvec[2]? tmpvec[2] : &a[2];
|
|
||||||
mem->n = 0;
|
|
||||||
do {
|
|
||||||
x = bwt_smem1(bwt, len, q, x, mem1, tvec);
|
|
||||||
for (i = 0; i < mem1->n; ++i)
|
|
||||||
kv_push(bwtintv_t, *mem, mem1->a[i]);
|
|
||||||
} while (x < len);
|
|
||||||
if (tmpvec[0] == 0) free(a[0].a);
|
|
||||||
if (tmpvec[1] == 0) free(a[1].a);
|
|
||||||
if (tmpvec[2] == 0) free(a[2].a);
|
|
||||||
return mem->n;
|
|
||||||
}
|
|
||||||
|
|
|
||||||
10
bwt.h
10
bwt.h
|
|
@ -112,8 +112,16 @@ extern "C" {
|
||||||
int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end);
|
int bwt_match_exact(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *sa_begin, bwtint_t *sa_end);
|
||||||
int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0);
|
int bwt_match_exact_alt(const bwt_t *bwt, int len, const ubyte_t *str, bwtint_t *k0, bwtint_t *l0);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Extend bi-SA-interval _ik_
|
||||||
|
*/
|
||||||
void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back);
|
void bwt_extend(const bwt_t *bwt, const bwtintv_t *ik, bwtintv_t ok[4], int is_back);
|
||||||
int bwt_smem(const bwt_t *bwt, int len, const uint8_t *q, bwtintv_v *mem, bwtintv_v *tmpvec[3]);
|
|
||||||
|
/**
|
||||||
|
* Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_.
|
||||||
|
* Return the end of the longest exact match starting from _x_.
|
||||||
|
*/
|
||||||
|
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
|
|
||||||
88
fastmap.c
88
fastmap.c
|
|
@ -10,6 +10,49 @@ KSEQ_INIT(gzFile, gzread)
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const bwt_t *bwt;
|
||||||
|
const uint8_t *query;
|
||||||
|
int start, len;
|
||||||
|
bwtintv_v *tmpvec[2], *matches;
|
||||||
|
} smem_i;
|
||||||
|
|
||||||
|
smem_i *smem_iter_init(const bwt_t *bwt)
|
||||||
|
{
|
||||||
|
smem_i *iter;
|
||||||
|
iter = calloc(1, sizeof(smem_i));
|
||||||
|
iter->bwt = bwt;
|
||||||
|
iter->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
|
||||||
|
iter->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
|
||||||
|
iter->matches = calloc(1, sizeof(bwtintv_v));
|
||||||
|
return iter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void smem_iter_destroy(smem_i *iter)
|
||||||
|
{
|
||||||
|
free(iter->tmpvec[0]->a);
|
||||||
|
free(iter->tmpvec[1]->a);
|
||||||
|
free(iter->matches->a);
|
||||||
|
free(iter);
|
||||||
|
}
|
||||||
|
|
||||||
|
void smem_set_query(smem_i *iter, int len, const uint8_t *query)
|
||||||
|
{
|
||||||
|
iter->query = query;
|
||||||
|
iter->start = 0;
|
||||||
|
iter->len = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
int smem_next(smem_i *iter)
|
||||||
|
{
|
||||||
|
iter->tmpvec[0]->n = iter->tmpvec[1]->n = iter->matches->n = 0;
|
||||||
|
if (iter->start >= iter->len || iter->start < 0) return -1;
|
||||||
|
while (iter->start < iter->len && iter->query[iter->start] > 3) ++iter->start; // skip ambiguous bases
|
||||||
|
if (iter->start == iter->len) return -1;
|
||||||
|
iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->matches, iter->tmpvec);
|
||||||
|
return iter->start;
|
||||||
|
}
|
||||||
|
|
||||||
int main_fastmap(int argc, char *argv[])
|
int main_fastmap(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int c, i, min_iwidth = 20, min_len = 17;
|
int c, i, min_iwidth = 20, min_len = 17;
|
||||||
|
|
@ -18,7 +61,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
bwtintv_v a[3], mem, *tvec[3];
|
smem_i *iter;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "w:l:")) >= 0) {
|
while ((c = getopt(argc, argv, "w:l:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
|
@ -42,38 +85,35 @@ int main_fastmap(int argc, char *argv[])
|
||||||
free(tmp);
|
free(tmp);
|
||||||
bns = bns_restore(argv[optind]);
|
bns = bns_restore(argv[optind]);
|
||||||
}
|
}
|
||||||
for (i = 0; i < 3; ++i) { // initiate the temporary array
|
iter = smem_iter_init(bwt);
|
||||||
kv_init(a[i]);
|
|
||||||
tvec[i] = &a[i];
|
|
||||||
}
|
|
||||||
kv_init(mem);
|
|
||||||
while (kseq_read(seq) >= 0) {
|
while (kseq_read(seq) >= 0) {
|
||||||
for (i = 0; i < seq->seq.l; ++i)
|
for (i = 0; i < seq->seq.l; ++i)
|
||||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
bwt_smem(bwt, seq->seq.l, (uint8_t*)seq->seq.s, &mem, tvec);
|
|
||||||
printf("SQ\t%s\t%ld\n", seq->name.s, seq->seq.l);
|
printf("SQ\t%s\t%ld\n", seq->name.s, seq->seq.l);
|
||||||
for (i = 0; i < mem.n; ++i) {
|
smem_set_query(iter, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||||
bwtintv_t *p = &mem.a[i];
|
while (smem_next(iter) > 0) {
|
||||||
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
for (i = 0; i < iter->matches->n; ++i) {
|
||||||
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
bwtintv_t *p = &iter->matches->a[i];
|
||||||
if (p->x[2] <= min_iwidth) {
|
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
||||||
for (k = 0; k < p->x[2]; ++k) {
|
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
||||||
bwtint_t pos;
|
if (p->x[2] <= min_iwidth) {
|
||||||
int len, is_rev, ref_id;
|
for (k = 0; k < p->x[2]; ++k) {
|
||||||
len = (uint32_t)p->info - (p->info>>32);
|
bwtint_t pos;
|
||||||
pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
|
int len, is_rev, ref_id;
|
||||||
if (is_rev) pos -= len - 1;
|
len = (uint32_t)p->info - (p->info>>32);
|
||||||
bns_cnt_ambi(bns, pos, len, &ref_id);
|
pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
|
||||||
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
if (is_rev) pos -= len - 1;
|
||||||
}
|
bns_cnt_ambi(bns, pos, len, &ref_id);
|
||||||
|
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||||
|
}
|
||||||
|
} else fputs("\t*", stdout);
|
||||||
|
putchar('\n');
|
||||||
}
|
}
|
||||||
putchar('\n');
|
|
||||||
}
|
}
|
||||||
puts("//");
|
puts("//");
|
||||||
}
|
}
|
||||||
|
|
||||||
free(mem.a);
|
smem_iter_destroy(iter);
|
||||||
for (i = 0; i < 3; ++i) free(a[i].a);
|
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
bwt_destroy(bwt);
|
bwt_destroy(bwt);
|
||||||
kseq_destroy(seq);
|
kseq_destroy(seq);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue