a bit refactoring for further changes
This commit is contained in:
parent
6de74888fd
commit
5a4a0c4173
41
bwt.c
41
bwt.c
|
|
@ -337,3 +337,44 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv,
|
||||||
if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
|
if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/***************************
|
||||||
|
* SMEM iterator interface *
|
||||||
|
***************************/
|
||||||
|
|
||||||
|
smem_i *smem_itr_init(const bwt_t *bwt)
|
||||||
|
{
|
||||||
|
smem_i *itr;
|
||||||
|
itr = calloc(1, sizeof(smem_i));
|
||||||
|
itr->bwt = bwt;
|
||||||
|
itr->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
|
||||||
|
itr->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
|
||||||
|
itr->matches = calloc(1, sizeof(bwtintv_v));
|
||||||
|
return itr;
|
||||||
|
}
|
||||||
|
|
||||||
|
void smem_itr_destroy(smem_i *itr)
|
||||||
|
{
|
||||||
|
free(itr->tmpvec[0]->a);
|
||||||
|
free(itr->tmpvec[1]->a);
|
||||||
|
free(itr->matches->a);
|
||||||
|
free(itr);
|
||||||
|
}
|
||||||
|
|
||||||
|
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query)
|
||||||
|
{
|
||||||
|
itr->query = query;
|
||||||
|
itr->start = 0;
|
||||||
|
itr->len = len;
|
||||||
|
itr->min_intv = min_intv;
|
||||||
|
}
|
||||||
|
|
||||||
|
int smem_next(smem_i *itr)
|
||||||
|
{
|
||||||
|
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = 0;
|
||||||
|
if (itr->start >= itr->len || itr->start < 0) return -1;
|
||||||
|
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
||||||
|
if (itr->start == itr->len) return -1;
|
||||||
|
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, itr->start, itr->min_intv, itr->matches, itr->tmpvec);
|
||||||
|
return itr->start;
|
||||||
|
}
|
||||||
|
|
|
||||||
14
bwt.h
14
bwt.h
|
|
@ -60,6 +60,13 @@ typedef struct {
|
||||||
|
|
||||||
typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
|
typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
const bwt_t *bwt;
|
||||||
|
const uint8_t *query;
|
||||||
|
int start, len, min_intv;
|
||||||
|
bwtintv_v *tmpvec[2], *matches;
|
||||||
|
} smem_i;
|
||||||
|
|
||||||
/* For general OCC_INTERVAL, the following is correct:
|
/* For general OCC_INTERVAL, the following is correct:
|
||||||
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
|
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
|
||||||
#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4)
|
#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4)
|
||||||
|
|
@ -123,6 +130,13 @@ extern "C" {
|
||||||
*/
|
*/
|
||||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||||
|
|
||||||
|
// SMEM iterator interface
|
||||||
|
|
||||||
|
smem_i *smem_itr_init(const bwt_t *bwt);
|
||||||
|
void smem_itr_destroy(smem_i *itr);
|
||||||
|
void smem_set_query(smem_i *itr, int min_intv, int len, const uint8_t *query);
|
||||||
|
int smem_next(smem_i *itr);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
58
fastmap.c
58
fastmap.c
|
|
@ -10,50 +10,6 @@ KSEQ_INIT(gzFile, gzread)
|
||||||
|
|
||||||
extern unsigned char nst_nt4_table[256];
|
extern unsigned char nst_nt4_table[256];
|
||||||
|
|
||||||
typedef struct {
|
|
||||||
const bwt_t *bwt;
|
|
||||||
const uint8_t *query;
|
|
||||||
int start, len, min_intv;
|
|
||||||
bwtintv_v *tmpvec[2], *matches;
|
|
||||||
} smem_i;
|
|
||||||
|
|
||||||
smem_i *smem_iter_init(const bwt_t *bwt, int min_intv)
|
|
||||||
{
|
|
||||||
smem_i *iter;
|
|
||||||
iter = calloc(1, sizeof(smem_i));
|
|
||||||
iter->bwt = bwt;
|
|
||||||
iter->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
|
|
||||||
iter->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
|
|
||||||
iter->matches = calloc(1, sizeof(bwtintv_v));
|
|
||||||
iter->min_intv = min_intv > 0? min_intv : 1;
|
|
||||||
return iter;
|
|
||||||
}
|
|
||||||
|
|
||||||
void smem_iter_destroy(smem_i *iter)
|
|
||||||
{
|
|
||||||
free(iter->tmpvec[0]->a);
|
|
||||||
free(iter->tmpvec[1]->a);
|
|
||||||
free(iter->matches->a);
|
|
||||||
free(iter);
|
|
||||||
}
|
|
||||||
|
|
||||||
void smem_set_query(smem_i *iter, int len, const uint8_t *query)
|
|
||||||
{
|
|
||||||
iter->query = query;
|
|
||||||
iter->start = 0;
|
|
||||||
iter->len = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
int smem_next(smem_i *iter)
|
|
||||||
{
|
|
||||||
iter->tmpvec[0]->n = iter->tmpvec[1]->n = iter->matches->n = 0;
|
|
||||||
if (iter->start >= iter->len || iter->start < 0) return -1;
|
|
||||||
while (iter->start < iter->len && iter->query[iter->start] > 3) ++iter->start; // skip ambiguous bases
|
|
||||||
if (iter->start == iter->len) return -1;
|
|
||||||
iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->min_intv, iter->matches, iter->tmpvec);
|
|
||||||
return iter->start;
|
|
||||||
}
|
|
||||||
|
|
||||||
int main_fastmap(int argc, char *argv[])
|
int main_fastmap(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1;
|
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1;
|
||||||
|
|
@ -62,7 +18,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
bntseq_t *bns;
|
bntseq_t *bns;
|
||||||
smem_i *iter;
|
smem_i *itr;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "w:l:sm:")) >= 0) {
|
while ((c = getopt(argc, argv, "w:l:sm:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
|
@ -88,7 +44,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
free(tmp);
|
free(tmp);
|
||||||
bns = bns_restore(argv[optind]);
|
bns = bns_restore(argv[optind]);
|
||||||
}
|
}
|
||||||
iter = smem_iter_init(bwt, min_intv);
|
itr = smem_itr_init(bwt);
|
||||||
while (kseq_read(seq) >= 0) {
|
while (kseq_read(seq) >= 0) {
|
||||||
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
||||||
if (print_seq) {
|
if (print_seq) {
|
||||||
|
|
@ -97,10 +53,10 @@ int main_fastmap(int argc, char *argv[])
|
||||||
} else putchar('\n');
|
} else putchar('\n');
|
||||||
for (i = 0; i < seq->seq.l; ++i)
|
for (i = 0; i < seq->seq.l; ++i)
|
||||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||||
smem_set_query(iter, seq->seq.l, (uint8_t*)seq->seq.s);
|
smem_set_query(itr, min_intv, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||||
while (smem_next(iter) > 0) {
|
while (smem_next(itr) > 0) {
|
||||||
for (i = 0; i < iter->matches->n; ++i) {
|
for (i = 0; i < itr->matches->n; ++i) {
|
||||||
bwtintv_t *p = &iter->matches->a[i];
|
bwtintv_t *p = &itr->matches->a[i];
|
||||||
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
||||||
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
printf("EM\t%d\t%d\t%ld", (uint32_t)(p->info>>32), (uint32_t)p->info, (long)p->x[2]);
|
||||||
if (p->x[2] <= min_iwidth) {
|
if (p->x[2] <= min_iwidth) {
|
||||||
|
|
@ -120,7 +76,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
puts("//");
|
puts("//");
|
||||||
}
|
}
|
||||||
|
|
||||||
smem_iter_destroy(iter);
|
smem_itr_destroy(itr);
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
bwt_destroy(bwt);
|
bwt_destroy(bwt);
|
||||||
kseq_destroy(seq);
|
kseq_destroy(seq);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue