code backup
This commit is contained in:
parent
91debf412b
commit
6c19c9640c
75
bwamem.c
75
bwamem.c
|
|
@ -1,4 +1,6 @@
|
|||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include "bwamem.h"
|
||||
|
||||
memopt_t *mem_opt_init()
|
||||
|
|
@ -8,6 +10,7 @@ memopt_t *mem_opt_init()
|
|||
o->a = 1; o->b = 9; o->q = 16; o->r = 1; o->w = 100;
|
||||
o->min_seed_len = 17;
|
||||
o->max_occ = 10;
|
||||
o->max_chain_gap = 10000;
|
||||
return o;
|
||||
}
|
||||
|
||||
|
|
@ -52,3 +55,75 @@ int smem_next(smem_i *itr)
|
|||
return itr->start;
|
||||
}
|
||||
|
||||
#include "kbtree.h"
|
||||
|
||||
#define chain_lt(a, b) ((a).pos < (b).pos)
|
||||
KBTREE_INIT(chn, memchain1_t, chain_lt)
|
||||
|
||||
static int test_and_merge(const memopt_t *opt, memchain1_t *c, const memseed_t *p)
|
||||
{
|
||||
int64_t qend, rend, x, y;
|
||||
const memseed_t *last = &c->seeds[c->n-1];
|
||||
qend = last->qbeg + last->len;
|
||||
rend = last->rbeg + last->len;
|
||||
if (p->qbeg > c->seeds[0].qbeg && p->qbeg + p->len < qend && p->rbeg > c->seeds[0].rbeg && p->rbeg + p->len < rend)
|
||||
return 1; // contained seed; do nothing
|
||||
x = p->qbeg - last->qbeg; // always positive
|
||||
y = p->rbeg - last->rbeg;
|
||||
if (y > 0 && x - y <= opt->w && y - x <= opt->w && x - last->len < opt->max_chain_gap && y - last->len < opt->max_chain_gap) {
|
||||
if (c->n == c->m) {
|
||||
c->m <<= 1;
|
||||
c->seeds = realloc(c->seeds, c->m * sizeof(memseed_t));
|
||||
}
|
||||
c->seeds[c->n++] = *p;
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void mem_insert_seed(const memopt_t *opt, kbtree_t(chn) *tree, smem_i *itr)
|
||||
{
|
||||
while (smem_next(itr) > 0) {
|
||||
int i;
|
||||
for (i = 0; i < itr->matches->n; ++i) {
|
||||
bwtintv_t *p = &itr->matches->a[i];
|
||||
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||
int64_t k;
|
||||
if (slen >= opt->min_seed_len || p->x[2] > opt->max_occ) continue;
|
||||
for (k = 0; k < p->x[2]; ++k) {
|
||||
memchain1_t tmp, *lower, *upper;
|
||||
memseed_t c1;
|
||||
int to_add = 0;
|
||||
c1.rbeg = tmp.pos = bwt_sa(itr->bwt, p->x[0] + k);
|
||||
c1.qbeg = p->info>>32;
|
||||
c1.len = slen;
|
||||
if (kb_size(tree)) {
|
||||
kb_intervalp(chn, tree, &tmp, &lower, &upper);
|
||||
if (!test_and_merge(opt, lower, &c1)) to_add = 1;
|
||||
} to_add = 1;
|
||||
if (to_add) {
|
||||
tmp.n = 1; tmp.m = 4;
|
||||
tmp.seeds = calloc(tmp.m, sizeof(memseed_t));
|
||||
kb_putp(chn, tree, &tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
memchain_t mem_collect_seed(const memopt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq)
|
||||
{
|
||||
memchain_t chain;
|
||||
smem_i *itr;
|
||||
kbtree_t(chn) *tree;
|
||||
|
||||
memset(&chain, 0, sizeof(memchain_t));
|
||||
if (len < opt->min_seed_len) return chain; // if the query is shorter than the seed length, no match
|
||||
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
||||
itr = smem_itr_init(bwt);
|
||||
smem_set_query(itr, 1, len, seq);
|
||||
|
||||
smem_itr_destroy(itr);
|
||||
kb_destroy(chn, tree);
|
||||
return chain;
|
||||
}
|
||||
|
|
|
|||
17
bwamem.h
17
bwamem.h
|
|
@ -10,11 +10,26 @@ typedef struct {
|
|||
bwtintv_v *tmpvec[2], *matches;
|
||||
} smem_i;
|
||||
|
||||
typedef struct {
|
||||
int64_t qbeg, rbeg, len;
|
||||
} memseed_t;
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r, w;
|
||||
int min_seed_len, max_occ;
|
||||
int min_seed_len, max_occ, max_chain_gap;
|
||||
} memopt_t;
|
||||
|
||||
typedef struct {
|
||||
int n, m;
|
||||
int64_t pos;
|
||||
memseed_t *seeds;
|
||||
} memchain1_t;
|
||||
|
||||
typedef struct {
|
||||
int n, m;
|
||||
memchain1_t *chains;
|
||||
} memchain_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue