preparation for further changes
This commit is contained in:
parent
292f9061ab
commit
6641788d38
27
bwt.c
27
bwt.c
|
|
@ -277,7 +277,7 @@ static void bwt_reverse_intvs(bwtintv_v *p)
|
|||
}
|
||||
}
|
||||
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2])
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2])
|
||||
{
|
||||
int i, j, c, ret;
|
||||
bwtintv_t ik, ok[4];
|
||||
|
|
@ -285,37 +285,38 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
|
|||
|
||||
mem->n = 0;
|
||||
if (q[x] > 3) return x + 1;
|
||||
if (min_intv < 1) min_intv = 1; // the interval size should be at least 1
|
||||
kv_init(a[0]); kv_init(a[1]);
|
||||
prev = tmpvec[0]? tmpvec[0] : &a[0];
|
||||
curr = tmpvec[1]? tmpvec[1] : &a[1];
|
||||
bwt_set_intv(bwt, q[x], ik);
|
||||
prev = tmpvec && tmpvec[0]? tmpvec[0] : &a[0]; // use the temporary vector if provided
|
||||
curr = tmpvec && tmpvec[1]? tmpvec[1] : &a[1];
|
||||
bwt_set_intv(bwt, q[x], ik); // the initial interval of a single base
|
||||
ik.info = x + 1;
|
||||
|
||||
for (i = x + 1, curr->n = 0; i < len; ++i) { // forward search
|
||||
if (q[i] < 4) {
|
||||
c = 3 - q[i];
|
||||
if (q[i] < 4) { // an A/C/G/T base
|
||||
c = 3 - q[i]; // complement of q[i]
|
||||
bwt_extend(bwt, &ik, ok, 0);
|
||||
if (ok[c].x[2] != ik.x[2]) // change of the interval size
|
||||
kv_push(bwtintv_t, *curr, ik);
|
||||
if (ok[c].x[2] == 0) break; // cannot be extended
|
||||
if (ok[c].x[2] < min_intv) break; // the interval size is too small to be extended further
|
||||
ik = ok[c]; ik.info = i + 1;
|
||||
} else { // an ambiguous base
|
||||
kv_push(bwtintv_t, *curr, ik);
|
||||
break; // cannot be extended; in this case, i<len always stands
|
||||
break; // always terminate extension at an ambiguous base; in this case, i<len always stands
|
||||
}
|
||||
}
|
||||
if (i == len) kv_push(bwtintv_t, *curr, ik); // push the last interval if we reach the end
|
||||
bwt_reverse_intvs(curr); // s.t. smaller intervals visited first
|
||||
bwt_reverse_intvs(curr); // s.t. smaller intervals (i.e. longer matches) visited first
|
||||
ret = curr->a[0].info; // this will be the returned value
|
||||
swap = curr; curr = prev; prev = swap;
|
||||
|
||||
for (i = x - 1; i >= -1; --i) { // backward search for MEMs
|
||||
if (q[i] > 3) break;
|
||||
if (i >= 0 && q[i] > 3) break; // always stop at an ambiguous base as the FM-index does not have any.
|
||||
c = i < 0? 0 : q[i];
|
||||
for (j = 0, curr->n = 0; j < prev->n; ++j) {
|
||||
bwtintv_t *p = &prev->a[j];
|
||||
bwt_extend(bwt, p, ok, 1);
|
||||
if (ok[c].x[2] == 0 || i == -1) { // keep the hit if reaching the beginning or not extended further
|
||||
if (ok[c].x[2] < min_intv || i == -1) { // keep the hit if reaching the beginning or not extended further
|
||||
if (curr->n == 0) { // curr->n to make sure there is no longer matches
|
||||
if (mem->n == 0 || i + 1 < mem->a[mem->n-1].info>>32) { // skip contained matches
|
||||
ik = *p; ik.info |= (uint64_t)(i + 1)<<32;
|
||||
|
|
@ -333,7 +334,7 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem
|
|||
}
|
||||
bwt_reverse_intvs(mem); // s.t. sorted by the start coordinate
|
||||
|
||||
if (tmpvec[0] == 0) free(a[0].a);
|
||||
if (tmpvec[1] == 0) free(a[1].a);
|
||||
if (tmpvec == 0 || tmpvec[0] == 0) free(a[0].a);
|
||||
if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a);
|
||||
return ret;
|
||||
}
|
||||
|
|
|
|||
2
bwt.h
2
bwt.h
|
|
@ -121,7 +121,7 @@ extern "C" {
|
|||
* Given a query _q_, collect potential SMEMs covering position _x_ and store them in _mem_.
|
||||
* Return the end of the longest exact match starting from _x_.
|
||||
*/
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||
int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, bwtintv_v *mem, bwtintv_v *tmpvec[2]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
16
fastmap.c
16
fastmap.c
|
|
@ -13,11 +13,11 @@ extern unsigned char nst_nt4_table[256];
|
|||
typedef struct {
|
||||
const bwt_t *bwt;
|
||||
const uint8_t *query;
|
||||
int start, len;
|
||||
int start, len, min_intv;
|
||||
bwtintv_v *tmpvec[2], *matches;
|
||||
} smem_i;
|
||||
|
||||
smem_i *smem_iter_init(const bwt_t *bwt)
|
||||
smem_i *smem_iter_init(const bwt_t *bwt, int min_intv)
|
||||
{
|
||||
smem_i *iter;
|
||||
iter = calloc(1, sizeof(smem_i));
|
||||
|
|
@ -25,6 +25,7 @@ smem_i *smem_iter_init(const bwt_t *bwt)
|
|||
iter->tmpvec[0] = calloc(1, sizeof(bwtintv_v));
|
||||
iter->tmpvec[1] = calloc(1, sizeof(bwtintv_v));
|
||||
iter->matches = calloc(1, sizeof(bwtintv_v));
|
||||
iter->min_intv = min_intv > 0? min_intv : 1;
|
||||
return iter;
|
||||
}
|
||||
|
||||
|
|
@ -49,13 +50,13 @@ int smem_next(smem_i *iter)
|
|||
if (iter->start >= iter->len || iter->start < 0) return -1;
|
||||
while (iter->start < iter->len && iter->query[iter->start] > 3) ++iter->start; // skip ambiguous bases
|
||||
if (iter->start == iter->len) return -1;
|
||||
iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->matches, iter->tmpvec);
|
||||
iter->start = bwt_smem1(iter->bwt, iter->len, iter->query, iter->start, iter->min_intv, iter->matches, iter->tmpvec);
|
||||
return iter->start;
|
||||
}
|
||||
|
||||
int main_fastmap(int argc, char *argv[])
|
||||
{
|
||||
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0;
|
||||
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, min_intv = 1;
|
||||
kseq_t *seq;
|
||||
bwtint_t k;
|
||||
gzFile fp;
|
||||
|
|
@ -63,15 +64,16 @@ int main_fastmap(int argc, char *argv[])
|
|||
bntseq_t *bns;
|
||||
smem_i *iter;
|
||||
|
||||
while ((c = getopt(argc, argv, "w:l:s")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "w:l:sm:")) >= 0) {
|
||||
switch (c) {
|
||||
case 's': print_seq = 1; break;
|
||||
case 'w': min_iwidth = atoi(optarg); break;
|
||||
case 'l': min_len = atoi(optarg); break;
|
||||
case 'm': min_intv = atoi(optarg); break;
|
||||
}
|
||||
}
|
||||
if (optind + 1 >= argc) {
|
||||
fprintf(stderr, "Usage: bwa fastmap [-s] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", min_len, min_iwidth);
|
||||
fprintf(stderr, "Usage: bwa fastmap [-s] [-l minLen=%d] [-w maxSaSize=%d] [-m minIntv=%d] <idxbase> <in.fq>\n", min_len, min_iwidth, min_intv);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -86,7 +88,7 @@ int main_fastmap(int argc, char *argv[])
|
|||
free(tmp);
|
||||
bns = bns_restore(argv[optind]);
|
||||
}
|
||||
iter = smem_iter_init(bwt);
|
||||
iter = smem_iter_init(bwt, min_intv);
|
||||
while (kseq_read(seq) >= 0) {
|
||||
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
||||
if (print_seq) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue