dev-458: simplified the smem iterator
simpler but less powful.
This commit is contained in:
parent
acfe7613db
commit
b3225581be
34
bwamem.c
34
bwamem.c
|
|
@ -116,7 +116,7 @@ void smem_set_query(smem_i *itr, int len, const uint8_t *query)
|
|||
itr->len = len;
|
||||
}
|
||||
|
||||
const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int start_width)
|
||||
const bwtintv_v *smem_next(smem_i *itr)
|
||||
{
|
||||
int i, max, max_i, ori_start;
|
||||
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
|
||||
|
|
@ -124,44 +124,16 @@ const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int sta
|
|||
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
||||
if (itr->start == itr->len) return 0;
|
||||
ori_start = itr->start;
|
||||
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, start_width, itr->matches, itr->tmpvec); // search for SMEM
|
||||
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, 1, itr->matches, itr->tmpvec); // search for SMEM
|
||||
if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
|
||||
for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
|
||||
bwtintv_t *p = &itr->matches->a[i];
|
||||
int len = (uint32_t)p->info - (p->info>>32);
|
||||
if (max < len) max = len, max_i = i;
|
||||
}
|
||||
if (split_len > 0 && max >= split_len && itr->matches->a[max_i].x[2] <= split_width) { // if the longest SMEM is unique and long
|
||||
int j;
|
||||
bwtintv_v *a = itr->tmpvec[0]; // reuse tmpvec[0] for merging
|
||||
bwtintv_t *p = &itr->matches->a[max_i];
|
||||
bwt_smem1(itr->bwt, itr->len, itr->query, ((uint32_t)p->info + (p->info>>32))>>1, itr->matches->a[max_i].x[2]+1, itr->sub, itr->tmpvec); // starting from the middle of the longest MEM
|
||||
i = j = 0; a->n = 0;
|
||||
while (i < itr->matches->n && j < itr->sub->n) { // ordered merge
|
||||
int64_t xi = itr->matches->a[i].info>>32<<32 | (itr->len - (uint32_t)itr->matches->a[i].info);
|
||||
int64_t xj = itr->sub->a[j].info>>32<<32 | (itr->len - (uint32_t)itr->sub->a[j].info);
|
||||
if (xi < xj) {
|
||||
kv_push(bwtintv_t, *a, itr->matches->a[i]);
|
||||
++i;
|
||||
} else if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start) {
|
||||
kv_push(bwtintv_t, *a, itr->sub->a[j]);
|
||||
++j;
|
||||
} else ++j;
|
||||
}
|
||||
for (; i < itr->matches->n; ++i) kv_push(bwtintv_t, *a, itr->matches->a[i]);
|
||||
for (; j < itr->sub->n; ++j)
|
||||
if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start)
|
||||
kv_push(bwtintv_t, *a, itr->sub->a[j]);
|
||||
kv_copy(bwtintv_t, *itr->matches, *a);
|
||||
}
|
||||
return itr->matches;
|
||||
}
|
||||
|
||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
||||
{
|
||||
return smem_next2(itr, split_len, split_width, 1);
|
||||
}
|
||||
|
||||
/***************************
|
||||
* Collection SA invervals *
|
||||
***************************/
|
||||
|
|
@ -410,7 +382,7 @@ int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *chains)
|
|||
int e_min = a[j].end < a[i].end? a[j].end : a[i].end;
|
||||
if (e_min > b_max) { // have overlap
|
||||
int min_l = a[i].end - a[i].beg < a[j].end - a[j].beg? a[i].end - a[i].beg : a[j].end - a[j].beg;
|
||||
if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap
|
||||
if (e_min - b_max >= min_l * opt->mask_level && min_l < opt->max_chain_gap) { // significant overlap
|
||||
if (a[j].p2 == 0) a[j].p2 = a[i].p;
|
||||
if (a[i].w < a[j].w * opt->chain_drop_ratio && a[j].w - a[i].w >= opt->min_seed_len<<1)
|
||||
break;
|
||||
|
|
|
|||
2
bwamem.h
2
bwamem.h
|
|
@ -86,7 +86,7 @@ extern "C" {
|
|||
smem_i *smem_itr_init(const bwt_t *bwt);
|
||||
void smem_itr_destroy(smem_i *itr);
|
||||
void smem_set_query(smem_i *itr, int len, const uint8_t *query);
|
||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width);
|
||||
const bwtintv_v *smem_next(smem_i *itr);
|
||||
|
||||
mem_opt_t *mem_opt_init(void);
|
||||
void mem_fill_scmat(int a, int b, int8_t mat[25]);
|
||||
|
|
|
|||
12
fastmap.c
12
fastmap.c
|
|
@ -37,7 +37,7 @@ int main_mem(int argc, char *argv[])
|
|||
opt = mem_opt_init();
|
||||
opt0.a = opt0.b = opt0.o_del = opt0.e_del = opt0.o_ins = opt0.e_ins = opt0.pen_unpaired = -1;
|
||||
opt0.pen_clip5 = opt0.pen_clip3 = opt0.zdrop = opt0.T = -1;
|
||||
while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'w') opt->w = atoi(optarg);
|
||||
else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1;
|
||||
|
|
@ -58,6 +58,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'D') opt->chain_drop_ratio = atof(optarg);
|
||||
else if (c == 'm') opt->max_matesw = atoi(optarg);
|
||||
else if (c == 's') opt->split_width = atoi(optarg);
|
||||
else if (c == 'N') opt->max_chain_gap = atoi(optarg);
|
||||
else if (c == 'C') copy_comment = 1;
|
||||
else if (c == 'Q') {
|
||||
opt->mapQ_coef_len = atoi(optarg);
|
||||
|
|
@ -215,7 +216,7 @@ int main_mem(int argc, char *argv[])
|
|||
|
||||
int main_fastmap(int argc, char *argv[])
|
||||
{
|
||||
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, split_width = 0;
|
||||
int c, i, min_iwidth = 20, min_len = 17, print_seq = 0;
|
||||
kseq_t *seq;
|
||||
bwtint_t k;
|
||||
gzFile fp;
|
||||
|
|
@ -223,9 +224,8 @@ int main_fastmap(int argc, char *argv[])
|
|||
const bwtintv_v *a;
|
||||
bwaidx_t *idx;
|
||||
|
||||
while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "w:l:p")) >= 0) {
|
||||
switch (c) {
|
||||
case 's': split_width = atoi(optarg); break;
|
||||
case 'p': print_seq = 1; break;
|
||||
case 'w': min_iwidth = atoi(optarg); break;
|
||||
case 'l': min_len = atoi(optarg); break;
|
||||
|
|
@ -233,7 +233,7 @@ int main_fastmap(int argc, char *argv[])
|
|||
}
|
||||
}
|
||||
if (optind + 1 >= argc) {
|
||||
fprintf(stderr, "Usage: bwa fastmap [-p] [-s splitWidth=%d] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", split_width, min_len, min_iwidth);
|
||||
fprintf(stderr, "Usage: bwa fastmap [-p] [-l minLen=%d] [-w maxSaSize=%d] <idxbase> <in.fq>\n", min_len, min_iwidth);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
@ -250,7 +250,7 @@ int main_fastmap(int argc, char *argv[])
|
|||
for (i = 0; i < seq->seq.l; ++i)
|
||||
seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]];
|
||||
smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s);
|
||||
while ((a = smem_next(itr, min_len<<1, split_width)) != 0) {
|
||||
while ((a = smem_next(itr)) != 0) {
|
||||
for (i = 0; i < a->n; ++i) {
|
||||
bwtintv_t *p = &a->a[i];
|
||||
if ((uint32_t)p->info - (p->info>>32) < min_len) continue;
|
||||
|
|
|
|||
Loading…
Reference in New Issue