diff --git a/bwamem.c b/bwamem.c index ea936f1..6abcf00 100644 --- a/bwamem.c +++ b/bwamem.c @@ -116,7 +116,7 @@ void smem_set_query(smem_i *itr, int len, const uint8_t *query) itr->len = len; } -const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int start_width) +const bwtintv_v *smem_next(smem_i *itr) { int i, max, max_i, ori_start; itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0; @@ -124,44 +124,16 @@ const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int sta while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases if (itr->start == itr->len) return 0; ori_start = itr->start; - itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, start_width, itr->matches, itr->tmpvec); // search for SMEM + itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, 1, itr->matches, itr->tmpvec); // search for SMEM if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match bwtintv_t *p = &itr->matches->a[i]; int len = (uint32_t)p->info - (p->info>>32); if (max < len) max = len, max_i = i; } - if (split_len > 0 && max >= split_len && itr->matches->a[max_i].x[2] <= split_width) { // if the longest SMEM is unique and long - int j; - bwtintv_v *a = itr->tmpvec[0]; // reuse tmpvec[0] for merging - bwtintv_t *p = &itr->matches->a[max_i]; - bwt_smem1(itr->bwt, itr->len, itr->query, ((uint32_t)p->info + (p->info>>32))>>1, itr->matches->a[max_i].x[2]+1, itr->sub, itr->tmpvec); // starting from the middle of the longest MEM - i = j = 0; a->n = 0; - while (i < itr->matches->n && j < itr->sub->n) { // ordered merge - int64_t xi = itr->matches->a[i].info>>32<<32 | (itr->len - (uint32_t)itr->matches->a[i].info); - int64_t xj = itr->sub->a[j].info>>32<<32 | (itr->len - (uint32_t)itr->sub->a[j].info); - if (xi < xj) { - kv_push(bwtintv_t, *a, itr->matches->a[i]); - ++i; - } else if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start) { - kv_push(bwtintv_t, *a, itr->sub->a[j]); - ++j; - } else ++j; - } - for (; i < itr->matches->n; ++i) kv_push(bwtintv_t, *a, itr->matches->a[i]); - for (; j < itr->sub->n; ++j) - if ((uint32_t)itr->sub->a[j].info - (itr->sub->a[j].info>>32) >= max>>1 && (uint32_t)itr->sub->a[j].info > ori_start) - kv_push(bwtintv_t, *a, itr->sub->a[j]); - kv_copy(bwtintv_t, *itr->matches, *a); - } return itr->matches; } -const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width) -{ - return smem_next2(itr, split_len, split_width, 1); -} - /*************************** * Collection SA invervals * ***************************/ @@ -410,7 +382,7 @@ int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *chains) int e_min = a[j].end < a[i].end? a[j].end : a[i].end; if (e_min > b_max) { // have overlap int min_l = a[i].end - a[i].beg < a[j].end - a[j].beg? a[i].end - a[i].beg : a[j].end - a[j].beg; - if (e_min - b_max >= min_l * opt->mask_level) { // significant overlap + if (e_min - b_max >= min_l * opt->mask_level && min_l < opt->max_chain_gap) { // significant overlap if (a[j].p2 == 0) a[j].p2 = a[i].p; if (a[i].w < a[j].w * opt->chain_drop_ratio && a[j].w - a[i].w >= opt->min_seed_len<<1) break; diff --git a/bwamem.h b/bwamem.h index 5291491..86e7d47 100644 --- a/bwamem.h +++ b/bwamem.h @@ -86,7 +86,7 @@ extern "C" { smem_i *smem_itr_init(const bwt_t *bwt); void smem_itr_destroy(smem_i *itr); void smem_set_query(smem_i *itr, int len, const uint8_t *query); - const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width); + const bwtintv_v *smem_next(smem_i *itr); mem_opt_t *mem_opt_init(void); void mem_fill_scmat(int a, int b, int8_t mat[25]); diff --git a/fastmap.c b/fastmap.c index 093fb7b..5aac1f7 100644 --- a/fastmap.c +++ b/fastmap.c @@ -37,7 +37,7 @@ int main_mem(int argc, char *argv[]) opt = mem_opt_init(); opt0.a = opt0.b = opt0.o_del = opt0.e_del = opt0.o_ins = opt0.e_ins = opt0.pen_unpaired = -1; opt0.pen_clip5 = opt0.pen_clip3 = opt0.zdrop = opt0.T = -1; - while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:")) >= 0) { + while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 'w') opt->w = atoi(optarg); else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1; @@ -58,6 +58,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'D') opt->chain_drop_ratio = atof(optarg); else if (c == 'm') opt->max_matesw = atoi(optarg); else if (c == 's') opt->split_width = atoi(optarg); + else if (c == 'N') opt->max_chain_gap = atoi(optarg); else if (c == 'C') copy_comment = 1; else if (c == 'Q') { opt->mapQ_coef_len = atoi(optarg); @@ -215,7 +216,7 @@ int main_mem(int argc, char *argv[]) int main_fastmap(int argc, char *argv[]) { - int c, i, min_iwidth = 20, min_len = 17, print_seq = 0, split_width = 0; + int c, i, min_iwidth = 20, min_len = 17, print_seq = 0; kseq_t *seq; bwtint_t k; gzFile fp; @@ -223,9 +224,8 @@ int main_fastmap(int argc, char *argv[]) const bwtintv_v *a; bwaidx_t *idx; - while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) { + while ((c = getopt(argc, argv, "w:l:p")) >= 0) { switch (c) { - case 's': split_width = atoi(optarg); break; case 'p': print_seq = 1; break; case 'w': min_iwidth = atoi(optarg); break; case 'l': min_len = atoi(optarg); break; @@ -233,7 +233,7 @@ int main_fastmap(int argc, char *argv[]) } } if (optind + 1 >= argc) { - fprintf(stderr, "Usage: bwa fastmap [-p] [-s splitWidth=%d] [-l minLen=%d] [-w maxSaSize=%d] \n", split_width, min_len, min_iwidth); + fprintf(stderr, "Usage: bwa fastmap [-p] [-l minLen=%d] [-w maxSaSize=%d] \n", min_len, min_iwidth); return 1; } @@ -250,7 +250,7 @@ int main_fastmap(int argc, char *argv[]) for (i = 0; i < seq->seq.l; ++i) seq->seq.s[i] = nst_nt4_table[(int)seq->seq.s[i]]; smem_set_query(itr, seq->seq.l, (uint8_t*)seq->seq.s); - while ((a = smem_next(itr, min_len<<1, split_width)) != 0) { + while ((a = smem_next(itr)) != 0) { for (i = 0; i < a->n; ++i) { bwtintv_t *p = &a->a[i]; if ((uint32_t)p->info - (p->info>>32) < min_len) continue; diff --git a/main.c b/main.c index 1923384..673cb8a 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r457" +#define PACKAGE_VERSION "0.7.8+dev-r458" #endif int bwa_fa2pac(int argc, char *argv[]);