dev-r442: suppress exact full-length matches
This commit is contained in:
parent
1c19bc630f
commit
2e9463ebf1
21
bwamem.c
21
bwamem.c
|
|
@ -111,7 +111,7 @@ void smem_set_query(smem_i *itr, int len, const uint8_t *query)
|
|||
itr->len = len;
|
||||
}
|
||||
|
||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
||||
const bwtintv_v *smem_next2(smem_i *itr, int split_len, int split_width, int start_width)
|
||||
{
|
||||
int i, max, max_i, ori_start;
|
||||
itr->tmpvec[0]->n = itr->tmpvec[1]->n = itr->matches->n = itr->sub->n = 0;
|
||||
|
|
@ -119,7 +119,7 @@ const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
|||
while (itr->start < itr->len && itr->query[itr->start] > 3) ++itr->start; // skip ambiguous bases
|
||||
if (itr->start == itr->len) return 0;
|
||||
ori_start = itr->start;
|
||||
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, 1, itr->matches, itr->tmpvec); // search for SMEM
|
||||
itr->start = bwt_smem1(itr->bwt, itr->len, itr->query, ori_start, start_width, itr->matches, itr->tmpvec); // search for SMEM
|
||||
if (itr->matches->n == 0) return itr->matches; // well, in theory, we should never come here
|
||||
for (i = max = 0, max_i = 0; i < itr->matches->n; ++i) { // look for the longest match
|
||||
bwtintv_t *p = &itr->matches->a[i];
|
||||
|
|
@ -152,6 +152,11 @@ const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
|||
return itr->matches;
|
||||
}
|
||||
|
||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
||||
{
|
||||
return smem_next2(itr, split_len, split_width, 1);
|
||||
}
|
||||
|
||||
/********************************
|
||||
* Chaining while finding SMEMs *
|
||||
********************************/
|
||||
|
|
@ -200,8 +205,9 @@ static void mem_insert_seed(const mem_opt_t *opt, int64_t l_pac, kbtree_t(chn) *
|
|||
{
|
||||
const bwtintv_v *a;
|
||||
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
||||
int start_width = (opt->flag & MEM_F_NO_EXACT)? 2 : 1;
|
||||
split_len = split_len < itr->len? split_len : itr->len;
|
||||
while ((a = smem_next(itr, split_len, opt->split_width)) != 0) { // to find all SMEM and some internal MEM
|
||||
while ((a = smem_next2(itr, split_len, opt->split_width, start_width)) != 0) { // to find all SMEM and some internal MEM
|
||||
int i;
|
||||
for (i = 0; i < a->n; ++i) { // go through each SMEM/MEM up to itr->start
|
||||
bwtintv_t *p = &a->a[i];
|
||||
|
|
@ -425,6 +431,13 @@ int mem_sort_and_dedup(int n, mem_alnreg_t *a, float mask_level_redun)
|
|||
return m;
|
||||
}
|
||||
|
||||
int mem_test_and_remove_exact(const mem_opt_t *opt, int n, mem_alnreg_t *a, int qlen)
|
||||
{
|
||||
if (!(opt->flag & MEM_F_NO_EXACT) || n == 0 || a->truesc != qlen * opt->a) return n;
|
||||
memmove(a, a + 1, (n - 1) * sizeof(mem_alnreg_t));
|
||||
return n - 1;
|
||||
}
|
||||
|
||||
void mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id) // IMPORTANT: must run mem_sort_and_dedup() before calling this function
|
||||
{ // similar to the loop in mem_chain_flt()
|
||||
int i, k, tmp;
|
||||
|
|
@ -894,6 +907,8 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse
|
|||
}
|
||||
free(chn.a);
|
||||
regs.n = mem_sort_and_dedup(regs.n, regs.a, opt->mask_level_redun);
|
||||
if (opt->flag & MEM_F_NO_EXACT)
|
||||
regs.n = mem_test_and_remove_exact(opt, regs.n, regs.a, l_seq);
|
||||
return regs;
|
||||
}
|
||||
|
||||
|
|
|
|||
1
bwamem.h
1
bwamem.h
|
|
@ -16,6 +16,7 @@ typedef struct __smem_i smem_i;
|
|||
#define MEM_F_ALL 0x8
|
||||
#define MEM_F_NO_MULTI 0x10
|
||||
#define MEM_F_NO_RESCUE 0x20
|
||||
#define MEM_F_NO_EXACT 0x40
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ int main_mem(int argc, char *argv[])
|
|||
int64_t n_processed = 0;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'w') opt->w = atoi(optarg);
|
||||
else if (c == 'A') opt->a = atoi(optarg);
|
||||
|
|
@ -45,6 +45,7 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'p') opt->flag |= MEM_F_PE;
|
||||
else if (c == 'M') opt->flag |= MEM_F_NO_MULTI;
|
||||
else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE;
|
||||
else if (c == 'e') opt->flag |= MEM_F_NO_EXACT;
|
||||
else if (c == 'c') opt->max_occ = atoi(optarg);
|
||||
else if (c == 'd') opt->zdrop = atoi(optarg);
|
||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||
|
|
@ -81,6 +82,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, " -m INT perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw);
|
||||
fprintf(stderr, " -S skip mate rescue\n");
|
||||
fprintf(stderr, " -P skip pairing; mate rescue performed unless -S also in use\n");
|
||||
fprintf(stderr, " -e discard full-length exact matches\n");
|
||||
fprintf(stderr, " -A INT score for a sequence match [%d]\n", opt->a);
|
||||
fprintf(stderr, " -B INT penalty for a mismatch [%d]\n", opt->b);
|
||||
fprintf(stderr, " -O INT gap open penalty [%d]\n", opt->q);
|
||||
|
|
|
|||
Loading…
Reference in New Issue