r278: don't perform too many mate-sw
This commit is contained in:
parent
e9e5ee6a3d
commit
9957e04590
24
bwamem.c
24
bwamem.c
|
|
@ -14,17 +14,6 @@
|
|||
#include "kvec.h"
|
||||
#include "ksort.h"
|
||||
|
||||
void mem_fill_scmat(int a, int b, int8_t mat[25])
|
||||
{
|
||||
int i, j, k;
|
||||
for (i = k = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
mat[k++] = i == j? a : -b;
|
||||
mat[k++] = 0; // ambiguous base
|
||||
}
|
||||
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||
}
|
||||
|
||||
/* Theory on probability and scoring *ungapped* alignment
|
||||
*
|
||||
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
|
||||
|
|
@ -64,12 +53,23 @@ mem_opt_t *mem_opt_init()
|
|||
o->split_factor = 1.5;
|
||||
o->chunk_size = 10000000;
|
||||
o->n_threads = 1;
|
||||
o->pe_dir = 0<<1|1;
|
||||
o->pen_unpaired = 9;
|
||||
o->max_matesw = 100;
|
||||
mem_fill_scmat(o->a, o->b, o->mat);
|
||||
return o;
|
||||
}
|
||||
|
||||
void mem_fill_scmat(int a, int b, int8_t mat[25])
|
||||
{
|
||||
int i, j, k;
|
||||
for (i = k = 0; i < 4; ++i) {
|
||||
for (j = 0; j < 4; ++j)
|
||||
mat[k++] = i == j? a : -b;
|
||||
mat[k++] = 0; // ambiguous base
|
||||
}
|
||||
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||
}
|
||||
|
||||
/***************************
|
||||
* SMEM iterator interface *
|
||||
***************************/
|
||||
|
|
|
|||
28
bwamem.h
28
bwamem.h
|
|
@ -18,18 +18,22 @@ typedef struct __smem_i smem_i;
|
|||
#define MEM_F_NO_MULTI 0x16
|
||||
|
||||
typedef struct {
|
||||
int a, b, q, r, w;
|
||||
int flag;
|
||||
int split_width;
|
||||
int min_seed_len, max_occ, max_chain_gap;
|
||||
int n_threads, chunk_size;
|
||||
int pe_dir;
|
||||
float mask_level;
|
||||
float chain_drop_ratio;
|
||||
float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor
|
||||
int pen_unpaired; // phred-scaled penalty for unpaired reads
|
||||
int max_ins; // maximum insert size
|
||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||
int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r
|
||||
int w; // band width
|
||||
int flag; // see MEM_F_* macros
|
||||
int min_seed_len; // minimum seed length
|
||||
float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor
|
||||
int split_width; // split into a seed if its occurence is smaller than this value
|
||||
int max_occ; // skip a seed if its occurence is larger than this value
|
||||
int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed
|
||||
int n_threads; // number of threads
|
||||
int chunk_size; // process chunk_size-bp sequences in a batch
|
||||
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
|
||||
float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
|
||||
int pen_unpaired; // phred-scaled penalty for unpaired reads
|
||||
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
||||
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||
} mem_opt_t;
|
||||
|
||||
typedef struct {
|
||||
|
|
|
|||
|
|
@ -246,7 +246,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired)
|
||||
kv_push(mem_alnreg_t, b[i], a[i].a[j]);
|
||||
for (i = 0; i < 2; ++i)
|
||||
for (j = 0; j < b[i].n; ++j)
|
||||
for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
|
||||
n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
||||
free(b[0].a); free(b[1].a);
|
||||
mem_mark_primary_se(opt, a[0].n, a[0].a);
|
||||
|
|
|
|||
|
|
@ -26,8 +26,9 @@ int main_mem(int argc, char *argv[])
|
|||
void *ko = 0, *ko2 = 0;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:w:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'w') opt->w = atoi(optarg);
|
||||
else if (c == 'A') opt->a = atoi(optarg);
|
||||
else if (c == 'B') opt->b = atoi(optarg);
|
||||
else if (c == 'O') opt->q = atoi(optarg);
|
||||
|
|
@ -52,6 +53,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, "Algorithm options:\n\n");
|
||||
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
||||
fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len);
|
||||
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
|
||||
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
||||
fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
||||
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
||||
|
|
|
|||
Loading…
Reference in New Issue