r278: don't perform too many mate-sw
This commit is contained in:
parent
e9e5ee6a3d
commit
9957e04590
24
bwamem.c
24
bwamem.c
|
|
@ -14,17 +14,6 @@
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "ksort.h"
|
#include "ksort.h"
|
||||||
|
|
||||||
void mem_fill_scmat(int a, int b, int8_t mat[25])
|
|
||||||
{
|
|
||||||
int i, j, k;
|
|
||||||
for (i = k = 0; i < 4; ++i) {
|
|
||||||
for (j = 0; j < 4; ++j)
|
|
||||||
mat[k++] = i == j? a : -b;
|
|
||||||
mat[k++] = 0; // ambiguous base
|
|
||||||
}
|
|
||||||
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Theory on probability and scoring *ungapped* alignment
|
/* Theory on probability and scoring *ungapped* alignment
|
||||||
*
|
*
|
||||||
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
|
* s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution
|
||||||
|
|
@ -64,12 +53,23 @@ mem_opt_t *mem_opt_init()
|
||||||
o->split_factor = 1.5;
|
o->split_factor = 1.5;
|
||||||
o->chunk_size = 10000000;
|
o->chunk_size = 10000000;
|
||||||
o->n_threads = 1;
|
o->n_threads = 1;
|
||||||
o->pe_dir = 0<<1|1;
|
|
||||||
o->pen_unpaired = 9;
|
o->pen_unpaired = 9;
|
||||||
|
o->max_matesw = 100;
|
||||||
mem_fill_scmat(o->a, o->b, o->mat);
|
mem_fill_scmat(o->a, o->b, o->mat);
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void mem_fill_scmat(int a, int b, int8_t mat[25])
|
||||||
|
{
|
||||||
|
int i, j, k;
|
||||||
|
for (i = k = 0; i < 4; ++i) {
|
||||||
|
for (j = 0; j < 4; ++j)
|
||||||
|
mat[k++] = i == j? a : -b;
|
||||||
|
mat[k++] = 0; // ambiguous base
|
||||||
|
}
|
||||||
|
for (j = 0; j < 5; ++j) mat[k++] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
/***************************
|
/***************************
|
||||||
* SMEM iterator interface *
|
* SMEM iterator interface *
|
||||||
***************************/
|
***************************/
|
||||||
|
|
|
||||||
22
bwamem.h
22
bwamem.h
|
|
@ -18,17 +18,21 @@ typedef struct __smem_i smem_i;
|
||||||
#define MEM_F_NO_MULTI 0x16
|
#define MEM_F_NO_MULTI 0x16
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int a, b, q, r, w;
|
int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r
|
||||||
int flag;
|
int w; // band width
|
||||||
int split_width;
|
int flag; // see MEM_F_* macros
|
||||||
int min_seed_len, max_occ, max_chain_gap;
|
int min_seed_len; // minimum seed length
|
||||||
int n_threads, chunk_size;
|
|
||||||
int pe_dir;
|
|
||||||
float mask_level;
|
|
||||||
float chain_drop_ratio;
|
|
||||||
float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor
|
float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor
|
||||||
|
int split_width; // split into a seed if its occurence is smaller than this value
|
||||||
|
int max_occ; // skip a seed if its occurence is larger than this value
|
||||||
|
int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed
|
||||||
|
int n_threads; // number of threads
|
||||||
|
int chunk_size; // process chunk_size-bp sequences in a batch
|
||||||
|
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
|
||||||
|
float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
|
||||||
int pen_unpaired; // phred-scaled penalty for unpaired reads
|
int pen_unpaired; // phred-scaled penalty for unpaired reads
|
||||||
int max_ins; // maximum insert size
|
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
||||||
|
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||||
} mem_opt_t;
|
} mem_opt_t;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -246,7 +246,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
||||||
if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired)
|
if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired)
|
||||||
kv_push(mem_alnreg_t, b[i], a[i].a[j]);
|
kv_push(mem_alnreg_t, b[i], a[i].a[j]);
|
||||||
for (i = 0; i < 2; ++i)
|
for (i = 0; i < 2; ++i)
|
||||||
for (j = 0; j < b[i].n; ++j)
|
for (j = 0; j < b[i].n && j < opt->max_matesw; ++j)
|
||||||
n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
||||||
free(b[0].a); free(b[1].a);
|
free(b[0].a); free(b[1].a);
|
||||||
mem_mark_primary_se(opt, a[0].n, a[0].a);
|
mem_mark_primary_se(opt, a[0].n, a[0].a);
|
||||||
|
|
|
||||||
|
|
@ -26,8 +26,9 @@ int main_mem(int argc, char *argv[])
|
||||||
void *ko = 0, *ko2 = 0;
|
void *ko = 0, *ko2 = 0;
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:")) >= 0) {
|
while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:w:")) >= 0) {
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||||
|
else if (c == 'w') opt->w = atoi(optarg);
|
||||||
else if (c == 'A') opt->a = atoi(optarg);
|
else if (c == 'A') opt->a = atoi(optarg);
|
||||||
else if (c == 'B') opt->b = atoi(optarg);
|
else if (c == 'B') opt->b = atoi(optarg);
|
||||||
else if (c == 'O') opt->q = atoi(optarg);
|
else if (c == 'O') opt->q = atoi(optarg);
|
||||||
|
|
@ -52,6 +53,7 @@ int main_mem(int argc, char *argv[])
|
||||||
fprintf(stderr, "Algorithm options:\n\n");
|
fprintf(stderr, "Algorithm options:\n\n");
|
||||||
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads);
|
||||||
fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len);
|
fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len);
|
||||||
|
fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w);
|
||||||
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor);
|
||||||
fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width);
|
||||||
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue