alternative way to estimate mapQ
the old mapQ estimate is too conservative
This commit is contained in:
parent
ed78df9184
commit
623da055e1
15
bwamem.c
15
bwamem.c
|
|
@ -63,6 +63,8 @@ mem_opt_t *mem_opt_init()
|
|||
o->chunk_size = 10000000;
|
||||
o->n_threads = 1;
|
||||
o->max_matesw = 100;
|
||||
o->mapQ_coef_len = 100;
|
||||
o->mapQ_coef_fac = log(o->mapQ_coef_len);
|
||||
bwa_fill_scmat(o->a, o->b, o->mat);
|
||||
return o;
|
||||
}
|
||||
|
|
@ -768,9 +770,18 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
|
|||
sub = a->csub > sub? a->csub : sub;
|
||||
if (sub >= a->score) return 0;
|
||||
l = a->qe - a->qb > a->re - a->rb? a->qe - a->qb : a->re - a->rb;
|
||||
mapq = a->score? (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499) : 0;
|
||||
identity = 1. - (double)(l * opt->a - a->score) / (opt->a + opt->b) / l;
|
||||
mapq = identity < 0.95? (int)(mapq * identity * identity + .499) : mapq;
|
||||
if (a->score == 0) {
|
||||
mapq = 0;
|
||||
} else if (opt->mapQ_coef_len > 0) {
|
||||
double tmp;
|
||||
tmp = 6.02 * (a->score - sub) / opt->a * identity;
|
||||
if (l > opt->mapQ_coef_len) tmp *= log(l) / opt->mapQ_coef_fac;
|
||||
mapq = (int)(tmp + .499);
|
||||
} else {
|
||||
mapq = (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499);
|
||||
mapq = identity < 0.95? (int)(mapq * identity * identity + .499) : mapq;
|
||||
}
|
||||
if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499);
|
||||
if (mapq > 60) mapq = 60;
|
||||
if (mapq < 0) mapq = 0;
|
||||
|
|
|
|||
2
bwamem.h
2
bwamem.h
|
|
@ -35,6 +35,8 @@ typedef struct {
|
|||
int chunk_size; // process chunk_size-bp sequences in a batch
|
||||
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
|
||||
float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
|
||||
float mapQ_coef_len;
|
||||
int mapQ_coef_fac;
|
||||
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
||||
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include "bwa.h"
|
||||
#include "bwamem.h"
|
||||
#include "kvec.h"
|
||||
|
|
@ -28,7 +29,7 @@ int main_mem(int argc, char *argv[])
|
|||
void *ko = 0, *ko2 = 0;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||
else if (c == 'w') opt->w = atoi(optarg);
|
||||
else if (c == 'A') opt->a = atoi(optarg);
|
||||
|
|
@ -48,7 +49,10 @@ int main_mem(int argc, char *argv[])
|
|||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||
else if (c == 'r') opt->split_factor = atof(optarg);
|
||||
else if (c == 'C') copy_comment = 1;
|
||||
else if (c == 'L') {
|
||||
else if (c == 'Q') {
|
||||
opt->mapQ_coef_len = atoi(optarg);
|
||||
opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0;
|
||||
} else if (c == 'L') {
|
||||
char *p;
|
||||
opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10);
|
||||
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
|
||||
|
|
|
|||
Loading…
Reference in New Issue