alternative way to estimate mapQ
the old mapQ estimate is too conservative
This commit is contained in:
parent
ed78df9184
commit
623da055e1
13
bwamem.c
13
bwamem.c
|
|
@ -63,6 +63,8 @@ mem_opt_t *mem_opt_init()
|
||||||
o->chunk_size = 10000000;
|
o->chunk_size = 10000000;
|
||||||
o->n_threads = 1;
|
o->n_threads = 1;
|
||||||
o->max_matesw = 100;
|
o->max_matesw = 100;
|
||||||
|
o->mapQ_coef_len = 100;
|
||||||
|
o->mapQ_coef_fac = log(o->mapQ_coef_len);
|
||||||
bwa_fill_scmat(o->a, o->b, o->mat);
|
bwa_fill_scmat(o->a, o->b, o->mat);
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
@ -768,9 +770,18 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
|
||||||
sub = a->csub > sub? a->csub : sub;
|
sub = a->csub > sub? a->csub : sub;
|
||||||
if (sub >= a->score) return 0;
|
if (sub >= a->score) return 0;
|
||||||
l = a->qe - a->qb > a->re - a->rb? a->qe - a->qb : a->re - a->rb;
|
l = a->qe - a->qb > a->re - a->rb? a->qe - a->qb : a->re - a->rb;
|
||||||
mapq = a->score? (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499) : 0;
|
|
||||||
identity = 1. - (double)(l * opt->a - a->score) / (opt->a + opt->b) / l;
|
identity = 1. - (double)(l * opt->a - a->score) / (opt->a + opt->b) / l;
|
||||||
|
if (a->score == 0) {
|
||||||
|
mapq = 0;
|
||||||
|
} else if (opt->mapQ_coef_len > 0) {
|
||||||
|
double tmp;
|
||||||
|
tmp = 6.02 * (a->score - sub) / opt->a * identity;
|
||||||
|
if (l > opt->mapQ_coef_len) tmp *= log(l) / opt->mapQ_coef_fac;
|
||||||
|
mapq = (int)(tmp + .499);
|
||||||
|
} else {
|
||||||
|
mapq = (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499);
|
||||||
mapq = identity < 0.95? (int)(mapq * identity * identity + .499) : mapq;
|
mapq = identity < 0.95? (int)(mapq * identity * identity + .499) : mapq;
|
||||||
|
}
|
||||||
if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499);
|
if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499);
|
||||||
if (mapq > 60) mapq = 60;
|
if (mapq > 60) mapq = 60;
|
||||||
if (mapq < 0) mapq = 0;
|
if (mapq < 0) mapq = 0;
|
||||||
|
|
|
||||||
2
bwamem.h
2
bwamem.h
|
|
@ -35,6 +35,8 @@ typedef struct {
|
||||||
int chunk_size; // process chunk_size-bp sequences in a batch
|
int chunk_size; // process chunk_size-bp sequences in a batch
|
||||||
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
|
float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits
|
||||||
float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
|
float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain
|
||||||
|
float mapQ_coef_len;
|
||||||
|
int mapQ_coef_fac;
|
||||||
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value
|
||||||
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end
|
||||||
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
|
#include <math.h>
|
||||||
#include "bwa.h"
|
#include "bwa.h"
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
|
|
@ -28,7 +29,7 @@ int main_mem(int argc, char *argv[])
|
||||||
void *ko = 0, *ko2 = 0;
|
void *ko = 0, *ko2 = 0;
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:")) >= 0) {
|
while ((c = getopt(argc, argv, "paMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:")) >= 0) {
|
||||||
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
if (c == 'k') opt->min_seed_len = atoi(optarg);
|
||||||
else if (c == 'w') opt->w = atoi(optarg);
|
else if (c == 'w') opt->w = atoi(optarg);
|
||||||
else if (c == 'A') opt->a = atoi(optarg);
|
else if (c == 'A') opt->a = atoi(optarg);
|
||||||
|
|
@ -48,7 +49,10 @@ int main_mem(int argc, char *argv[])
|
||||||
else if (c == 'v') bwa_verbose = atoi(optarg);
|
else if (c == 'v') bwa_verbose = atoi(optarg);
|
||||||
else if (c == 'r') opt->split_factor = atof(optarg);
|
else if (c == 'r') opt->split_factor = atof(optarg);
|
||||||
else if (c == 'C') copy_comment = 1;
|
else if (c == 'C') copy_comment = 1;
|
||||||
else if (c == 'L') {
|
else if (c == 'Q') {
|
||||||
|
opt->mapQ_coef_len = atoi(optarg);
|
||||||
|
opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0;
|
||||||
|
} else if (c == 'L') {
|
||||||
char *p;
|
char *p;
|
||||||
opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10);
|
opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10);
|
||||||
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
|
if (*p != 0 && ispunct(*p) && isdigit(p[1]))
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue