r279: for PE mapq, consider the number of pairs
If there are a lot of proper pairs, it is more likely that the best pair is wrong.
This commit is contained in:
parent
9957e04590
commit
20aa848b3c
2
bwamem.c
2
bwamem.c
|
|
@ -604,7 +604,7 @@ int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a)
|
|||
mapq = a->score? (int)(MEM_MAPQ_COEF * (1. - (double)sub / a->score) * log(a->seedcov) + .499) : 0;
|
||||
identity = 1. - (double)(l * opt->a - a->score) / (opt->a + opt->b) / l;
|
||||
mapq = identity < 0.95? (int)(mapq * identity * identity + .499) : mapq;
|
||||
if (a->sub_n) mapq -= (int)(4.343 * log(a->sub_n) + .499);
|
||||
if (a->sub_n > 0) mapq -= (int)(4.343 * log(a->sub_n+1) + .499);
|
||||
if (mapq > 60) mapq = 60;
|
||||
if (mapq < 0) mapq = 0;
|
||||
return mapq;
|
||||
|
|
|
|||
2
bwamem.h
2
bwamem.h
|
|
@ -5,7 +5,7 @@
|
|||
#include "bntseq.h"
|
||||
#include "bwa.h"
|
||||
|
||||
#define MEM_MAPQ_COEF 40.0
|
||||
#define MEM_MAPQ_COEF 30.0
|
||||
#define MEM_MAPQ_MAX 60
|
||||
|
||||
struct __smem_i;
|
||||
|
|
|
|||
|
|
@ -167,13 +167,12 @@ int mem_matesw(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const me
|
|||
return n;
|
||||
}
|
||||
|
||||
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int z[2])
|
||||
int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_pestat_t pes[4], bseq1_t s[2], mem_alnreg_v a[2], int id, int *sub, int *n_sub, int z[2])
|
||||
{
|
||||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
||||
pair64_v v;
|
||||
pair64_t o, subo; // .x: score<<32 | raw_score<<8 | hash; .y: pair
|
||||
int r, i, k, y[4]; // y[] keeps the last hit
|
||||
kv_init(v);
|
||||
pair64_v v, u;
|
||||
int r, i, k, y[4], ret; // y[] keeps the last hit
|
||||
kv_init(v); kv_init(u);
|
||||
for (r = 0; r < 2; ++r) { // loop through read number
|
||||
for (i = 0; i < a[r].n; ++i) {
|
||||
pair64_t key;
|
||||
|
|
@ -185,7 +184,6 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
}
|
||||
ks_introsort_128(v.n, v.a);
|
||||
y[0] = y[1] = y[2] = y[3] = -1;
|
||||
o.x = subo.x = o.y = subo.y = 0;
|
||||
//for (i = 0; i < v.n; ++i) printf("[%d]\t%d\t%c%ld\n", i, (int)(v.a[i].y&1)+1, "+-"[v.a[i].y>>1&1], (long)v.a[i].x);
|
||||
for (i = 0; i < v.n; ++i) {
|
||||
for (r = 0; r < 2; ++r) { // loop through direction
|
||||
|
|
@ -197,7 +195,7 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
int64_t dist;
|
||||
int q;
|
||||
double ns;
|
||||
uint64_t x, pair;
|
||||
pair64_t *p;
|
||||
if ((v.a[k].y&3) != which) continue;
|
||||
dist = (int64_t)v.a[i].x - v.a[k].x;
|
||||
//printf("%d: %lld\n", k, dist);
|
||||
|
|
@ -206,23 +204,27 @@ int mem_pair(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, const mem_
|
|||
ns = (dist - pes[dir].avg) / pes[dir].std;
|
||||
q = (int)((v.a[i].y>>32) + (v.a[k].y>>32) + .721 * log(2. * erfc(fabs(ns) * M_SQRT1_2)) + .499); // .721 = 1/log(4)
|
||||
if (q < 0) q = 0;
|
||||
pair = (uint64_t)k<<32 | i;
|
||||
x = (uint64_t)q<<32 | (hash_64(pair ^ id<<8) & 0xffffffffU);
|
||||
p = kv_pushp(pair64_t, u);
|
||||
p->y = (uint64_t)k<<32 | i;
|
||||
p->x = (uint64_t)q<<32 | (hash_64(p->y ^ id<<8) & 0xffffffffU);
|
||||
//printf("[%lld,%lld]\t%d\tdist=%ld\n", v.a[k].x, v.a[i].x, q, (long)dist);
|
||||
if (x > o.x) subo = o, o.x = x, o.y = pair;
|
||||
else if (x > subo.x) subo.x = x, subo.y = pair;
|
||||
}
|
||||
}
|
||||
y[v.a[i].y&3] = i;
|
||||
}
|
||||
if (o.x > 0) {
|
||||
i = o.y >> 32; k = o.y << 32 >> 32;
|
||||
z[v.a[i].y&1] = v.a[i].y<<32>>34;
|
||||
if (u.n) { // found at least one proper pair
|
||||
int tmp = opt->a + opt->b > opt->q + opt->r? opt->a + opt->b : opt->q + opt->r;
|
||||
ks_introsort_128(u.n, u.a);
|
||||
i = u.a[u.n-1].y >> 32; k = u.a[u.n-1].y << 32 >> 32;
|
||||
z[v.a[i].y&1] = v.a[i].y<<32>>34; // index of the best pair
|
||||
z[v.a[k].y&1] = v.a[k].y<<32>>34;
|
||||
}
|
||||
free(v.a);
|
||||
*sub = subo.x>>32;
|
||||
return o.x>>32;
|
||||
ret = u.a[u.n-1].x >> 32;
|
||||
*sub = u.n > 1? u.a[u.n-2].x>>32 : 0;
|
||||
for (i = (long)u.n - 2, *n_sub = 0; i >= 0; --i)
|
||||
if (*sub - (int)(u.a[i].x>>32) <= tmp) ++*n_sub;
|
||||
} else ret = 0, *sub = 0, *n_sub = 0;
|
||||
free(u.a); free(v.a);
|
||||
return ret;
|
||||
}
|
||||
|
||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||
|
|
@ -233,7 +235,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
extern void mem_alnreg2hit(const mem_alnreg_t *a, bwahit_t *h);
|
||||
extern void bwa_hit2sam(kstring_t *str, const int8_t mat[25], int q, int r, int w, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, const bwahit_t *p, int is_hard, const bwahit_t *m);
|
||||
|
||||
int n = 0, i, j, z[2], o, subo;
|
||||
int n = 0, i, j, z[2], o, subo, n_sub;
|
||||
kstring_t str;
|
||||
mem_alnreg_v b[2];
|
||||
bwahit_t h[2];
|
||||
|
|
@ -253,7 +255,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
mem_mark_primary_se(opt, a[1].n, a[1].a);
|
||||
if (opt->flag&MEM_F_NOPAIRING) goto no_pairing;
|
||||
// pairing single-end hits
|
||||
if (a[0].n && a[1].n && (o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, z)) > 0) {
|
||||
if (a[0].n && a[1].n && (o = mem_pair(opt, bns->l_pac, pac, pes, s, a, id, &subo, &n_sub, z)) > 0) {
|
||||
int is_multi[2], q_pe, extra_flag = 1, score_un, q_se[2];
|
||||
// check if an end has multiple hits even after mate-SW
|
||||
for (i = 0; i < 2; ++i) {
|
||||
|
|
@ -267,6 +269,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
//q_pe = o && subo < o? (int)(MEM_MAPQ_COEF * (1. - (double)subo / o) * log(a[0].a[z[0]].seedcov + a[1].a[z[1]].seedcov) + .499) : 0;
|
||||
subo = subo > score_un? subo : score_un;
|
||||
q_pe = (o - subo) * 6;
|
||||
if (n_sub > 0) q_pe -= (int)(4.343 * log(n_sub+1) + .499);
|
||||
if (q_pe < 0) q_pe = 0;
|
||||
if (q_pe > 60) q_pe = 60;
|
||||
// the following assumes no split hits
|
||||
if (o > score_un) { // paired alignment is preferred
|
||||
|
|
|
|||
Loading…
Reference in New Issue