r336: fine tuning pemerge

This commit is contained in:
Heng Li 2013-03-06 23:38:07 -05:00
parent 557d50c7e1
commit 72817b664e
3 changed files with 11 additions and 11 deletions

View File

@ -163,7 +163,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
bwa_cigar_t *cigar = 0;
uint32_t *cigar32 = 0;
ubyte_t *rseq;
int tle, qle, gtle, gscore, lscore;
int tle, qle, gtle, gscore;
int64_t k, rb, re, rlen;
int8_t mat[25];
@ -176,7 +176,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences
seq_reverse(rlen, rseq, 0);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len;
rb = re - tle; rlen = tle;
seq_reverse(len, seq, 0);
@ -192,7 +192,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rb = *_pos; re = rb + len + SW_BW;
if (re > l_pac) re = l_pac;
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len;
re = rb + tle; rlen = tle;
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension

2
main.c
View File

@ -3,7 +3,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.0-r335-beta"
#define PACKAGE_VERSION "0.7.0-r336-beta"
#endif
int bwa_fa2pac(int argc, char *argv[]);

View File

@ -18,7 +18,7 @@ static const char *err_msg[MAX_ERR+1] = {
"pairs where the best SW alignment is not an overlap (long right end)",
"pairs with large 2nd best SW score",
"pairs with gapped overlap",
"pairs where the end-to-end ungapped alignment score is not high enough",
"pairs where the end-to-end alignment is inconsistent with SW",
"pairs potentially with tandem overlaps",
"pairs with high sum of errors"
};
@ -81,18 +81,18 @@ int bwa_pemerge(const pem_opt_t *opt, mseq_t x[2], uint8_t **seq_, uint8_t **qua
int max_m, max_m2, min_l, max_l, max_l2;
max_m = max_m2 = 0; max_l = max_l2 = 0;
min_l = x[0].s.l < x[1].s.l? x[0].s.l : x[1].s.l;
for (l = 0; l < min_l; ++l) {
for (l = 1; l < min_l; ++l) {
int m = 0, o = x[0].s.l - l;
int a = opt->a, b = -opt->b;
for (i = 0; i < l; ++i)
m += (s[0][o + i] == s[1][i])? a : b;
uint8_t *s0o = &s[0][o], *s1 = s[1];
for (i = 0; i < l; ++i) // TODO: in principle, this can be done with SSE2. It is the bottleneck!
m += opt->mat[(s1[i]<<2) + s1[i] + s0o[i]]; // equivalent to s[1][i]*5 + s[0][o+i]
if (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l;
else if (m > max_m2) max_m2 = m, max_l2 = l;
}
if (max_m < opt->T) return -6;
if (max_m < opt->T || max_l != x[0].s.l - (r.tb - r.qb)) return -6;
if (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO)
return -7;
//printf("*** %d,%d; %d,%d\n", max_m, max_m2, max_l, max_l2);
if (max_l2 > max_l && (double)max_m2 / max_m >= MAX_SCORE_RATIO) return -7;
}
l = x[0].s.l - (r.tb - r.qb); // length to merge