r336: fine tuning pemerge

This commit is contained in:
Heng Li 2013-03-06 23:38:07 -05:00
parent 557d50c7e1
commit 72817b664e
3 changed files with 11 additions and 11 deletions

View File

@ -163,7 +163,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
bwa_cigar_t *cigar = 0; bwa_cigar_t *cigar = 0;
uint32_t *cigar32 = 0; uint32_t *cigar32 = 0;
ubyte_t *rseq; ubyte_t *rseq;
int tle, qle, gtle, gscore, lscore; int tle, qle, gtle, gscore;
int64_t k, rb, re, rlen; int64_t k, rb, re, rlen;
int8_t mat[25]; int8_t mat[25];
@ -176,7 +176,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences seq_reverse(len, seq, 0); // as we need to do left extension, we have to reverse both query and reference sequences
seq_reverse(rlen, rseq, 0); seq_reverse(rlen, rseq, 0);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0); ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len; if (gscore > 0) tle = gtle, qle = len;
rb = re - tle; rlen = tle; rb = re - tle; rlen = tle;
seq_reverse(len, seq, 0); seq_reverse(len, seq, 0);
@ -192,7 +192,7 @@ bwa_cigar_t *bwa_refine_gapped_core(bwtint_t l_pac, const ubyte_t *pacseq, int l
rb = *_pos; re = rb + len + SW_BW; rb = *_pos; re = rb + len + SW_BW;
if (re > l_pac) re = l_pac; if (re > l_pac) re = l_pac;
rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen); rseq = bns_get_seq(l_pac, pacseq, rb, re, &rlen);
lscore = ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0); ksw_extend(len, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, -1, len<<1, &qle, &tle, &gtle, &gscore, 0);
if (gscore > 0) tle = gtle, qle = len; if (gscore > 0) tle = gtle, qle = len;
re = rb + tle; rlen = tle; re = rb + tle; rlen = tle;
ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension ksw_global(qle, seq, rlen, rseq, 5, mat, 5, 1, SW_BW, n_cigar, &cigar32); // right extension

2
main.c
View File

@ -3,7 +3,7 @@
#include "utils.h" #include "utils.h"
#ifndef PACKAGE_VERSION #ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.0-r335-beta" #define PACKAGE_VERSION "0.7.0-r336-beta"
#endif #endif
int bwa_fa2pac(int argc, char *argv[]); int bwa_fa2pac(int argc, char *argv[]);

View File

@ -18,7 +18,7 @@ static const char *err_msg[MAX_ERR+1] = {
"pairs where the best SW alignment is not an overlap (long right end)", "pairs where the best SW alignment is not an overlap (long right end)",
"pairs with large 2nd best SW score", "pairs with large 2nd best SW score",
"pairs with gapped overlap", "pairs with gapped overlap",
"pairs where the end-to-end ungapped alignment score is not high enough", "pairs where the end-to-end alignment is inconsistent with SW",
"pairs potentially with tandem overlaps", "pairs potentially with tandem overlaps",
"pairs with high sum of errors" "pairs with high sum of errors"
}; };
@ -81,18 +81,18 @@ int bwa_pemerge(const pem_opt_t *opt, mseq_t x[2], uint8_t **seq_, uint8_t **qua
int max_m, max_m2, min_l, max_l, max_l2; int max_m, max_m2, min_l, max_l, max_l2;
max_m = max_m2 = 0; max_l = max_l2 = 0; max_m = max_m2 = 0; max_l = max_l2 = 0;
min_l = x[0].s.l < x[1].s.l? x[0].s.l : x[1].s.l; min_l = x[0].s.l < x[1].s.l? x[0].s.l : x[1].s.l;
for (l = 0; l < min_l; ++l) { for (l = 1; l < min_l; ++l) {
int m = 0, o = x[0].s.l - l; int m = 0, o = x[0].s.l - l;
int a = opt->a, b = -opt->b; uint8_t *s0o = &s[0][o], *s1 = s[1];
for (i = 0; i < l; ++i) for (i = 0; i < l; ++i) // TODO: in principle, this can be done with SSE2. It is the bottleneck!
m += (s[0][o + i] == s[1][i])? a : b; m += opt->mat[(s1[i]<<2) + s1[i] + s0o[i]]; // equivalent to s[1][i]*5 + s[0][o+i]
if (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l; if (m > max_m) max_m2 = max_m, max_m = m, max_l2 = max_l, max_l = l;
else if (m > max_m2) max_m2 = m, max_l2 = l; else if (m > max_m2) max_m2 = m, max_l2 = l;
} }
if (max_m < opt->T) return -6; if (max_m < opt->T || max_l != x[0].s.l - (r.tb - r.qb)) return -6;
if (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO) if (max_l2 < max_l && max_m2 >= opt->T && (double)(max_m2 + (max_l - max_l2) * opt->a) / max_m >= MAX_SCORE_RATIO)
return -7; return -7;
//printf("*** %d,%d; %d,%d\n", max_m, max_m2, max_l, max_l2); if (max_l2 > max_l && (double)max_m2 / max_m >= MAX_SCORE_RATIO) return -7;
} }
l = x[0].s.l - (r.tb - r.qb); // length to merge l = x[0].s.l - (r.tb - r.qb); // length to merge