diff --git a/bwtsw2.h b/bwtsw2.h index 0718826..c13ef9c 100644 --- a/bwtsw2.h +++ b/bwtsw2.h @@ -6,9 +6,10 @@ #include "bwt_lite.h" #include "bwt.h" -#define BSW2_FLAG_MATESW 0x100 -#define BSW2_FLAG_TANDEM 0x200 -#define BSW2_FLAG_MOVED 0x400 +#define BSW2_FLAG_MATESW 0x100 +#define BSW2_FLAG_TANDEM 0x200 +#define BSW2_FLAG_MOVED 0x400 +#define BSW2_FLAG_RESCUED 0x800 typedef struct { int a, b, q, r, t, qr, bw; diff --git a/bwtsw2_aux.c b/bwtsw2_aux.c index eff6cd1..6dc4f36 100644 --- a/bwtsw2_aux.c +++ b/bwtsw2_aux.c @@ -452,17 +452,15 @@ static void update_mate_aux(bwtsw2_t *b, const bwtsw2_t *m) // update mapping quality if (b->n == 1 && m->n == 1) { bsw2hit_t *p = &b->hits[0]; - int isize; - if (p->flag & BSW2_FLAG_MATESW) { // this alignment is rescued by Smith-Waterman - if (!(p->flag & BSW2_FLAG_TANDEM) && b->aux[0].pqual < m->aux[0].qual) - b->aux[0].pqual = m->aux[0].qual; - } else if (p->flag&2) { // properly paired - if (!(p->flag & BSW2_FLAG_TANDEM)) { // not around a tandem repeat - if (b->aux[0].pqual < m->aux[0].qual) { - b->aux[0].pqual += 20; - if (b->aux[0].pqual >= m->aux[0].qual) - b->aux[0].pqual = m->aux[0].qual; - } + if (p->flag & BSW2_FLAG_MATESW) { // this alignment is found by Smith-Waterman + if (!(p->flag & BSW2_FLAG_TANDEM) && b->aux[0].pqual < 20) + b->aux[0].pqual = 20; + if (b->aux[0].pqual >= m->aux[0].qual) b->aux[0].pqual = m->aux[0].qual; + } else if ((p->flag & 2) && !(m->hits[0].flag & BSW2_FLAG_MATESW)) { // properly paired + if (!(p->flag & BSW2_FLAG_TANDEM)) { // pqual is bounded by [b->aux[0].qual,m->aux[0].qual] + b->aux[0].pqual += 20; + if (b->aux[0].pqual > m->aux[0].qual) b->aux[0].pqual = m->aux[0].qual; + if (b->aux[0].pqual < b->aux[0].qual) b->aux[0].pqual = b->aux[0].qual; } } } diff --git a/bwtsw2_pair.c b/bwtsw2_pair.c index ac149a4..5a83199 100644 --- a/bwtsw2_pair.c +++ b/bwtsw2_pair.c @@ -85,7 +85,7 @@ void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const b ksw_query_t *q; ksw_aux_t aux[2]; // compute the region start and end - a->n_seeds = 1; a->l = 0; + a->n_seeds = 1; a->flag |= BSW2_FLAG_MATESW; // before calling this routine, *a has been cleared with memset(0); the flag is set with 1<<6/7 if (h->is_rev == 0) { beg = (int64_t)(h->k + st->avg - EXT_STDDEV * st->std - l_mseq + .499); end = (int64_t)(h->k + st->avg + EXT_STDDEV * st->std + .499); @@ -117,7 +117,6 @@ void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const b ksw_sse2(q, end - beg, ref, &aux[0]); free(q); if (aux[0].score < opt->t) { - aux[0].score = 0; free(seq); return; } @@ -132,6 +131,8 @@ void bsw2_pair1(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, const b // write output a->G = aux[0].score; a->G2 = aux[0].score2 > aux[1].score2? aux[0].score2 : aux[1].score2; + if (a->G2 < opt->t) a->G2 = 0; + if (a->G2) a->flag |= BSW2_FLAG_TANDEM; a->k = beg + (aux[0].te - aux[1].te); a->len = aux[1].te; a->beg = aux[0].qe - aux[1].qe; @@ -174,8 +175,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b if (hits[i]->n == 1) p[0] = hits[i], p[1] = hits[i+1], which = 1; else p[0] = hits[i+1], p[1] = hits[i], which = 0; if (a[which].G == 0) continue; - a[which].flag |= BSW2_FLAG_MATESW; - if (a[which].G2) a[which].flag |= BSW2_FLAG_TANDEM; + a[which].flag |= BSW2_FLAG_RESCUED; if (p[1]->max == 0) { p[1]->max = 1; p[1]->hits = malloc(sizeof(bsw2hit_t)); @@ -186,30 +186,31 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b p[1]->hits[0].flag |= 2; ++n_rescued; } else { // then both ends mapped - int ori_G2[2]; + int is_fixed = 0; //fprintf(stderr, "%d; %lld,%lld; %d,%d\n", a[0].is_rev, hits[i]->hits[0].k, a[0].k, hits[i]->hits[0].end, a[0].end); - ori_G2[0] = a[0].G2; ori_G2[1] = a[1].G2; - for (j = 0; j < 2; ++j) { // first fix wrong mappings - if (hits[i+j]->hits[0].G < a[j].G) { // the orginal mapping is suboptimal - a[j].G2 = a[j].G2 > hits[i+j]->hits[0].G? a[j].G2 : hits[i+j]->hits[0].G; - hits[i+j]->hits[0] = a[j]; + for (j = 0; j < 2; ++j) { // fix wrong mappings and wrong suboptimal alignment score + bsw2hit_t *p = &hits[i+j]->hits[0]; + if (p->G < a[j].G) { // the orginal mapping is suboptimal + a[j].G2 = a[j].G2 > p->G? a[j].G2 : p->G; // FIXME: reset BSW2_FLAG_TANDEM? + *p = a[j]; ++n_fixed; + is_fixed = 1; + } else if (p->k != a[j].k && p->G2 < a[j].G) { + p->G2 = a[j].G; + } else if (p->k == a[j].k && p->G2 < a[j].G2) { + p->G2 = a[j].G2; } } if (hits[i]->hits[0].k == a[0].k && hits[i+1]->hits[0].k == a[1].k) { // properly paired and no ends need to be moved - for (j = 0; j < 2; ++j) { - if (hits[i+j]->hits[0].G2 < a[j].G2) - hits[i+j]->hits[0].G2 = a[j].G2; - if (ori_G2[j]) hits[i+j]->hits[0].flag |= BSW2_FLAG_TANDEM; - hits[i+j]->hits[0].flag |= 2; - } + for (j = 0; j < 2; ++j) + hits[i+j]->hits[0].flag |= 2 | (a[j].flag & BSW2_FLAG_TANDEM); } else if (hits[i]->hits[0].k == a[0].k || hits[i+1]->hits[0].k == a[1].k) { // a tandem match for (j = 0; j < 2; ++j) { hits[i+j]->hits[0].flag |= 2; if (hits[i+j]->hits[0].k != a[j].k) hits[i+j]->hits[0].flag |= BSW2_FLAG_TANDEM; } - } else if (a[0].G || a[1].G) { // it is possible to move one end + } else if (!is_fixed && (a[0].G || a[1].G)) { // it is possible to move one end if (a[0].G && a[1].G) { // now we have two "proper pairs" int G[2]; double diff; @@ -219,7 +220,7 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b if (diff > 0.05) a[G[0] > G[1]? 0 : 1].G = 0; } if (a[0].G == 0 || a[1].G == 0) { // one proper pair only - bsw2hit_t *p[2]; + bsw2hit_t *p[2]; // p[0] points the unchanged hit; p[1] to the hit to be moved int which, isize; double dev, diff; if (a[0].G) p[0] = &hits[i+1]->hits[0], p[1] = &hits[i]->hits[0], which = 0; @@ -227,16 +228,17 @@ void bsw2_pair(const bsw2opt_t *opt, int64_t l_pac, const uint8_t *pac, int n, b isize = p[0]->is_rev? p[0]->k + p[0]->len - a[which].k : a[which].k + a[which].len - p[0]->k; dev = fabs(isize - pes.avg) / pes.std; diff = (double)(p[1]->G - a[which].G) / (opt->a + opt->b) / (p[1]->end - p[1]->beg) * 100.0; - if (diff < dev * 2.) { // then move - int tflag = 0; - if (ori_G2[which]) tflag = BSW2_FLAG_TANDEM; + if (diff < dev * 2.) { // then move (heuristic) a[which].G2 = a[which].G; p[1][0] = a[which]; - p[1]->flag |= BSW2_FLAG_MOVED | 2 | tflag; + p[1]->flag |= BSW2_FLAG_MOVED | 2; p[0]->flag |= 2; ++n_moved; } - } // else, do nothing + } + } else if (is_fixed) { + hits[i+0]->hits[0].flag |= 2; + hits[i+1]->hits[0].flag |= 2; } } } diff --git a/main.c b/main.c index 7130310..edebe0b 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.6.0-r78-dev" +#define PACKAGE_VERSION "0.6.0-r79-dev" #endif static int usage()