From e0991d6a459daa27031df17405e0d62f0395a6f1 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 5 Mar 2013 00:34:33 -0500 Subject: [PATCH] r323: added Z-dropoff, a variant of blast's X-drop --- bwamem.c | 5 +++-- bwamem.h | 1 + fastmap.c | 6 +++++- ksw.c | 4 ++-- ksw.h | 2 +- main.c | 2 +- 6 files changed, 13 insertions(+), 7 deletions(-) diff --git a/bwamem.c b/bwamem.c index d751cfb..20d8bdb 100644 --- a/bwamem.c +++ b/bwamem.c @@ -44,6 +44,7 @@ mem_opt_t *mem_opt_init() o = calloc(1, sizeof(mem_opt_t)); o->flag = 0; o->a = 1; o->b = 4; o->q = 6; o->r = 1; o->w = 60; o->max_w = 500; + o->zdrop = 100; o->pen_unpaired = 9; o->pen_clip = 5; o->min_seed_len = 19; @@ -560,7 +561,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int rs = malloc(tmp); for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i]; for (aw[0] = opt->w;; aw[0] <<= 1) { - a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); + a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); if (bwa_verbose >= 4) printf("L\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[0], max_off[0]); fflush(stdout); if (a->score == tmps || aw[0]<<1 > opt->max_w || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; tmps = a->score; @@ -577,7 +578,7 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int re = s->rbeg + s->len - rmax[0]; assert(re >= 0); for (aw[1] = opt->w;; aw[1] <<= 1) { - a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], sc0, &qle, &tle, >le, &gscore, &max_off[1]); + a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); if (bwa_verbose >= 4) printf("R\t%d < %d; w=%d; max_off=%d\n", tmps, a->score, aw[1], max_off[1]); fflush(stdout); if (a->score == tmps || aw[1]<<1 > opt->max_w || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; tmps = a->score; diff --git a/bwamem.h b/bwamem.h index 4d632e7..7d6e821 100644 --- a/bwamem.h +++ b/bwamem.h @@ -23,6 +23,7 @@ typedef struct { int pen_clip; // clipping penalty. This score is not deducted from the DP score. int w; // band width int max_w; // max band width + int zdrop; // Z-dropoff int flag; // see MEM_F_* macros int min_seed_len; // minimum seed length diff --git a/fastmap.c b/fastmap.c index 56cfb01..eb93994 100644 --- a/fastmap.c +++ b/fastmap.c @@ -26,9 +26,10 @@ int main_mem(int argc, char *argv[]) void *ko = 0, *ko2 = 0; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:")) >= 0) { + while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:W:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 'w') opt->w = atoi(optarg); + else if (c == 'W') opt->max_w = atoi(optarg); else if (c == 'A') opt->a = atoi(optarg); else if (c == 'B') opt->b = atoi(optarg); else if (c == 'O') opt->q = atoi(optarg); @@ -42,6 +43,7 @@ int main_mem(int argc, char *argv[]) else if (c == 'p') opt->flag |= MEM_F_PE; else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; else if (c == 'c') opt->max_occ = atoi(optarg); + else if (c == 'd') opt->zdrop = atoi(optarg); else if (c == 'v') bwa_verbose = atoi(optarg); else if (c == 'r') opt->split_factor = atof(optarg); else if (c == 'C') copy_comment = 1; @@ -57,6 +59,8 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); + fprintf(stderr, " -W INT max band width [%d]\n", opt->max_w); + fprintf(stderr, " -d INT off-diagnal X-dropoff [%d]\n", opt->zdrop); fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); // fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); diff --git a/ksw.c b/ksw.c index 3747cb0..5666a8f 100644 --- a/ksw.c +++ b/ksw.c @@ -359,7 +359,7 @@ typedef struct { int32_t h, e; } eh_t; -int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) +int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int zdrop, int h0, int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off) { eh_t *eh; // score array int8_t *qp; // query profile @@ -426,7 +426,7 @@ int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, max_ie = gscore > h1? max_ie : i; gscore = gscore > h1? gscore : h1; } - if (m == 0) break; + if (m == 0 || max - m - abs((i - max_i) - (j - max_j)) * gape > zdrop) break; // drop to zero, or below Z-dropoff if (m > max) { max = m, max_i = i, max_j = mj; max_off = max_off > abs(mj - i)? max_off : abs(mj - i); diff --git a/ksw.h b/ksw.h index 6d1f7cf..2dd6499 100644 --- a/ksw.h +++ b/ksw.h @@ -102,7 +102,7 @@ extern "C" { * * @return best semi-local alignment score */ - int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); + int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off); #ifdef __cplusplus } diff --git a/main.c b/main.c index be31bf0..dd1a481 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.0-r320" +#define PACKAGE_VERSION "0.7.0-r323-beta" #endif static int usage()