From 3b84c03c1e01686525bcb29caf4e3d4f1b94c46e Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 28 Aug 2013 15:59:05 -0400 Subject: [PATCH] r406: allow to use diff clipping penalties for 5'-end or for 3'-end --- bwamem.c | 10 +++++----- bwamem.h | 2 +- fastmap.c | 12 +++++++++--- main.c | 2 +- 4 files changed, 16 insertions(+), 10 deletions(-) diff --git a/bwamem.c b/bwamem.c index 801ea7e..06fc8ac 100644 --- a/bwamem.c +++ b/bwamem.c @@ -51,7 +51,7 @@ mem_opt_t *mem_opt_init() o->T = 30; o->zdrop = 100; o->pen_unpaired = 17; - o->pen_clip = 5; + o->pen_clip5 = o->pen_clip3 = 5; o->min_seed_len = 19; o->split_width = 10; o->max_occ = 10000; @@ -572,12 +572,12 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int for (i = 0; i < MAX_BAND_TRY; ++i) { int prev = a->score; aw[0] = opt->w << i; - a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->pen_clip, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); + a->score = ksw_extend(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->q, opt->r, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]); if (bwa_verbose >= 4) { printf("L\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); } if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break; } // check whether we prefer to reach the end of the query - if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension + if (gscore <= 0 || gscore <= a->score - opt->pen_clip5) { // local extension a->qb = s->qbeg - qle, a->rb = s->rbeg - tle; a->truesc = a->score; } else { // to-end extension @@ -595,12 +595,12 @@ void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int for (i = 0; i < MAX_BAND_TRY; ++i) { int prev = a->score; aw[1] = opt->w << i; - a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->pen_clip, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); + a->score = ksw_extend(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->q, opt->r, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]); if (bwa_verbose >= 4) { printf("R\t%d < %d; w=%d; max_off=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); } if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break; } // similar to the above - if (gscore <= 0 || gscore <= a->score - opt->pen_clip) { // local extension + if (gscore <= 0 || gscore <= a->score - opt->pen_clip3) { // local extension a->qe = qe + qle, a->re = rmax[0] + re + tle; a->truesc += a->score - sc0; } else { // to-end extension diff --git a/bwamem.h b/bwamem.h index be1862a..a81c92b 100644 --- a/bwamem.h +++ b/bwamem.h @@ -20,7 +20,7 @@ typedef struct __smem_i smem_i; typedef struct { int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r int pen_unpaired; // phred-scaled penalty for unpaired reads - int pen_clip; // clipping penalty. This score is not deducted from the DP score. + int pen_clip5,pen_clip3;// clipping penalty. This score is not deducted from the DP score. int w; // band width int zdrop; // Z-dropoff diff --git a/fastmap.c b/fastmap.c index b00ec00..8b4ae3f 100644 --- a/fastmap.c +++ b/fastmap.c @@ -2,6 +2,7 @@ #include #include #include +#include #include "bwa.h" #include "bwamem.h" #include "kvec.h" @@ -35,7 +36,6 @@ int main_mem(int argc, char *argv[]) else if (c == 'O') opt->q = atoi(optarg); else if (c == 'E') opt->r = atoi(optarg); else if (c == 'T') opt->T = atoi(optarg); - else if (c == 'L') opt->pen_clip = atoi(optarg); else if (c == 'U') opt->pen_unpaired = atoi(optarg); else if (c == 't') opt->n_threads = atoi(optarg), opt->n_threads = opt->n_threads > 1? opt->n_threads : 1; else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; @@ -48,7 +48,13 @@ int main_mem(int argc, char *argv[]) else if (c == 'v') bwa_verbose = atoi(optarg); else if (c == 'r') opt->split_factor = atof(optarg); else if (c == 'C') copy_comment = 1; - else if (c == 'R') { + else if (c == 'L') { + char *p; + opt->pen_clip5 = opt->pen_clip3 = strtol(optarg, &p, 10); + if (*p != 0 && ispunct(*p) && isdigit(p[1])) + opt->pen_clip3 = strtol(p+1, &p, 10); + fprintf(stderr, "%d,%d\n", opt->pen_clip5, opt->pen_clip3); + } else if (c == 'R') { if ((rg_line = bwa_set_rg(optarg)) == 0) return 1; // FIXME: memory leak } else if (c == 's') opt->split_width = atoi(optarg); else return 1; @@ -71,7 +77,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " -B INT penalty for a mismatch [%d]\n", opt->b); fprintf(stderr, " -O INT gap open penalty [%d]\n", opt->q); fprintf(stderr, " -E INT gap extension penalty; a gap of size k cost {-O} + {-E}*k [%d]\n", opt->r); - fprintf(stderr, " -L INT penalty for clipping [%d]\n", opt->pen_clip); + fprintf(stderr, " -L INT penalty for clipping [%d]\n", opt->pen_clip5); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n", opt->pen_unpaired); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); diff --git a/main.c b/main.c index 662d54f..06d24bc 100644 --- a/main.c +++ b/main.c @@ -3,7 +3,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.5a-r405" +#define PACKAGE_VERSION "0.7.5a-r406" #endif int bwa_fa2pac(int argc, char *argv[]);