diff --git a/bwamem.c b/bwamem.c index 5ee153a..8d15f15 100644 --- a/bwamem.c +++ b/bwamem.c @@ -1036,7 +1036,13 @@ mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntse regs.n = mem_sort_and_dedup(regs.n, regs.a, opt->mask_level_redun); if (opt->flag & MEM_F_NO_EXACT) regs.n = mem_test_and_remove_exact(opt, regs.n, regs.a, l_seq); - if (bwa_verbose >= 4) err_printf("* %ld chains remain after removing duplicated chains\n", regs.n); + if (bwa_verbose >= 4) { + err_printf("* %ld chains remain after removing duplicated chains\n", regs.n); + for (i = 0; i < regs.n; ++i) { + mem_alnreg_t *p = ®s.a[i]; + printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re); + } + } return regs; } diff --git a/fastmap.c b/fastmap.c index c971f9b..8ffa07b 100644 --- a/fastmap.c +++ b/fastmap.c @@ -18,6 +18,22 @@ extern unsigned char nst_nt4_table[256]; void *kopen(const char *fn, int *_fd); int kclose(void *a); +static void update_a(mem_opt_t *opt, const mem_opt_t *opt0) +{ + if (opt0->a) { // matching score is changed + if (!opt0->b) opt->b *= opt->a; + if (!opt0->T) opt->T *= opt->a; + if (!opt0->o_del) opt->o_del *= opt->a; + if (!opt0->e_del) opt->e_del *= opt->a; + if (!opt0->o_ins) opt->o_ins *= opt->a; + if (!opt0->e_ins) opt->e_ins *= opt->a; + if (!opt0->zdrop) opt->zdrop *= opt->a; + if (!opt0->pen_clip5) opt->pen_clip5 *= opt->a; + if (!opt0->pen_clip3) opt->pen_clip3 *= opt->a; + if (!opt0->pen_unpaired) opt->pen_unpaired *= opt->a; + } +} + int main_mem(int argc, char *argv[]) { mem_opt_t *opt, opt0; @@ -27,6 +43,7 @@ int main_mem(int argc, char *argv[]) bseq1_t *seqs; bwaidx_t *idx; char *p, *rg_line = 0; + const char *mode = 0; void *ko = 0, *ko2 = 0; int64_t n_processed = 0; mem_pestat_t pes[4], *pes0 = 0; @@ -35,11 +52,11 @@ int main_mem(int argc, char *argv[]) for (i = 0; i < 4; ++i) pes[i].failed = 1; opt = mem_opt_init(); - opt0.a = opt0.b = opt0.o_del = opt0.e_del = opt0.o_ins = opt0.e_ins = opt0.pen_unpaired = -1; - opt0.pen_clip5 = opt0.pen_clip3 = opt0.zdrop = opt0.T = -1; - while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:")) >= 0) { - if (c == 'k') opt->min_seed_len = atoi(optarg); - else if (c == 'w') opt->w = atoi(optarg); + memset(&opt0, 0, sizeof(mem_opt_t)); + while ((c = getopt(argc, argv, "epaMCSPHk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:W:x:")) >= 0) { + if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1; + else if (c == 'x') mode = optarg; + else if (c == 'w') opt->w = atoi(optarg), opt0.w = 1; else if (c == 'A') opt->a = atoi(optarg), opt0.a = 1; else if (c == 'B') opt->b = atoi(optarg), opt0.b = 1; else if (c == 'T') opt->T = atoi(optarg), opt0.T = 1; @@ -51,17 +68,18 @@ int main_mem(int argc, char *argv[]) else if (c == 'M') opt->flag |= MEM_F_NO_MULTI; else if (c == 'S') opt->flag |= MEM_F_NO_RESCUE; else if (c == 'e') opt->flag |= MEM_F_NO_EXACT; - else if (c == 'c') opt->max_occ = atoi(optarg); + else if (c == 'c') opt->max_occ = atoi(optarg), opt0.max_occ = 1; else if (c == 'd') opt->zdrop = atoi(optarg), opt0.zdrop = 1; else if (c == 'v') bwa_verbose = atoi(optarg); - else if (c == 'r') opt->split_factor = atof(optarg); - else if (c == 'D') opt->chain_drop_ratio = atof(optarg); - else if (c == 'm') opt->max_matesw = atoi(optarg); - else if (c == 's') opt->split_width = atoi(optarg); - else if (c == 'N') opt->max_chain_gap = atoi(optarg); - else if (c == 'W') opt->min_chain_weight = atoi(optarg); + else if (c == 'r') opt->split_factor = atof(optarg), opt0.split_factor = 1.; + else if (c == 'D') opt->chain_drop_ratio = atof(optarg), opt0.chain_drop_ratio = 1.; + else if (c == 'm') opt->max_matesw = atoi(optarg), opt0.max_matesw = 1; + else if (c == 's') opt->split_width = atoi(optarg), opt0.split_width = 1; + else if (c == 'N') opt->max_chain_gap = atoi(optarg), opt0.max_chain_gap = 1; + else if (c == 'W') opt->min_chain_weight = atoi(optarg), opt0.min_chain_weight = 1; else if (c == 'C') copy_comment = 1; else if (c == 'Q') { + opt0.mapQ_coef_len = 1; opt->mapQ_coef_len = atoi(optarg); opt->mapQ_coef_fac = opt->mapQ_coef_len > 0? log(opt->mapQ_coef_len) : 0; } else if (c == 'O') { @@ -102,7 +120,6 @@ int main_mem(int argc, char *argv[]) else return 1; } if (opt->n_threads < 1) opt->n_threads = 1; -// if (opt->T < opt->min_HSP_score) opt->T = opt->min_HSP_score; // TODO: tie ->T to MEM_HSP_COEF if (optind + 1 >= argc || optind + 3 < argc) { fprintf(stderr, "\n"); fprintf(stderr, "Usage: bwa mem [options] [in2.fq]\n\n"); @@ -115,16 +132,19 @@ int main_mem(int argc, char *argv[]) // fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); fprintf(stderr, " -D FLOAT drop chains shorter than FLOAT fraction of the longest overlapping chain [%.2f]\n", opt->chain_drop_ratio); + fprintf(stderr, " -W INT discard a chain if seeded bases shorter than INT [0]\n"); fprintf(stderr, " -m INT perform at most INT rounds of mate rescues for each read [%d]\n", opt->max_matesw); fprintf(stderr, " -S skip mate rescue\n"); fprintf(stderr, " -P skip pairing; mate rescue performed unless -S also in use\n"); fprintf(stderr, " -e discard full-length exact matches\n"); - fprintf(stderr, " -A INT score for a sequence match, which scales [-TdBOELU] unless overridden [%d]\n", opt->a); + fprintf(stderr, " -A INT score for a sequence match, which scales options -TdBOELU unless overridden [%d]\n", opt->a); fprintf(stderr, " -B INT penalty for a mismatch [%d]\n", opt->b); fprintf(stderr, " -O INT[,INT] gap open penalties for deletions and insertions [%d,%d]\n", opt->o_del, opt->o_ins); fprintf(stderr, " -E INT[,INT] gap extension penalty; a gap of size k cost '{-O} + {-E}*k' [%d,%d]\n", opt->e_del, opt->e_ins); fprintf(stderr, " -L INT[,INT] penalty for 5'- and 3'-end clipping [%d,%d]\n", opt->pen_clip5, opt->pen_clip3); fprintf(stderr, " -U INT penalty for an unpaired read pair [%d]\n", opt->pen_unpaired); + fprintf(stderr, " -x STR read type. Setting -x changes multiple parameters unless overriden [null]\n"); + fprintf(stderr, " pacbio: -k17 -W40 -w200 -c1000 -r10 -A2 -B7 -O2 -E1 -L0\n"); fprintf(stderr, "\nInput/output options:\n\n"); fprintf(stderr, " -p first query file consists of interleaved paired-end sequences\n"); fprintf(stderr, " -R STR read group header line such as '@RG\\tID:foo\\tSM:bar' [null]\n"); @@ -140,25 +160,35 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, " FR orientation only. [inferred]\n"); fprintf(stderr, "\n"); fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n"); - fprintf(stderr, " `-k17 -W40 -w200 -c1000 -r10 -A2 -O2 -E1' is recommended for PacBio reads as of early 2014.\n"); fprintf(stderr, "\n"); free(opt); return 1; } - if (opt0.a == 1) { // matching score is changed - if (opt0.b != 1) opt->b *= opt->a; - if (opt0.T != 1) opt->T *= opt->a; - if (opt0.o_del != 1) opt->o_del *= opt->a; - if (opt0.e_del != 1) opt->e_del *= opt->a; - if (opt0.o_ins != 1) opt->o_ins *= opt->a; - if (opt0.e_ins != 1) opt->e_ins *= opt->a; - if (opt0.zdrop != 1) opt->zdrop *= opt->a; - if (opt0.pen_clip5 != 1) opt->pen_clip5 *= opt->a; - if (opt0.pen_clip3 != 1) opt->pen_clip3 *= opt->a; - if (opt0.pen_unpaired != 1) opt->pen_unpaired *= opt->a; - } + if (mode) { + if (strcmp(mode, "pacbio") == 0) { + if (!opt0.a) opt->a = 2, opt0.a = 1; + update_a(opt, &opt0); + if (!opt0.b) opt->b = 7; + if (!opt0.o_del) opt->o_del = 2; + if (!opt0.e_del) opt->e_del = 1; + if (!opt0.o_ins) opt->o_ins = 2; + if (!opt0.e_ins) opt->e_ins = 1; + if (!opt0.w) opt->w = 200; + if (!opt0.min_seed_len) opt->min_seed_len = 17; + if (!opt0.min_chain_weight) opt->min_chain_weight = 40; + if (!opt0.max_occ) opt->max_occ = 1000; + if (!opt0.pen_clip5) opt->pen_clip5 = 0; + if (!opt0.pen_clip3) opt->pen_clip3 = 0; + if (opt0.split_factor == 0.) opt->split_factor = 10.; + } else { + fprintf(stderr, "[E::%s] unknown read type '%s'\n", __func__, mode); + return 1; // FIXME memory leak + } + } else update_a(opt, &opt0); +// if (opt->T < opt->min_HSP_score) opt->T = opt->min_HSP_score; // TODO: tie ->T to MEM_HSP_COEF bwa_fill_scmat(opt->a, opt->b, opt->mat); + if ((idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak ko = kopen(argv[optind + 1], &fd); diff --git a/main.c b/main.c index 1089477..dd06b42 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.8+dev-r462" +#define PACKAGE_VERSION "0.7.8+dev-r463" #endif int bwa_fa2pac(int argc, char *argv[]);