From 54da54ffd4aaf39f88c6051025561c7cf3d44b76 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Thu, 21 Feb 2013 12:52:00 -0500 Subject: [PATCH] extend more seeds (and thus slower...) --- bwamem.c | 4 +++- bwamem.h | 4 +++- fastmap.c | 4 +++- 3 files changed, 9 insertions(+), 3 deletions(-) diff --git a/bwamem.c b/bwamem.c index 8122f70..9bb5ad0 100644 --- a/bwamem.c +++ b/bwamem.c @@ -62,6 +62,7 @@ mem_opt_t *mem_opt_init() o->max_ins = 10000; o->mask_level = 0.50; o->chain_drop_ratio = 0.50; + o->split_factor = 1.5; o->chunk_size = 10000000; o->n_threads = 1; o->pe_dir = 0<<1|1; @@ -186,7 +187,8 @@ static int test_and_merge(const mem_opt_t *opt, mem_chain_t *c, const mem_seed_t static void mem_insert_seed(const mem_opt_t *opt, kbtree_t(chn) *tree, smem_i *itr) { const bwtintv_v *a; - while ((a = smem_next(itr, opt->min_seed_len<<1, opt->split_width)) != 0) { // to find all SMEM and some internal MEM + int split_len = (int)(opt->min_seed_len * opt->split_factor + .499); + while ((a = smem_next(itr, split_len, opt->split_width)) != 0) { // to find all SMEM and some internal MEM int i; for (i = 0; i < a->n; ++i) { // go through each SMEM/MEM up to itr->start bwtintv_t *p = &a->a[i]; diff --git a/bwamem.h b/bwamem.h index 43a5401..6b191ae 100644 --- a/bwamem.h +++ b/bwamem.h @@ -27,7 +27,9 @@ typedef struct { int min_seed_len, max_occ, max_chain_gap; int n_threads, chunk_size; int pe_dir; - float mask_level, chain_drop_ratio; + float mask_level; + float chain_drop_ratio; + float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor int pen_unpaired; // phred-scaled penalty for unpaired reads int max_ins; // maximum insert size int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset diff --git a/fastmap.c b/fastmap.c index a2d7d94..91a4ecb 100644 --- a/fastmap.c +++ b/fastmap.c @@ -24,12 +24,13 @@ int main_mem(int argc, char *argv[]) bseq1_t *seqs; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "PHk:c:v:s:")) >= 0) { + while ((c = getopt(argc, argv, "PHk:c:v:s:r:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); else if (c == 'P') opt->flag |= MEM_F_NOPAIRING; else if (c == 'H') opt->flag |= MEM_F_HARDCLIP; else if (c == 'c') opt->max_occ = atoi(optarg); else if (c == 'v') mem_verbose = atoi(optarg); + else if (c == 'r') opt->split_factor = atof(optarg); else if (c == 's') opt->split_width = atoi(optarg); } if (optind + 1 >= argc) { @@ -38,6 +39,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "Options: -k INT minimum seed length [%d]\n", opt->min_seed_len); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); + fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); fprintf(stderr, " -v INT verbose level [%d]\n", mem_verbose); fprintf(stderr, "\n"); free(opt);