diff --git a/bwamem.c b/bwamem.c index 2d326e2..88086ee 100644 --- a/bwamem.c +++ b/bwamem.c @@ -14,17 +14,6 @@ #include "kvec.h" #include "ksort.h" -void mem_fill_scmat(int a, int b, int8_t mat[25]) -{ - int i, j, k; - for (i = k = 0; i < 4; ++i) { - for (j = 0; j < 4; ++j) - mat[k++] = i == j? a : -b; - mat[k++] = 0; // ambiguous base - } - for (j = 0; j < 5; ++j) mat[k++] = 0; -} - /* Theory on probability and scoring *ungapped* alignment * * s'(a,b) = log[P(b|a)/P(b)] = log[4P(b|a)], assuming uniform base distribution @@ -64,12 +53,23 @@ mem_opt_t *mem_opt_init() o->split_factor = 1.5; o->chunk_size = 10000000; o->n_threads = 1; - o->pe_dir = 0<<1|1; o->pen_unpaired = 9; + o->max_matesw = 100; mem_fill_scmat(o->a, o->b, o->mat); return o; } +void mem_fill_scmat(int a, int b, int8_t mat[25]) +{ + int i, j, k; + for (i = k = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + mat[k++] = i == j? a : -b; + mat[k++] = 0; // ambiguous base + } + for (j = 0; j < 5; ++j) mat[k++] = 0; +} + /*************************** * SMEM iterator interface * ***************************/ diff --git a/bwamem.h b/bwamem.h index 6ab2b01..d99a9da 100644 --- a/bwamem.h +++ b/bwamem.h @@ -18,18 +18,22 @@ typedef struct __smem_i smem_i; #define MEM_F_NO_MULTI 0x16 typedef struct { - int a, b, q, r, w; - int flag; - int split_width; - int min_seed_len, max_occ, max_chain_gap; - int n_threads, chunk_size; - int pe_dir; - float mask_level; - float chain_drop_ratio; - float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor - int pen_unpaired; // phred-scaled penalty for unpaired reads - int max_ins; // maximum insert size - int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset + int a, b, q, r; // match score, mismatch penalty and gap open/extension penalty. A gap of size k costs q+k*r + int w; // band width + int flag; // see MEM_F_* macros + int min_seed_len; // minimum seed length + float split_factor; // split into a seed if MEM is longer than min_seed_len*split_factor + int split_width; // split into a seed if its occurence is smaller than this value + int max_occ; // skip a seed if its occurence is larger than this value + int max_chain_gap; // do not chain seed if it is max_chain_gap-bp away from the closest seed + int n_threads; // number of threads + int chunk_size; // process chunk_size-bp sequences in a batch + float mask_level; // regard a hit as redundant if the overlap with another better hit is over mask_level times the min length of the two hits + float chain_drop_ratio; // drop a chain if its seed coverage is below chain_drop_ratio times the seed coverage of a better chain overlapping with the small chain + int pen_unpaired; // phred-scaled penalty for unpaired reads + int max_ins; // when estimating insert size distribution, skip pairs with insert longer than this value + int max_matesw; // perform maximally max_matesw rounds of mate-SW for each end + int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset } mem_opt_t; typedef struct { diff --git a/bwamem_pair.c b/bwamem_pair.c index 51f51c9..3ef71ea 100644 --- a/bwamem_pair.c +++ b/bwamem_pair.c @@ -246,7 +246,7 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co if (a[i].a[j].score >= a[i].a[0].score - opt->pen_unpaired) kv_push(mem_alnreg_t, b[i], a[i].a[j]); for (i = 0; i < 2; ++i) - for (j = 0; j < b[i].n; ++j) + for (j = 0; j < b[i].n && j < opt->max_matesw; ++j) n += mem_matesw(opt, bns->l_pac, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]); free(b[0].a); free(b[1].a); mem_mark_primary_se(opt, a[0].n, a[0].a); diff --git a/fastmap.c b/fastmap.c index 77b3d75..b4f8ea8 100644 --- a/fastmap.c +++ b/fastmap.c @@ -26,8 +26,9 @@ int main_mem(int argc, char *argv[]) void *ko = 0, *ko2 = 0; opt = mem_opt_init(); - while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:")) >= 0) { + while ((c = getopt(argc, argv, "paMCPHk:c:v:s:r:t:R:A:B:O:E:w:")) >= 0) { if (c == 'k') opt->min_seed_len = atoi(optarg); + else if (c == 'w') opt->w = atoi(optarg); else if (c == 'A') opt->a = atoi(optarg); else if (c == 'B') opt->b = atoi(optarg); else if (c == 'O') opt->q = atoi(optarg); @@ -52,6 +53,7 @@ int main_mem(int argc, char *argv[]) fprintf(stderr, "Algorithm options:\n\n"); fprintf(stderr, " -t INT number of threads [%d]\n", opt->n_threads); fprintf(stderr, " -k INT minimum seed length [%d]\n", opt->min_seed_len); + fprintf(stderr, " -w INT band width for banded alignment [%d]\n", opt->w); fprintf(stderr, " -r FLOAT look for internal seeds inside a seed longer than {-k} * FLOAT [%g]\n", opt->split_factor); fprintf(stderr, " -s INT look for internal seeds inside a seed with less than INT occ [%d]\n", opt->split_width); fprintf(stderr, " -c INT skip seeds with more than INT occurrences [%d]\n", opt->max_occ); diff --git a/main.c b/main.c index 74980c9..9ef33fa 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.6.2-r277-beta" +#define PACKAGE_VERSION "0.6.2-r278-beta" #endif static int usage()