diff --git a/align.c b/align.c index b56bde2..5d6200d 100644 --- a/align.c +++ b/align.c @@ -6,18 +6,19 @@ #include "mmpriv.h" #include "ksw2.h" -static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b) +static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b, int8_t sc_ambi) { int i, j; a = a < 0? -a : a; b = b > 0? -b : b; + sc_ambi = sc_ambi > 0? -sc_ambi : sc_ambi; for (i = 0; i < m - 1; ++i) { for (j = 0; j < m - 1; ++j) mat[i * m + j] = i == j? a : b; - mat[i * m + m - 1] = 0; + mat[i * m + m - 1] = sc_ambi; } for (j = 0; j < m; ++j) - mat[(m - 1) * m + j] = 0; + mat[(m - 1) * m + j] = sc_ambi; } static inline void mm_seq_rev(uint32_t len, uint8_t *seq) @@ -434,7 +435,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int r2->cnt = 0; if (r->cnt == 0) return; - ksw_gen_simple_mat(5, mat, opt->a, opt->b); + ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi); bw = (int)(opt->bw * 1.5 + 1.); if (is_sr && !(mi->flag & MM_I_HPC)) { @@ -652,7 +653,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i if (ql < opt->min_chain_score || ql > opt->max_gap) return 0; if (tl < opt->min_chain_score || tl > opt->max_gap) return 0; - ksw_gen_simple_mat(5, mat, opt->a, opt->b); + ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi); tseq = (uint8_t*)kmalloc(km, tl); mm_idx_getseq(mi, r1->rid, r1->re, r2->rs, tseq); qseq = r1->rev? &qseq0[0][r2->qe] : &qseq0[1][qlen - r2->qs]; diff --git a/ksw2_extd2_sse.c b/ksw2_extd2_sse.c index 628f37f..a993a2d 100644 --- a/ksw2_extd2_sse.c +++ b/ksw2_extd2_sse.c @@ -76,7 +76,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin qe2_ = _mm_set1_epi8(q2 + e2); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); - sc_N_ = _mm_set1_epi8(-e2); + sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e2) : _mm_set1_epi8(mat[m*m-1]); m1_ = _mm_set1_epi8(m - 1); // wildcard if (w < 0) w = tlen > qlen? tlen : qlen; diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c index d8178e1..f090af6 100644 --- a/ksw2_exts2_sse.c +++ b/ksw2_exts2_sse.c @@ -71,7 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin qe_ = _mm_set1_epi8(q + e); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); - sc_N_ = _mm_set1_epi8(-e); + sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]); m1_ = _mm_set1_epi8(m - 1); // wildcard tlen_ = (tlen + 15) / 16; diff --git a/ksw2_extz2_sse.c b/ksw2_extz2_sse.c index c0671a1..88bf636 100644 --- a/ksw2_extz2_sse.c +++ b/ksw2_extz2_sse.c @@ -65,7 +65,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin flag16_ = _mm_set1_epi8(0x10); sc_mch_ = _mm_set1_epi8(mat[0]); sc_mis_ = _mm_set1_epi8(mat[1]); - sc_N_ = _mm_set1_epi8(-e); + sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]); m1_ = _mm_set1_epi8(m - 1); // wildcard max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2); diff --git a/main.c b/main.c index 4dc7205..719ecd4 100644 --- a/main.c +++ b/main.c @@ -10,7 +10,7 @@ #include "getopt.h" #endif -#define MM_VERSION "2.10-r768-dirty" +#define MM_VERSION "2.10-r770-dirty" #ifdef __linux__ #include @@ -58,6 +58,7 @@ static struct option long_options[] = { { "min-occ-floor", required_argument, 0, 0 }, // 28 { "MD", no_argument, 0, 0 }, // 29 { "lj-min-ratio", required_argument, 0, 0 }, // 30 + { "score-N", required_argument, 0, 0 }, // 31 { "help", no_argument, 0, 'h' }, { "max-intron-len", required_argument, 0, 'G' }, { "version", no_argument, 0, 'V' }, @@ -175,6 +176,7 @@ int main(int argc, char *argv[]) else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio + else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N else if (c == 0 && long_idx == 14) { // --frag yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1); } else if (c == 0 && long_idx == 15) { // --secondary diff --git a/minimap.h b/minimap.h index d87c891..65f49c3 100644 --- a/minimap.h +++ b/minimap.h @@ -118,6 +118,7 @@ typedef struct { float min_join_flank_ratio; int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties + int sc_ambi; // score when one or both bases are "N" int noncan; // cost of non-canonical splicing sites int zdrop, zdrop_inv; // break alignment if alignment score drops too fast along the diagonal int end_bonus; diff --git a/minimap2.1 b/minimap2.1 index 0208ea9..5856d25 100644 --- a/minimap2.1 +++ b/minimap2.1 @@ -1,4 +1,4 @@ -.TH minimap2 1 "27 March 2018" "minimap2-2.10 (r761)" "Bioinformatics tools" +.TH minimap2 1 "30 April 2018" "minimap2-2.10-dirty (r770)" "Bioinformatics tools" .SH NAME .PP minimap2 - mapping and alignment between collections of DNA sequences @@ -322,6 +322,9 @@ no attempt to match GT-AG [n] .BI --end-bonus \ INT Score bonus when alignment extends to the end of the query sequence [0]. .TP +.BI --score-N \ INT +Score of a mismatch involving ambiguous bases [1]. +.TP .BR --splice-flank = yes | no Assume the next base to a .B GT diff --git a/options.c b/options.c index 5c19978..aa48ae4 100644 --- a/options.c +++ b/options.c @@ -34,6 +34,7 @@ void mm_mapopt_init(mm_mapopt_t *opt) opt->min_join_flank_ratio = 0.5f; opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1; + opt->sc_ambi = 1; opt->zdrop = 400, opt->zdrop_inv = 200; opt->end_bonus = -1; opt->min_dp_max = opt->min_chain_score * opt->a; diff --git a/python/cmappy.pxd b/python/cmappy.pxd index 231f68f..39be8a4 100644 --- a/python/cmappy.pxd +++ b/python/cmappy.pxd @@ -26,6 +26,7 @@ cdef extern from "minimap.h": int min_join_flank_sc float min_join_flank_ratio; int a, b, q, e, q2, e2 + int sc_ambi int noncan int zdrop, zdrop_inv int end_bonus