r770: matching N bases not working properly (#155)
This commit is contained in:
parent
aef7b0744c
commit
734ac379bb
11
align.c
11
align.c
|
|
@ -6,18 +6,19 @@
|
|||
#include "mmpriv.h"
|
||||
#include "ksw2.h"
|
||||
|
||||
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b)
|
||||
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b, int8_t sc_ambi)
|
||||
{
|
||||
int i, j;
|
||||
a = a < 0? -a : a;
|
||||
b = b > 0? -b : b;
|
||||
sc_ambi = sc_ambi > 0? -sc_ambi : sc_ambi;
|
||||
for (i = 0; i < m - 1; ++i) {
|
||||
for (j = 0; j < m - 1; ++j)
|
||||
mat[i * m + j] = i == j? a : b;
|
||||
mat[i * m + m - 1] = 0;
|
||||
mat[i * m + m - 1] = sc_ambi;
|
||||
}
|
||||
for (j = 0; j < m; ++j)
|
||||
mat[(m - 1) * m + j] = 0;
|
||||
mat[(m - 1) * m + j] = sc_ambi;
|
||||
}
|
||||
|
||||
static inline void mm_seq_rev(uint32_t len, uint8_t *seq)
|
||||
|
|
@ -434,7 +435,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
|
||||
r2->cnt = 0;
|
||||
if (r->cnt == 0) return;
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi);
|
||||
bw = (int)(opt->bw * 1.5 + 1.);
|
||||
|
||||
if (is_sr && !(mi->flag & MM_I_HPC)) {
|
||||
|
|
@ -652,7 +653,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
|
|||
if (ql < opt->min_chain_score || ql > opt->max_gap) return 0;
|
||||
if (tl < opt->min_chain_score || tl > opt->max_gap) return 0;
|
||||
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
|
||||
ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi);
|
||||
tseq = (uint8_t*)kmalloc(km, tl);
|
||||
mm_idx_getseq(mi, r1->rid, r1->re, r2->rs, tseq);
|
||||
qseq = r1->rev? &qseq0[0][r2->qe] : &qseq0[1][qlen - r2->qs];
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
qe2_ = _mm_set1_epi8(q2 + e2);
|
||||
sc_mch_ = _mm_set1_epi8(mat[0]);
|
||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||
sc_N_ = _mm_set1_epi8(-e2);
|
||||
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e2) : _mm_set1_epi8(mat[m*m-1]);
|
||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||
|
||||
if (w < 0) w = tlen > qlen? tlen : qlen;
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
qe_ = _mm_set1_epi8(q + e);
|
||||
sc_mch_ = _mm_set1_epi8(mat[0]);
|
||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||
sc_N_ = _mm_set1_epi8(-e);
|
||||
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]);
|
||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||
|
||||
tlen_ = (tlen + 15) / 16;
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
flag16_ = _mm_set1_epi8(0x10);
|
||||
sc_mch_ = _mm_set1_epi8(mat[0]);
|
||||
sc_mis_ = _mm_set1_epi8(mat[1]);
|
||||
sc_N_ = _mm_set1_epi8(-e);
|
||||
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]);
|
||||
m1_ = _mm_set1_epi8(m - 1); // wildcard
|
||||
max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2);
|
||||
|
||||
|
|
|
|||
4
main.c
4
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "getopt.h"
|
||||
#endif
|
||||
|
||||
#define MM_VERSION "2.10-r768-dirty"
|
||||
#define MM_VERSION "2.10-r770-dirty"
|
||||
|
||||
#ifdef __linux__
|
||||
#include <sys/resource.h>
|
||||
|
|
@ -58,6 +58,7 @@ static struct option long_options[] = {
|
|||
{ "min-occ-floor", required_argument, 0, 0 }, // 28
|
||||
{ "MD", no_argument, 0, 0 }, // 29
|
||||
{ "lj-min-ratio", required_argument, 0, 0 }, // 30
|
||||
{ "score-N", required_argument, 0, 0 }, // 31
|
||||
{ "help", no_argument, 0, 'h' },
|
||||
{ "max-intron-len", required_argument, 0, 'G' },
|
||||
{ "version", no_argument, 0, 'V' },
|
||||
|
|
@ -175,6 +176,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor
|
||||
else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD
|
||||
else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio
|
||||
else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N
|
||||
else if (c == 0 && long_idx == 14) { // --frag
|
||||
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
|
||||
} else if (c == 0 && long_idx == 15) { // --secondary
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ typedef struct {
|
|||
float min_join_flank_ratio;
|
||||
|
||||
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
|
||||
int sc_ambi; // score when one or both bases are "N"
|
||||
int noncan; // cost of non-canonical splicing sites
|
||||
int zdrop, zdrop_inv; // break alignment if alignment score drops too fast along the diagonal
|
||||
int end_bonus;
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
.TH minimap2 1 "27 March 2018" "minimap2-2.10 (r761)" "Bioinformatics tools"
|
||||
.TH minimap2 1 "30 April 2018" "minimap2-2.10-dirty (r770)" "Bioinformatics tools"
|
||||
.SH NAME
|
||||
.PP
|
||||
minimap2 - mapping and alignment between collections of DNA sequences
|
||||
|
|
@ -322,6 +322,9 @@ no attempt to match GT-AG [n]
|
|||
.BI --end-bonus \ INT
|
||||
Score bonus when alignment extends to the end of the query sequence [0].
|
||||
.TP
|
||||
.BI --score-N \ INT
|
||||
Score of a mismatch involving ambiguous bases [1].
|
||||
.TP
|
||||
.BR --splice-flank = yes | no
|
||||
Assume the next base to a
|
||||
.B GT
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
opt->min_join_flank_ratio = 0.5f;
|
||||
|
||||
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
|
||||
opt->sc_ambi = 1;
|
||||
opt->zdrop = 400, opt->zdrop_inv = 200;
|
||||
opt->end_bonus = -1;
|
||||
opt->min_dp_max = opt->min_chain_score * opt->a;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ cdef extern from "minimap.h":
|
|||
int min_join_flank_sc
|
||||
float min_join_flank_ratio;
|
||||
int a, b, q, e, q2, e2
|
||||
int sc_ambi
|
||||
int noncan
|
||||
int zdrop, zdrop_inv
|
||||
int end_bonus
|
||||
|
|
|
|||
Loading…
Reference in New Issue