r774: Merge branch 'hot-fix' into fix-long-gap

This commit is contained in:
Heng Li 2018-05-11 10:02:17 -04:00
commit 881b4ca3a2
9 changed files with 21 additions and 12 deletions

11
align.c
View File

@ -6,18 +6,19 @@
#include "mmpriv.h"
#include "ksw2.h"
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b)
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b, int8_t sc_ambi)
{
int i, j;
a = a < 0? -a : a;
b = b > 0? -b : b;
sc_ambi = sc_ambi > 0? -sc_ambi : sc_ambi;
for (i = 0; i < m - 1; ++i) {
for (j = 0; j < m - 1; ++j)
mat[i * m + j] = i == j? a : b;
mat[i * m + m - 1] = 0;
mat[i * m + m - 1] = sc_ambi;
}
for (j = 0; j < m; ++j)
mat[(m - 1) * m + j] = 0;
mat[(m - 1) * m + j] = sc_ambi;
}
static inline void mm_seq_rev(uint32_t len, uint8_t *seq)
@ -479,7 +480,7 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
r2->cnt = 0;
if (r->cnt == 0) return;
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi);
bw = (int)(opt->bw * 1.5 + 1.);
if (is_sr && !(mi->flag & MM_I_HPC)) {
@ -698,7 +699,7 @@ static int mm_align1_inv(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, i
if (ql < opt->min_chain_score || ql > opt->max_gap) return 0;
if (tl < opt->min_chain_score || tl > opt->max_gap) return 0;
ksw_gen_simple_mat(5, mat, opt->a, opt->b);
ksw_gen_simple_mat(5, mat, opt->a, opt->b, opt->sc_ambi);
tseq = (uint8_t*)kmalloc(km, tl);
mm_idx_getseq(mi, r1->rid, r1->re, r2->rs, tseq);
qseq = r1->rev? &qseq0[0][r2->qe] : &qseq0[1][qlen - r2->qs];

View File

@ -76,7 +76,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
qe2_ = _mm_set1_epi8(q2 + e2);
sc_mch_ = _mm_set1_epi8(mat[0]);
sc_mis_ = _mm_set1_epi8(mat[1]);
sc_N_ = _mm_set1_epi8(-e2);
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e2) : _mm_set1_epi8(mat[m*m-1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard
if (w < 0) w = tlen > qlen? tlen : qlen;
@ -111,7 +111,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
for (t = 0; t < tlen_ * 16; ++t) H[t] = KSW_NEG_INF;
}
if (with_cigar) {
mem2 = (uint8_t*)kmalloc(km, ((qlen + tlen - 1) * n_col_ + 1) * 16);
mem2 = (uint8_t*)kmalloc(km, ((size_t)(qlen + tlen - 1) * n_col_ + 1) * 16);
p = (__m128i*)(((size_t)mem2 + 15) >> 4 << 4);
off = (int*)kmalloc(km, (qlen + tlen - 1) * sizeof(int) * 2);
off_end = off + qlen + tlen - 1;

View File

@ -71,7 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
qe_ = _mm_set1_epi8(q + e);
sc_mch_ = _mm_set1_epi8(mat[0]);
sc_mis_ = _mm_set1_epi8(mat[1]);
sc_N_ = _mm_set1_epi8(-e);
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard
tlen_ = (tlen + 15) / 16;

View File

@ -65,7 +65,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
flag16_ = _mm_set1_epi8(0x10);
sc_mch_ = _mm_set1_epi8(mat[0]);
sc_mis_ = _mm_set1_epi8(mat[1]);
sc_N_ = _mm_set1_epi8(-e);
sc_N_ = mat[m*m-1] == 0? _mm_set1_epi8(-e) : _mm_set1_epi8(mat[m*m-1]);
m1_ = _mm_set1_epi8(m - 1); // wildcard
max_sc_ = _mm_set1_epi8(mat[0] + (q + e) * 2);

6
main.c
View File

@ -10,7 +10,7 @@
#include "getopt.h"
#endif
#define MM_VERSION "2.10-r769-dirty"
#define MM_VERSION "2.10-r774-dirty"
#ifdef __linux__
#include <sys/resource.h>
@ -58,6 +58,7 @@ static struct option long_options[] = {
{ "min-occ-floor", required_argument, 0, 0 }, // 28
{ "MD", no_argument, 0, 0 }, // 29
{ "lj-min-ratio", required_argument, 0, 0 }, // 30
{ "score-N", required_argument, 0, 0 }, // 31
{ "help", no_argument, 0, 'h' },
{ "max-intron-len", required_argument, 0, 'G' },
{ "version", no_argument, 0, 'V' },
@ -95,7 +96,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int flag, int long_idx, const cha
int main(int argc, char *argv[])
{
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:y";
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yY";
mm_mapopt_t opt;
mm_idxopt_t ipt;
int i, c, n_threads = 3, long_idx;
@ -175,6 +176,7 @@ int main(int argc, char *argv[])
else if (c == 0 && long_idx ==28) opt.min_mid_occ = atoi(optarg); // --min-occ-floor
else if (c == 0 && long_idx ==29) opt.flag |= MM_F_OUT_MD; // --MD
else if (c == 0 && long_idx ==30) opt.min_join_flank_ratio = atof(optarg); // --lj-min-ratio
else if (c == 0 && long_idx ==31) opt.sc_ambi = atoi(optarg); // --score-N
else if (c == 0 && long_idx == 14) { // --frag
yes_or_no(&opt, MM_F_FRAG_MODE, long_idx, optarg, 1);
} else if (c == 0 && long_idx == 15) { // --secondary

View File

@ -118,6 +118,7 @@ typedef struct {
float min_join_flank_ratio;
int a, b, q, e, q2, e2; // matching score, mismatch, gap-open and gap-ext penalties
int sc_ambi; // score when one or both bases are "N"
int noncan; // cost of non-canonical splicing sites
int zdrop, zdrop_inv; // break alignment if alignment score drops too fast along the diagonal
int end_bonus;

View File

@ -1,4 +1,4 @@
.TH minimap2 1 "27 March 2018" "minimap2-2.10 (r761)" "Bioinformatics tools"
.TH minimap2 1 "30 April 2018" "minimap2-2.10-dirty (r770)" "Bioinformatics tools"
.SH NAME
.PP
minimap2 - mapping and alignment between collections of DNA sequences
@ -322,6 +322,9 @@ no attempt to match GT-AG [n]
.BI --end-bonus \ INT
Score bonus when alignment extends to the end of the query sequence [0].
.TP
.BI --score-N \ INT
Score of a mismatch involving ambiguous bases [1].
.TP
.BR --splice-flank = yes | no
Assume the next base to a
.B GT

View File

@ -34,6 +34,7 @@ void mm_mapopt_init(mm_mapopt_t *opt)
opt->min_join_flank_ratio = 0.5f;
opt->a = 2, opt->b = 4, opt->q = 4, opt->e = 2, opt->q2 = 24, opt->e2 = 1;
opt->sc_ambi = 1;
opt->zdrop = 400, opt->zdrop_inv = 200;
opt->end_bonus = -1;
opt->min_dp_max = opt->min_chain_score * opt->a;

View File

@ -26,6 +26,7 @@ cdef extern from "minimap.h":
int min_join_flank_sc
float min_join_flank_ratio;
int a, b, q, e, q2, e2
int sc_ambi
int noncan
int zdrop, zdrop_inv
int end_bonus