diff --git a/ksw2_exts2_sse.c b/ksw2_exts2_sse.c index efb661e..109d9bb 100644 --- a/ksw2_exts2_sse.c +++ b/ksw2_exts2_sse.c @@ -68,7 +68,6 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin uint8_t *qr, *sf, *mem, *mem2 = 0; __m128i q_, q2_, qe_, zero_, sc_mch_, sc_mis_, sc_N_, m1_; __m128i *u, *v, *x, *y, *x2, *s, *p = 0, *donor, *acceptor; - const int sp0[4] = { 8, 15, 21, 30 }; ksw_reset_extz(ez); if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return; @@ -119,45 +118,16 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin memcpy(sf, target, tlen); // set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding! - if ((flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) && !(flag & KSW_EZ_SPLICE_CMPLX)) { - int semi_cost = flag&KSW_EZ_SPLICE_FLANK? -noncan/2 : 0; // GTr or yAG is worth 0.5 bit; see PMID:18688272 - memset(donor, -noncan, tlen_ * 16); - memset(acceptor, -noncan, tlen_ * 16); - if (!(flag & KSW_EZ_REV_CIGAR)) { - for (t = 0; t < tlen - 4; ++t) { - int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG - if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 3) can_type = 1; // GTr... - if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 3) can_type = 1; // CTr... - if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2; - if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost; - } - for (t = 2; t < tlen; ++t) { - int can_type = 0; - if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 0 && target[t] == 2) can_type = 1; // ...yAG - if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 0 && target[t] == 1) can_type = 1; // ...yAC - if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2; - if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost; - } - } else { - for (t = 0; t < tlen - 4; ++t) { - int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG - if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 0) can_type = 1; // GAy... - if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 0) can_type = 1; // CAy... - if (can_type && (target[t+3] == 1 || target[t+3] == 3)) can_type = 2; - if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost; - } - for (t = 2; t < tlen; ++t) { - int can_type = 0; - if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 3 && target[t] == 2) can_type = 1; // ...rTG - if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 3 && target[t] == 1) can_type = 1; // ...rTC - if (can_type && (target[t-2] == 0 || target[t-2] == 2)) can_type = 2; - if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost; - } - } - } else if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) { // use the complex splice model + if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) { + const int sp0[4] = { 8, 15, 21, 30 }; int sp[4]; - for (t = 0; t < 4; ++t) - sp[t] = (int)((double)sp0[t] / 4. + .499); + if (flag & KSW_EZ_SPLICE_CMPLX) { + for (t = 0; t < 4; ++t) + sp[t] = (int)((double)sp0[t] / 4. + .499); + } else { + sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0; + sp[1] = sp[2] = sp[3] = noncan; + } memset(donor, -sp[3], tlen_ * 16); memset(acceptor, -sp[3], tlen_ * 16); if (!(flag & KSW_EZ_REV_CIGAR)) { diff --git a/main.c b/main.c index 1e37d33..ecb4ab9 100644 --- a/main.c +++ b/main.c @@ -33,7 +33,7 @@ static ko_longopt_t long_options[] = { { "min-dp-len", ko_required_argument, 308 }, { "print-aln-seq", ko_no_argument, 309 }, { "splice", ko_no_argument, 310 }, - { "splice-model", ko_no_argument, 311 }, + { "splice-model", ko_no_argument, 'J' }, { "cost-non-gt-ag", ko_required_argument, 'C' }, { "no-long-join", ko_no_argument, 312 }, { "sr", ko_no_argument, 313 }, @@ -120,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const int main(int argc, char *argv[]) { - const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:"; + const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:J"; ketopt_t o = KETOPT_INIT; mm_mapopt_t opt; mm_idxopt_t ipt; @@ -187,6 +187,7 @@ int main(int argc, char *argv[]) else if (c == 'R') rg = o.arg; else if (c == 'h') fp_help = stdout; else if (c == '2') opt.flag |= MM_F_2_IO_THREADS; + else if (c == 'J') opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model else if (c == 'o') { if (strcmp(o.arg, "-") != 0) { if (freopen(o.arg, "wb", stdout) == NULL) { @@ -205,7 +206,6 @@ int main(int argc, char *argv[]) else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice - else if (c == 311) opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join else if (c == 313) opt.flag |= MM_F_SR; // --sr else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus @@ -324,7 +324,7 @@ int main(int argc, char *argv[]) fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n"); fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k); fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w); - fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n"); +// fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n"); fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n"); fprintf(fp_help, " -d FILE dump index to FILE []\n"); fprintf(fp_help, " Mapping:\n"); diff --git a/minimap.h b/minimap.h index 3d2440f..a431e5a 100644 --- a/minimap.h +++ b/minimap.h @@ -5,7 +5,7 @@ #include #include -#define MM_VERSION "2.24-r1160-dirty" +#define MM_VERSION "2.24-r1161-dirty" #define MM_F_NO_DIAG 0x001 // no exact diagonal hit #define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name