r1161: merged the simple and complex models
This commit is contained in:
parent
7ced0f16a0
commit
1834b1fd42
|
|
@ -68,7 +68,6 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
uint8_t *qr, *sf, *mem, *mem2 = 0;
|
||||
__m128i q_, q2_, qe_, zero_, sc_mch_, sc_mis_, sc_N_, m1_;
|
||||
__m128i *u, *v, *x, *y, *x2, *s, *p = 0, *donor, *acceptor;
|
||||
const int sp0[4] = { 8, 15, 21, 30 };
|
||||
|
||||
ksw_reset_extz(ez);
|
||||
if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
|
||||
|
|
@ -119,45 +118,16 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
|
|||
memcpy(sf, target, tlen);
|
||||
|
||||
// set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
|
||||
if ((flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) && !(flag & KSW_EZ_SPLICE_CMPLX)) {
|
||||
int semi_cost = flag&KSW_EZ_SPLICE_FLANK? -noncan/2 : 0; // GTr or yAG is worth 0.5 bit; see PMID:18688272
|
||||
memset(donor, -noncan, tlen_ * 16);
|
||||
memset(acceptor, -noncan, tlen_ * 16);
|
||||
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
||||
for (t = 0; t < tlen - 4; ++t) {
|
||||
int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
|
||||
if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 3) can_type = 1; // GTr...
|
||||
if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 3) can_type = 1; // CTr...
|
||||
if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2;
|
||||
if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
|
||||
}
|
||||
for (t = 2; t < tlen; ++t) {
|
||||
int can_type = 0;
|
||||
if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 0 && target[t] == 2) can_type = 1; // ...yAG
|
||||
if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 0 && target[t] == 1) can_type = 1; // ...yAC
|
||||
if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2;
|
||||
if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
|
||||
}
|
||||
} else {
|
||||
for (t = 0; t < tlen - 4; ++t) {
|
||||
int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
|
||||
if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 0) can_type = 1; // GAy...
|
||||
if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 0) can_type = 1; // CAy...
|
||||
if (can_type && (target[t+3] == 1 || target[t+3] == 3)) can_type = 2;
|
||||
if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
|
||||
}
|
||||
for (t = 2; t < tlen; ++t) {
|
||||
int can_type = 0;
|
||||
if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 3 && target[t] == 2) can_type = 1; // ...rTG
|
||||
if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 3 && target[t] == 1) can_type = 1; // ...rTC
|
||||
if (can_type && (target[t-2] == 0 || target[t-2] == 2)) can_type = 2;
|
||||
if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
|
||||
}
|
||||
}
|
||||
} else if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) { // use the complex splice model
|
||||
if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
|
||||
const int sp0[4] = { 8, 15, 21, 30 };
|
||||
int sp[4];
|
||||
for (t = 0; t < 4; ++t)
|
||||
sp[t] = (int)((double)sp0[t] / 4. + .499);
|
||||
if (flag & KSW_EZ_SPLICE_CMPLX) {
|
||||
for (t = 0; t < 4; ++t)
|
||||
sp[t] = (int)((double)sp0[t] / 4. + .499);
|
||||
} else {
|
||||
sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
|
||||
sp[1] = sp[2] = sp[3] = noncan;
|
||||
}
|
||||
memset(donor, -sp[3], tlen_ * 16);
|
||||
memset(acceptor, -sp[3], tlen_ * 16);
|
||||
if (!(flag & KSW_EZ_REV_CIGAR)) {
|
||||
|
|
|
|||
8
main.c
8
main.c
|
|
@ -33,7 +33,7 @@ static ko_longopt_t long_options[] = {
|
|||
{ "min-dp-len", ko_required_argument, 308 },
|
||||
{ "print-aln-seq", ko_no_argument, 309 },
|
||||
{ "splice", ko_no_argument, 310 },
|
||||
{ "splice-model", ko_no_argument, 311 },
|
||||
{ "splice-model", ko_no_argument, 'J' },
|
||||
{ "cost-non-gt-ag", ko_required_argument, 'C' },
|
||||
{ "no-long-join", ko_no_argument, 312 },
|
||||
{ "sr", ko_no_argument, 313 },
|
||||
|
|
@ -120,7 +120,7 @@ static inline void yes_or_no(mm_mapopt_t *opt, int64_t flag, int long_idx, const
|
|||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:";
|
||||
const char *opt_str = "2aSDw:k:K:t:r:f:Vv:g:G:I:d:XT:s:x:Hcp:M:n:z:A:B:O:E:m:N:Qu:R:hF:LC:yYPo:e:U:j:J";
|
||||
ketopt_t o = KETOPT_INIT;
|
||||
mm_mapopt_t opt;
|
||||
mm_idxopt_t ipt;
|
||||
|
|
@ -187,6 +187,7 @@ int main(int argc, char *argv[])
|
|||
else if (c == 'R') rg = o.arg;
|
||||
else if (c == 'h') fp_help = stdout;
|
||||
else if (c == '2') opt.flag |= MM_F_2_IO_THREADS;
|
||||
else if (c == 'J') opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model
|
||||
else if (c == 'o') {
|
||||
if (strcmp(o.arg, "-") != 0) {
|
||||
if (freopen(o.arg, "wb", stdout) == NULL) {
|
||||
|
|
@ -205,7 +206,6 @@ int main(int argc, char *argv[])
|
|||
else if (c == 308) opt.min_ksw_len = atoi(o.arg); // --min-dp-len
|
||||
else if (c == 309) mm_dbg_flag |= MM_DBG_PRINT_QNAME | MM_DBG_PRINT_ALN_SEQ, n_threads = 1; // --print-aln-seq
|
||||
else if (c == 310) opt.flag |= MM_F_SPLICE; // --splice
|
||||
else if (c == 311) opt.flag |= MM_F_SPLICE_CMPLX; // --splice-model
|
||||
else if (c == 312) opt.flag |= MM_F_NO_LJOIN; // --no-long-join
|
||||
else if (c == 313) opt.flag |= MM_F_SR; // --sr
|
||||
else if (c == 317) opt.end_bonus = atoi(o.arg); // --end-bonus
|
||||
|
|
@ -324,7 +324,7 @@ int main(int argc, char *argv[])
|
|||
fprintf(fp_help, " -H use homopolymer-compressed k-mer (preferrable for PacBio)\n");
|
||||
fprintf(fp_help, " -k INT k-mer size (no larger than 28) [%d]\n", ipt.k);
|
||||
fprintf(fp_help, " -w INT minimizer window size [%d]\n", ipt.w);
|
||||
fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n");
|
||||
// fprintf(fp_help, " -j INT syncmer submer size (overriding -w) []\n");
|
||||
fprintf(fp_help, " -I NUM split index for every ~NUM input bases [4G]\n");
|
||||
fprintf(fp_help, " -d FILE dump index to FILE []\n");
|
||||
fprintf(fp_help, " Mapping:\n");
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include <stdio.h>
|
||||
#include <sys/types.h>
|
||||
|
||||
#define MM_VERSION "2.24-r1160-dirty"
|
||||
#define MM_VERSION "2.24-r1161-dirty"
|
||||
|
||||
#define MM_F_NO_DIAG 0x001 // no exact diagonal hit
|
||||
#define MM_F_NO_DUAL 0x002 // skip pairs where query name is lexicographically larger than target name
|
||||
|
|
|
|||
Loading…
Reference in New Issue