diff --git a/align.c b/align.c index b4c00b1..25744e9 100644 --- a/align.c +++ b/align.c @@ -64,7 +64,7 @@ static int mm_check_zdrop(const uint8_t *qseq, const uint8_t *tseq, uint32_t n_c static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int q_intron) { uint32_t k, l, toff = 0, qoff = 0; - int32_t s = 0, max = 0, min_intron_len; + int32_t s = 0, max = 0, min_intron_len, n_gtag = 0, n_ctac = 0; min_intron_len = mm_min_intron_len(q, e, q_intron); if (p == 0) return; for (k = 0; k < p->n_cigar; ++k) { @@ -96,10 +96,19 @@ static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *t p->n_ambi += n_ambi, p->n_diff += len - n_ambi; s -= q + e * len; if (s < 0) s = 0; - } else toff += len, p->blen += len; + } else { // intron + uint8_t b[4]; + b[0] = tseq[toff], b[1] = tseq[toff+1]; + b[2] = tseq[toff+len-2], b[3] = tseq[toff+len-1]; + if (memcmp(b, "\2\3\0\2", 4) == 0) ++n_gtag; + else if (memcmp(b, "\1\3\0\1", 4) == 0) ++n_ctac; + toff += len, p->blen += len; + } } } p->dp_max = max; + if (n_gtag > n_ctac) p->trans_strand = 1; + else if (n_gtag < n_ctac) p->trans_strand = 2; } static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc() @@ -245,7 +254,7 @@ static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int32_ } } -static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez) +static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez, int splice_flag) { int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1; uint8_t *tseq, *qseq; @@ -267,8 +276,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe); if (opt->flag & MM_F_SPLICE) { - if (opt->flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR; - if (opt->flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV; + if (splice_flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR; + if (splice_flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV; + if (splice_flag & MM_F_SPLICE_BOTH) extra_flag |= KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV; } // compute rs0 and qs0 @@ -368,6 +378,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int if (r->p) { mm_idx_getseq(mi, rid, rs1, re1, tseq); mm_update_extra(r->p, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, (opt->flag&MM_F_SPLICE)? opt->q2 : 0); + if (rev && r->p->trans_strand) + r->p->trans_strand ^= 3; // flip to the read strand } kfree(km, tseq); @@ -450,7 +462,28 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m memset(&ez, 0, sizeof(ksw_extz_t)); for (i = 0; i < n_regs; ++i) { mm_reg1_t r2; - mm_align1(km, opt, mi, qlen, qseq0, ®s[i], &r2, a, &ez); + if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) { + mm_reg1_t s[2], s2[2]; + int which, trans_strand; + s[0] = s[1] = regs[i]; + mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], a, &ez, MM_F_SPLICE_FOR); + mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], a, &ez, MM_F_SPLICE_REV); + if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1; + else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2; + else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively + if (which == 0) { + regs[i] = s[0], r2 = s2[0]; + free(s[1].p); + } else { + regs[i] = s[1], r2 = s2[1]; + free(s[0].p); + } + regs[i].p->trans_strand = trans_strand; + } else { + mm_align1(km, opt, mi, qlen, qseq0, ®s[i], &r2, a, &ez, opt->flag); + if ((opt->flag&MM_F_SPLICE) && !(opt->flag&MM_F_SPLICE_BOTH)) + regs[i].p->trans_strand = opt->flag&MM_F_SPLICE_FOR? 1 : 2; + } if (r2.cnt > 0) regs = mm_insert_reg(&r2, i, &n_regs, regs); if (i > 0 && mm_align1_inv(km, opt, mi, qlen, qseq0, ®s[i-1], ®s[i], &r2, &ez)) { regs = mm_insert_reg(&r2, i, &n_regs, regs); diff --git a/format.c b/format.c index 02cf3be..144005a 100644 --- a/format.c +++ b/format.c @@ -119,7 +119,11 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r) mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score); if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc); if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split); - if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi); + if (r->p) { + mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi); + if (r->p->trans_strand == 1 || r->p->trans_strand == 2) + mm_sprintf_lite(s, "\tts:A:%c", "?+-?"[r->p->trans_strand]); + } } void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres) diff --git a/main.c b/main.c index f387f2d..6a084fb 100644 --- a/main.c +++ b/main.c @@ -8,7 +8,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r296-dirty" +#define MM_VERSION "2.0-r297-dirty" void liftrlimit() { @@ -112,6 +112,7 @@ int main(int argc, char *argv[]) return 0; } else if (c == 'u') { if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV; + else if (*optarg == 'B') opt.flag |= MM_F_SPLICE_BOTH; else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV; else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR; else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); @@ -153,7 +154,7 @@ int main(int argc, char *argv[]) opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV; opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 200000; opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0; - opt.noncan = 4; + opt.noncan = 5; opt.zdrop = 200; } else { fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg); diff --git a/map.c b/map.c index f4ea657..f8654b2 100644 --- a/map.c +++ b/map.c @@ -38,6 +38,8 @@ void mm_mapopt_init(mm_mapopt_t *opt) void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi) { + if (opt->flag & MM_F_SPLICE_BOTH) + opt->flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV); opt->max_occ = mm_idx_cal_max_occ(mi, opt->max_occ_frac); opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac); if (mm_verbose >= 3) diff --git a/minimap.h b/minimap.h index bce2448..c09b846 100644 --- a/minimap.h +++ b/minimap.h @@ -17,6 +17,7 @@ #define MM_F_SPLICE 0x080 #define MM_F_SPLICE_FOR 0x100 #define MM_F_SPLICE_REV 0x200 +#define MM_F_SPLICE_BOTH 0x400 #define MM_IDX_MAGIC "MMI\2" @@ -58,7 +59,8 @@ typedef struct { uint32_t capacity; int32_t dp_score, dp_max, dp_max2; uint32_t blen; - uint32_t n_diff, n_ambi; + uint32_t n_diff; + uint32_t n_ambi:30, trans_strand:2; uint32_t n_cigar; uint32_t cigar[]; } mm_extra_t; diff --git a/mmpriv.h b/mmpriv.h index 6f69467..83cfcf4 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -62,7 +62,8 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc); static inline int32_t mm_min_intron_len(int32_t q, int32_t e, int32_t q_intron) { - return q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX; + int32_t x = q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX; + return x > 4? x : 4; } #endif