r297: bidirectional RNA alignment
This commit is contained in:
parent
b5f5929bf9
commit
2cde8d257c
45
align.c
45
align.c
|
|
@ -64,7 +64,7 @@ static int mm_check_zdrop(const uint8_t *qseq, const uint8_t *tseq, uint32_t n_c
|
|||
static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int q_intron)
|
||||
{
|
||||
uint32_t k, l, toff = 0, qoff = 0;
|
||||
int32_t s = 0, max = 0, min_intron_len;
|
||||
int32_t s = 0, max = 0, min_intron_len, n_gtag = 0, n_ctac = 0;
|
||||
min_intron_len = mm_min_intron_len(q, e, q_intron);
|
||||
if (p == 0) return;
|
||||
for (k = 0; k < p->n_cigar; ++k) {
|
||||
|
|
@ -96,10 +96,19 @@ static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *t
|
|||
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
|
||||
s -= q + e * len;
|
||||
if (s < 0) s = 0;
|
||||
} else toff += len, p->blen += len;
|
||||
} else { // intron
|
||||
uint8_t b[4];
|
||||
b[0] = tseq[toff], b[1] = tseq[toff+1];
|
||||
b[2] = tseq[toff+len-2], b[3] = tseq[toff+len-1];
|
||||
if (memcmp(b, "\2\3\0\2", 4) == 0) ++n_gtag;
|
||||
else if (memcmp(b, "\1\3\0\1", 4) == 0) ++n_ctac;
|
||||
toff += len, p->blen += len;
|
||||
}
|
||||
}
|
||||
}
|
||||
p->dp_max = max;
|
||||
if (n_gtag > n_ctac) p->trans_strand = 1;
|
||||
else if (n_gtag < n_ctac) p->trans_strand = 2;
|
||||
}
|
||||
|
||||
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
|
||||
|
|
@ -245,7 +254,7 @@ static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int32_
|
|||
}
|
||||
}
|
||||
|
||||
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez)
|
||||
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez, int splice_flag)
|
||||
{
|
||||
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
|
||||
uint8_t *tseq, *qseq;
|
||||
|
|
@ -267,8 +276,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
|
||||
|
||||
if (opt->flag & MM_F_SPLICE) {
|
||||
if (opt->flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
|
||||
if (opt->flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
|
||||
if (splice_flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
|
||||
if (splice_flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
|
||||
if (splice_flag & MM_F_SPLICE_BOTH) extra_flag |= KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV;
|
||||
}
|
||||
|
||||
// compute rs0 and qs0
|
||||
|
|
@ -368,6 +378,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
|
|||
if (r->p) {
|
||||
mm_idx_getseq(mi, rid, rs1, re1, tseq);
|
||||
mm_update_extra(r->p, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, (opt->flag&MM_F_SPLICE)? opt->q2 : 0);
|
||||
if (rev && r->p->trans_strand)
|
||||
r->p->trans_strand ^= 3; // flip to the read strand
|
||||
}
|
||||
|
||||
kfree(km, tseq);
|
||||
|
|
@ -450,7 +462,28 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
|
|||
memset(&ez, 0, sizeof(ksw_extz_t));
|
||||
for (i = 0; i < n_regs; ++i) {
|
||||
mm_reg1_t r2;
|
||||
mm_align1(km, opt, mi, qlen, qseq0, ®s[i], &r2, a, &ez);
|
||||
if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) {
|
||||
mm_reg1_t s[2], s2[2];
|
||||
int which, trans_strand;
|
||||
s[0] = s[1] = regs[i];
|
||||
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], a, &ez, MM_F_SPLICE_FOR);
|
||||
mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], a, &ez, MM_F_SPLICE_REV);
|
||||
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
|
||||
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
|
||||
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
|
||||
if (which == 0) {
|
||||
regs[i] = s[0], r2 = s2[0];
|
||||
free(s[1].p);
|
||||
} else {
|
||||
regs[i] = s[1], r2 = s2[1];
|
||||
free(s[0].p);
|
||||
}
|
||||
regs[i].p->trans_strand = trans_strand;
|
||||
} else {
|
||||
mm_align1(km, opt, mi, qlen, qseq0, ®s[i], &r2, a, &ez, opt->flag);
|
||||
if ((opt->flag&MM_F_SPLICE) && !(opt->flag&MM_F_SPLICE_BOTH))
|
||||
regs[i].p->trans_strand = opt->flag&MM_F_SPLICE_FOR? 1 : 2;
|
||||
}
|
||||
if (r2.cnt > 0) regs = mm_insert_reg(&r2, i, &n_regs, regs);
|
||||
if (i > 0 && mm_align1_inv(km, opt, mi, qlen, qseq0, ®s[i-1], ®s[i], &r2, &ez)) {
|
||||
regs = mm_insert_reg(&r2, i, &n_regs, regs);
|
||||
|
|
|
|||
6
format.c
6
format.c
|
|
@ -119,7 +119,11 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
|
|||
mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score);
|
||||
if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
|
||||
if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split);
|
||||
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
if (r->p) {
|
||||
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
if (r->p->trans_strand == 1 || r->p->trans_strand == 2)
|
||||
mm_sprintf_lite(s, "\tts:A:%c", "?+-?"[r->p->trans_strand]);
|
||||
}
|
||||
}
|
||||
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres)
|
||||
|
|
|
|||
5
main.c
5
main.c
|
|
@ -8,7 +8,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r296-dirty"
|
||||
#define MM_VERSION "2.0-r297-dirty"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -112,6 +112,7 @@ int main(int argc, char *argv[])
|
|||
return 0;
|
||||
} else if (c == 'u') {
|
||||
if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV;
|
||||
else if (*optarg == 'B') opt.flag |= MM_F_SPLICE_BOTH;
|
||||
else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV;
|
||||
else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR;
|
||||
else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV);
|
||||
|
|
@ -153,7 +154,7 @@ int main(int argc, char *argv[])
|
|||
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;
|
||||
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 200000;
|
||||
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0;
|
||||
opt.noncan = 4;
|
||||
opt.noncan = 5;
|
||||
opt.zdrop = 200;
|
||||
} else {
|
||||
fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg);
|
||||
|
|
|
|||
2
map.c
2
map.c
|
|
@ -38,6 +38,8 @@ void mm_mapopt_init(mm_mapopt_t *opt)
|
|||
|
||||
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
|
||||
{
|
||||
if (opt->flag & MM_F_SPLICE_BOTH)
|
||||
opt->flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV);
|
||||
opt->max_occ = mm_idx_cal_max_occ(mi, opt->max_occ_frac);
|
||||
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
|
||||
if (mm_verbose >= 3)
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@
|
|||
#define MM_F_SPLICE 0x080
|
||||
#define MM_F_SPLICE_FOR 0x100
|
||||
#define MM_F_SPLICE_REV 0x200
|
||||
#define MM_F_SPLICE_BOTH 0x400
|
||||
|
||||
#define MM_IDX_MAGIC "MMI\2"
|
||||
|
||||
|
|
@ -58,7 +59,8 @@ typedef struct {
|
|||
uint32_t capacity;
|
||||
int32_t dp_score, dp_max, dp_max2;
|
||||
uint32_t blen;
|
||||
uint32_t n_diff, n_ambi;
|
||||
uint32_t n_diff;
|
||||
uint32_t n_ambi:30, trans_strand:2;
|
||||
uint32_t n_cigar;
|
||||
uint32_t cigar[];
|
||||
} mm_extra_t;
|
||||
|
|
|
|||
3
mmpriv.h
3
mmpriv.h
|
|
@ -62,7 +62,8 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc);
|
|||
|
||||
static inline int32_t mm_min_intron_len(int32_t q, int32_t e, int32_t q_intron)
|
||||
{
|
||||
return q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX;
|
||||
int32_t x = q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX;
|
||||
return x > 4? x : 4;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
Loading…
Reference in New Issue