r297: bidirectional RNA alignment

This commit is contained in:
Heng Li 2017-08-17 06:02:44 -04:00
parent b5f5929bf9
commit 2cde8d257c
6 changed files with 54 additions and 11 deletions

45
align.c
View File

@ -64,7 +64,7 @@ static int mm_check_zdrop(const uint8_t *qseq, const uint8_t *tseq, uint32_t n_c
static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *tseq, const int8_t *mat, int8_t q, int8_t e, int q_intron)
{
uint32_t k, l, toff = 0, qoff = 0;
int32_t s = 0, max = 0, min_intron_len;
int32_t s = 0, max = 0, min_intron_len, n_gtag = 0, n_ctac = 0;
min_intron_len = mm_min_intron_len(q, e, q_intron);
if (p == 0) return;
for (k = 0; k < p->n_cigar; ++k) {
@ -96,10 +96,19 @@ static void mm_update_extra(mm_extra_t *p, const uint8_t *qseq, const uint8_t *t
p->n_ambi += n_ambi, p->n_diff += len - n_ambi;
s -= q + e * len;
if (s < 0) s = 0;
} else toff += len, p->blen += len;
} else { // intron
uint8_t b[4];
b[0] = tseq[toff], b[1] = tseq[toff+1];
b[2] = tseq[toff+len-2], b[3] = tseq[toff+len-1];
if (memcmp(b, "\2\3\0\2", 4) == 0) ++n_gtag;
else if (memcmp(b, "\1\3\0\1", 4) == 0) ++n_ctac;
toff += len, p->blen += len;
}
}
}
p->dp_max = max;
if (n_gtag > n_ctac) p->trans_strand = 1;
else if (n_gtag < n_ctac) p->trans_strand = 2;
}
static void mm_append_cigar(mm_reg1_t *r, uint32_t n_cigar, uint32_t *cigar) // TODO: this calls the libc realloc()
@ -245,7 +254,7 @@ static void mm_fix_bad_ends(const mm_reg1_t *r, const mm128_t *a, int bw, int32_
}
}
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez)
static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, uint8_t *qseq0[2], mm_reg1_t *r, mm_reg1_t *r2, mm128_t *a, ksw_extz_t *ez, int splice_flag)
{
int32_t rid = a[r->as].x<<1>>33, rev = a[r->as].x>>63, as1, cnt1;
uint8_t *tseq, *qseq;
@ -267,8 +276,9 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
mm_adjust_minier(mi, qseq0, &a[as1 + cnt1 - 1], &re, &qe);
if (opt->flag & MM_F_SPLICE) {
if (opt->flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
if (opt->flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
if (splice_flag & MM_F_SPLICE_FOR) extra_flag |= rev? KSW_EZ_SPLICE_REV : KSW_EZ_SPLICE_FOR;
if (splice_flag & MM_F_SPLICE_REV) extra_flag |= rev? KSW_EZ_SPLICE_FOR : KSW_EZ_SPLICE_REV;
if (splice_flag & MM_F_SPLICE_BOTH) extra_flag |= KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV;
}
// compute rs0 and qs0
@ -368,6 +378,8 @@ static void mm_align1(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int
if (r->p) {
mm_idx_getseq(mi, rid, rs1, re1, tseq);
mm_update_extra(r->p, &qseq0[r->rev][qs1], tseq, mat, opt->q, opt->e, (opt->flag&MM_F_SPLICE)? opt->q2 : 0);
if (rev && r->p->trans_strand)
r->p->trans_strand ^= 3; // flip to the read strand
}
kfree(km, tseq);
@ -450,7 +462,28 @@ mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *m
memset(&ez, 0, sizeof(ksw_extz_t));
for (i = 0; i < n_regs; ++i) {
mm_reg1_t r2;
mm_align1(km, opt, mi, qlen, qseq0, &regs[i], &r2, a, &ez);
if ((opt->flag&MM_F_SPLICE) && (opt->flag&MM_F_SPLICE_FOR) && (opt->flag&MM_F_SPLICE_REV)) {
mm_reg1_t s[2], s2[2];
int which, trans_strand;
s[0] = s[1] = regs[i];
mm_align1(km, opt, mi, qlen, qseq0, &s[0], &s2[0], a, &ez, MM_F_SPLICE_FOR);
mm_align1(km, opt, mi, qlen, qseq0, &s[1], &s2[1], a, &ez, MM_F_SPLICE_REV);
if (s[0].p->dp_score > s[1].p->dp_score) which = 0, trans_strand = 1;
else if (s[0].p->dp_score < s[1].p->dp_score) which = 1, trans_strand = 2;
else trans_strand = 3, which = (qlen + s[0].p->dp_score) & 1; // randomly choose a strand, effectively
if (which == 0) {
regs[i] = s[0], r2 = s2[0];
free(s[1].p);
} else {
regs[i] = s[1], r2 = s2[1];
free(s[0].p);
}
regs[i].p->trans_strand = trans_strand;
} else {
mm_align1(km, opt, mi, qlen, qseq0, &regs[i], &r2, a, &ez, opt->flag);
if ((opt->flag&MM_F_SPLICE) && !(opt->flag&MM_F_SPLICE_BOTH))
regs[i].p->trans_strand = opt->flag&MM_F_SPLICE_FOR? 1 : 2;
}
if (r2.cnt > 0) regs = mm_insert_reg(&r2, i, &n_regs, regs);
if (i > 0 && mm_align1_inv(km, opt, mi, qlen, qseq0, &regs[i-1], &regs[i], &r2, &ez)) {
regs = mm_insert_reg(&r2, i, &n_regs, regs);

View File

@ -119,7 +119,11 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
mm_sprintf_lite(s, "\ttp:A:%c\tcm:i:%d\ts1:i:%d", type, r->cnt, r->score);
if (r->parent == r->id) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
if (r->split) mm_sprintf_lite(s, "\tzd:i:%d", r->split);
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
if (r->p) {
mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
if (r->p->trans_strand == 1 || r->p->trans_strand == 2)
mm_sprintf_lite(s, "\tts:A:%c", "?+-?"[r->p->trans_strand]);
}
}
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres)

5
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r296-dirty"
#define MM_VERSION "2.0-r297-dirty"
void liftrlimit()
{
@ -112,6 +112,7 @@ int main(int argc, char *argv[])
return 0;
} else if (c == 'u') {
if (*optarg == 'b') opt.flag |= MM_F_SPLICE_FOR|MM_F_SPLICE_REV;
else if (*optarg == 'B') opt.flag |= MM_F_SPLICE_BOTH;
else if (*optarg == 'f') opt.flag |= MM_F_SPLICE_FOR, opt.flag &= ~MM_F_SPLICE_REV;
else if (*optarg == 'r') opt.flag |= MM_F_SPLICE_REV, opt.flag &= ~MM_F_SPLICE_FOR;
else if (*optarg == 'n') opt.flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV);
@ -153,7 +154,7 @@ int main(int argc, char *argv[])
opt.flag |= MM_F_SPLICE | MM_F_SPLICE_FOR | MM_F_SPLICE_REV;
opt.max_gap = 2000, opt.max_gap_ref = opt.bw = 200000;
opt.a = 1, opt.b = 2, opt.q = 2, opt.e = 1, opt.q2 = 32, opt.e2 = 0;
opt.noncan = 4;
opt.noncan = 5;
opt.zdrop = 200;
} else {
fprintf(stderr, "[E::%s] unknown preset '%s'\n", __func__, optarg);

2
map.c
View File

@ -38,6 +38,8 @@ void mm_mapopt_init(mm_mapopt_t *opt)
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi)
{
if (opt->flag & MM_F_SPLICE_BOTH)
opt->flag &= ~(MM_F_SPLICE_FOR|MM_F_SPLICE_REV);
opt->max_occ = mm_idx_cal_max_occ(mi, opt->max_occ_frac);
opt->mid_occ = mm_idx_cal_max_occ(mi, opt->mid_occ_frac);
if (mm_verbose >= 3)

View File

@ -17,6 +17,7 @@
#define MM_F_SPLICE 0x080
#define MM_F_SPLICE_FOR 0x100
#define MM_F_SPLICE_REV 0x200
#define MM_F_SPLICE_BOTH 0x400
#define MM_IDX_MAGIC "MMI\2"
@ -58,7 +59,8 @@ typedef struct {
uint32_t capacity;
int32_t dp_score, dp_max, dp_max2;
uint32_t blen;
uint32_t n_diff, n_ambi;
uint32_t n_diff;
uint32_t n_ambi:30, trans_strand:2;
uint32_t n_cigar;
uint32_t cigar[];
} mm_extra_t;

View File

@ -62,7 +62,8 @@ void mm_set_mapq(int n_regs, mm_reg1_t *regs, int min_chain_sc);
static inline int32_t mm_min_intron_len(int32_t q, int32_t e, int32_t q_intron)
{
return q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX;
int32_t x = q_intron > q? (int)((float)(q_intron - q) / e + .999) : INT32_MAX;
return x > 4? x : 4;
}
#endif