diff --git a/chain.c b/chain.c index 8894d44..37c920a 100644 --- a/chain.c +++ b/chain.c @@ -56,7 +56,8 @@ int mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int min_cn int c_log, c_lin; c_lin = (int)(dd * .01 * avg_qspan); c_log = ilog2_32(dd); - sc -= c_lin < c_log? c_lin : c_log; + if (dr > dq) sc -= c_lin < c_log? c_lin : c_log; + else sc -= c_lin + (c_log>>1); } else sc -= (int)(dd * .01 * avg_qspan) + (ilog2_32(dd)>>1); sc += f[j]; if (sc > max_f) { diff --git a/format.c b/format.c index fd51c00..02cf3be 100644 --- a/format.c +++ b/format.c @@ -122,7 +122,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r) if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi); } -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag) +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres) { s->l = 0; mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]); @@ -136,8 +136,12 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m if (r->p && (opt_flag & MM_F_OUT_CG)) { uint32_t k; mm_sprintf_lite(s, "\tcg:Z:"); - for (k = 0; k < r->p->n_cigar; ++k) - mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]); + for (k = 0; k < r->p->n_cigar; ++k) { + int op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4; + if (intron_thres > 0 && op == 2 && len >= intron_thres) + mm_sprintf_lite(s, "%dN", len); + else mm_sprintf_lite(s, "%d%c", len, "MID"[op]); + } } if (r->p && (opt_flag & MM_F_OUT_CS)) write_cs(km, s, mi, t, r); @@ -167,7 +171,7 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp) } else str_copy(s, seq, seq + l); } -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs) +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs, int intron_thres) { int flag = 0; s->l = 0; @@ -186,8 +190,12 @@ void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs; int clip_char = (flag&0x800)? 'H' : 'S'; if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char); - for (k = 0; k < r->p->n_cigar; ++k) - mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]); + for (k = 0; k < r->p->n_cigar; ++k) { + int op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4; + if (intron_thres > 0 && op == 2 && len >= intron_thres) + mm_sprintf_lite(s, "%dN", len); + else mm_sprintf_lite(s, "%d%c", len, "MID"[op]); + } clip_len = r->rev? r->qs : t->l_seq - r->qe; if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char); } else mm_sprintf_lite(s, "*"); diff --git a/main.c b/main.c index 5ac5262..32a2bc7 100644 --- a/main.c +++ b/main.c @@ -8,7 +8,7 @@ #include "minimap.h" #include "mmpriv.h" -#define MM_VERSION "2.0-r279-dirty" +#define MM_VERSION "2.0-r280-dirty" void liftrlimit() { diff --git a/map.c b/map.c index 6e7b869..dd1624c 100644 --- a/map.c +++ b/map.c @@ -344,19 +344,24 @@ static void *worker_pipeline(void *shared, int step, void *in) void *km = 0; step_t *s = (step_t*)in; const mm_idx_t *mi = p->mi; + int intron_thres = -1; for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]); free(s->buf); + if (p->opt->flag & MM_F_CDNA) + intron_thres = (int)((float)(p->opt->q2 - p->opt->q) / p->opt->e + 0.999f); if ((p->opt->flag & MM_F_OUT_CS) && !(mm_dbg_flag & MM_DBG_NO_KALLOC)) km = km_init(); for (i = 0; i < s->n_seq; ++i) { mm_bseq1_t *t = &s->seq[i]; for (j = 0; j < s->n_reg[i]; ++j) { mm_reg1_t *r = &s->reg[i][j]; - if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i]); - else mm_write_paf(&p->str, mi, t, r, km, p->opt->flag); + if (p->opt->flag & MM_F_OUT_SAM) + mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i], intron_thres); + else + mm_write_paf(&p->str, mi, t, r, km, p->opt->flag, intron_thres); puts(p->str.s); } if (s->n_reg[i] == 0 && (p->opt->flag & MM_F_OUT_SAM)) { - mm_write_sam(&p->str, 0, t, 0, 0, 0); + mm_write_sam(&p->str, 0, t, 0, 0, 0, 0); puts(p->str.s); } for (j = 0; j < s->n_reg[i]; ++j) free(s->reg[i][j].p); diff --git a/mmpriv.h b/mmpriv.h index 1c03ae7..6f22b0b 100644 --- a/mmpriv.h +++ b/mmpriv.h @@ -40,8 +40,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end); void radix_sort_64(uint64_t *beg, uint64_t *end); uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk); -void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag); -void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs); +void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres); +void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs, int intron_thres); int mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int min_cnt, int min_sc, int is_cdna, int64_t n, mm128_t *a, uint64_t **_u, void *km); mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);