r280: output introns as "N" in the cdna mode

This commit is contained in:
Heng Li 2017-08-09 11:45:02 -04:00
parent 7429b12164
commit c59b0781bc
5 changed files with 27 additions and 13 deletions

View File

@ -56,7 +56,8 @@ int mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int min_cn
int c_log, c_lin;
c_lin = (int)(dd * .01 * avg_qspan);
c_log = ilog2_32(dd);
sc -= c_lin < c_log? c_lin : c_log;
if (dr > dq) sc -= c_lin < c_log? c_lin : c_log;
else sc -= c_lin + (c_log>>1);
} else sc -= (int)(dd * .01 * avg_qspan) + (ilog2_32(dd)>>1);
sc += f[j];
if (sc > max_f) {

View File

@ -122,7 +122,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
}
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag)
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres)
{
s->l = 0;
mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
@ -136,8 +136,12 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
if (r->p && (opt_flag & MM_F_OUT_CG)) {
uint32_t k;
mm_sprintf_lite(s, "\tcg:Z:");
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
for (k = 0; k < r->p->n_cigar; ++k) {
int op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
if (intron_thres > 0 && op == 2 && len >= intron_thres)
mm_sprintf_lite(s, "%dN", len);
else mm_sprintf_lite(s, "%d%c", len, "MID"[op]);
}
}
if (r->p && (opt_flag & MM_F_OUT_CS))
write_cs(km, s, mi, t, r);
@ -167,7 +171,7 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp)
} else str_copy(s, seq, seq + l);
}
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs)
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs, int intron_thres)
{
int flag = 0;
s->l = 0;
@ -186,8 +190,12 @@ void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const m
uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs;
int clip_char = (flag&0x800)? 'H' : 'S';
if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char);
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
for (k = 0; k < r->p->n_cigar; ++k) {
int op = r->p->cigar[k]&0xf, len = r->p->cigar[k]>>4;
if (intron_thres > 0 && op == 2 && len >= intron_thres)
mm_sprintf_lite(s, "%dN", len);
else mm_sprintf_lite(s, "%d%c", len, "MID"[op]);
}
clip_len = r->rev? r->qs : t->l_seq - r->qe;
if (clip_len) mm_sprintf_lite(s, "%d%c", clip_len, clip_char);
} else mm_sprintf_lite(s, "*");

2
main.c
View File

@ -8,7 +8,7 @@
#include "minimap.h"
#include "mmpriv.h"
#define MM_VERSION "2.0-r279-dirty"
#define MM_VERSION "2.0-r280-dirty"
void liftrlimit()
{

11
map.c
View File

@ -344,19 +344,24 @@ static void *worker_pipeline(void *shared, int step, void *in)
void *km = 0;
step_t *s = (step_t*)in;
const mm_idx_t *mi = p->mi;
int intron_thres = -1;
for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]);
free(s->buf);
if (p->opt->flag & MM_F_CDNA)
intron_thres = (int)((float)(p->opt->q2 - p->opt->q) / p->opt->e + 0.999f);
if ((p->opt->flag & MM_F_OUT_CS) && !(mm_dbg_flag & MM_DBG_NO_KALLOC)) km = km_init();
for (i = 0; i < s->n_seq; ++i) {
mm_bseq1_t *t = &s->seq[i];
for (j = 0; j < s->n_reg[i]; ++j) {
mm_reg1_t *r = &s->reg[i][j];
if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i]);
else mm_write_paf(&p->str, mi, t, r, km, p->opt->flag);
if (p->opt->flag & MM_F_OUT_SAM)
mm_write_sam(&p->str, mi, t, r, s->n_reg[i], s->reg[i], intron_thres);
else
mm_write_paf(&p->str, mi, t, r, km, p->opt->flag, intron_thres);
puts(p->str.s);
}
if (s->n_reg[i] == 0 && (p->opt->flag & MM_F_OUT_SAM)) {
mm_write_sam(&p->str, 0, t, 0, 0, 0);
mm_write_sam(&p->str, 0, t, 0, 0, 0, 0);
puts(p->str.s);
}
for (j = 0; j < s->n_reg[i]; ++j) free(s->reg[i][j].p);

View File

@ -40,8 +40,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end);
void radix_sort_64(uint64_t *beg, uint64_t *end);
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag);
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs);
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, void *km, int opt_flag, int intron_thres);
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r, int n_regs, const mm_reg1_t *regs, int intron_thres);
int mm_chain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int min_cnt, int min_sc, int is_cdna, int64_t n, mm128_t *a, uint64_t **_u, void *km);
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);