sam output

This commit is contained in:
Heng Li 2017-06-25 22:05:20 -04:00
parent 39083be9ab
commit b1077ff14c
5 changed files with 71 additions and 8 deletions

View File

@ -58,9 +58,9 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_re
{
s->l = 0;
mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev], mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re);
if (r->p) mm_sprintf_lite(s, "\t%d\t%d\t255", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
else mm_sprintf_lite(s, "\t%d\t%d\t255", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs);
mm_sprintf_lite(s, "\tcm:i:%d", r->cnt);
if (r->p) mm_sprintf_lite(s, "\t%d\t%d", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
else mm_sprintf_lite(s, "\t%d\t%d", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs);
mm_sprintf_lite(s, "\t%d\tcm:i:%d", r->mapq, r->cnt);
if (r->p) mm_sprintf_lite(s, "\ts1:i:%d", r->score);
if (r->parent == which) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
if (r->p) {
@ -70,3 +70,50 @@ void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_re
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
}
}
static char comp_tab[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47,
48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63,
64, 'T', 'V', 'G', 'H', 'E', 'F', 'C', 'D', 'I', 'J', 'M', 'L', 'K', 'N', 'O',
'P', 'Q', 'Y', 'S', 'A', 'A', 'B', 'W', 'X', 'R', 'Z', 91, 92, 93, 94, 95,
64, 't', 'v', 'g', 'h', 'e', 'f', 'c', 'd', 'i', 'j', 'm', 'l', 'k', 'n', 'o',
'p', 'q', 'y', 's', 'a', 'a', 'b', 'w', 'x', 'r', 'z', 123, 124, 125, 126, 127
};
static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp)
{
if (rev) {
int i;
str_enlarge(s, l);
for (i = 0; i < l; ++i) {
int c = seq[l - 1 - i];
s->s[s->l + i] = c < 128 && comp? comp_tab[c] : c;
}
s->l += l;
} else str_copy(s, seq, seq + l);
}
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r)
{
int flag = 0;
s->l = 0;
if (r->rev) flag |= 0x10;
if (r->parent != which) flag |= 0x80;
mm_sprintf_lite(s, "%s\t%d\t%s\t%d\t%d\t", t->name, flag, mi->seq[r->rid].name, r->rs+1, r->mapq);
if (r->p) { // TODO: using hard clippings
uint32_t k, clip_len = r->rev? t->l_seq - r->qe : r->qs;
if (clip_len) mm_sprintf_lite(s, "%dS", clip_len);
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
clip_len = r->rev? r->qs : t->l_seq - r->qe;
if (clip_len) mm_sprintf_lite(s, "%dS", clip_len);
} else mm_sprintf_lite(s, "*");
mm_sprintf_lite(s, "\t*\t0\t0\t");
sam_write_sq(s, t->seq, t->l_seq, r->rev, r->rev);
mm_sprintf_lite(s, "\t*\tcm:i:%d", r->cnt);
if (r->p) mm_sprintf_lite(s, "\ts1:i:%d", r->score);
if (r->parent == which) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->score, r->p->n_ambi);
}

9
main.c
View File

@ -54,7 +54,7 @@ int main(int argc, char *argv[])
mm_realtime0 = realtime();
mm_mapopt_init(&opt);
while ((c = getopt(argc, argv, "w:k:B:b:t:r:f:Vv:Ng:I:d:ST:s:Dx:Hp:m:z:")) >= 0) {
while ((c = getopt(argc, argv, "w:k:B:b:t:r:f:Vv:Ng:I:d:ST:s:Dx:Hp:m:z:F:")) >= 0) {
if (c == 'w') w = atoi(optarg);
else if (c == 'k') k = atoi(optarg);
else if (c == 'b') b = atoi(optarg);
@ -85,6 +85,13 @@ int main(int argc, char *argv[])
else if (*p == 'K' || *p == 'k') x *= 1e3;
if (c == 'B') mini_batch_size = (uint64_t)(x + .499);
else batch_size = (uint64_t)(x + .499);
} else if (c == 'F') {
if (strcmp(optarg, "sam") == 0) opt.flag |= MM_F_OUT_SAM;
else if (strcmp(optarg, "paf") == 0) opt.flag &= ~MM_F_OUT_SAM;
else {
fprintf(stderr, "[E::%s] unknown output format '%s'\n", __func__, optarg);
return 0;
}
} else if (c == 'x') {
if (strcmp(optarg, "ava10k") == 0) {
opt.flag |= MM_F_AVA | MM_F_NO_SELF;

10
map.c
View File

@ -306,7 +306,6 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, m
if (opt->sdust_thres > 0)
mm_dust_minier(&b->mini, l_seq, seq, opt->sdust_thres, b->sdb);
regs = mm_map_frag(opt, mi, b, 0, b->mini.n, qname, l_seq, seq, n_regs);
// mm_select_sub(opt->mask_level, opt->pri_ratio, n_regs, regs, b->km);
return regs;
}
@ -368,7 +367,8 @@ static void *worker_pipeline(void *shared, int step, void *in)
bseq1_t *t = &s->seq[i];
for (j = 0; j < s->n_reg[i]; ++j) {
mm_reg1_t *r = &s->reg[i][j];
mm_write_paf(&p->str, mi, t, j, r);
if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, j, r);
else mm_write_paf(&p->str, mi, t, j, r);
puts(p->str.s);
free(r->p);
}
@ -389,7 +389,13 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int
if (pl.fp == 0) return -1;
pl.opt = opt, pl.mi = idx;
pl.n_threads = n_threads, pl.mini_batch_size = mini_batch_size;
if (opt->flag & MM_F_OUT_SAM) {
uint32_t i;
for (i = 0; i < idx->n_seq; ++i)
printf("@SQ\tID:%s\tLN:%d\n", idx->seq[i].name, idx->seq[i].len);
}
kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3);
free(pl.str.s);
bseq_close(pl.fp);
return 0;
}

View File

@ -7,8 +7,9 @@
#define MM_IDX_DEF_B 14
#define MM_F_NO_SELF 0x2
#define MM_F_AVA 0x8
#define MM_F_NO_SELF 0x01
#define MM_F_AVA 0x02
#define MM_F_OUT_SAM 0x04
#define MM_IDX_MAGIC "MMI\2"
@ -57,6 +58,7 @@ typedef struct {
int32_t qs, qe, rs, re;
int32_t parent, subsc;
int32_t as;
int32_t mapq, n_sub; // TODO: n_sub is not used for now
mm_extra_t *p;
} mm_reg1_t;

View File

@ -28,6 +28,7 @@ void radix_sort_64(uint64_t *beg, uint64_t *end);
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r);
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r);
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_sc, int n, mm128_t *a, uint64_t **_u, void *km);
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);