separated formating printing

for SAM in future; and for performance
This commit is contained in:
Heng Li 2017-06-25 16:13:54 -04:00
parent 7780ab792a
commit 39083be9ab
5 changed files with 94 additions and 26 deletions

View File

@ -3,7 +3,7 @@ CFLAGS= -g -Wall -O2 -Wc++-compat -Wno-unused-function
CPPFLAGS=
INCLUDES= -I.
OBJS= kalloc.o kthread.o misc.o bseq.o sketch.o chain.o align.o sdust.o \
index.o map.o ksw2_extz2_sse.o
index.o format.o map.o ksw2_extz2_sse.o
PROG= minimap2
PROG_EXTRA= sdust
LIBS= -lm -lz -lpthread
@ -34,14 +34,15 @@ depend:
# DO NOT DELETE
align.o: minimap.h ksw2.h
align.o: minimap.h mmpriv.h bseq.h ksw2.h
bseq.o: bseq.h kseq.h
chain.o: minimap.h mmpriv.h kalloc.h
chain.o: minimap.h mmpriv.h bseq.h kalloc.h
format.o: mmpriv.h minimap.h bseq.h
index.o: kthread.h bseq.h minimap.h mmpriv.h kvec.h kalloc.h khash.h
kalloc.o: kalloc.h
ksw2_extz2_sse.o: ksw2.h
main.o: bseq.h minimap.h mmpriv.h
map.o: kthread.h kvec.h kalloc.h sdust.h minimap.h mmpriv.h bseq.h
map.o: kthread.h kvec.h kalloc.h sdust.h mmpriv.h minimap.h bseq.h
misc.o: minimap.h ksort.h
sdust.o: kalloc.h kdq.h kvec.h sdust.h
sketch.o: kvec.h kalloc.h minimap.h

View File

@ -4,10 +4,6 @@
#include "mmpriv.h"
#include "ksw2.h"
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
static void ksw_gen_simple_mat(int m, int8_t *mat, int8_t a, int8_t b)
{
int i, j;

72
format.c 100644
View File

@ -0,0 +1,72 @@
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "mmpriv.h"
static inline void str_enlarge(kstring_t *s, int l)
{
if (s->l + l + 1 > s->m) {
s->m = s->l + l + 1;
kroundup32(s->m);
s->s = (char*)realloc(s->s, s->m);
}
}
static inline void str_copy(kstring_t *s, const char *st, const char *en)
{
str_enlarge(s, en - st);
memcpy(&s->s[s->l], st, en - st);
s->l += en - st;
}
static void mm_sprintf_lite(kstring_t *s, const char *fmt, ...)
{
char buf[16]; // for integer to string conversion
const char *p, *q;
va_list ap;
va_start(ap, fmt);
for (q = p = fmt; *p; ++p) {
if (*p == '%') {
if (p > q) str_copy(s, q, p);
++p;
if (*p == 'd') {
int c, i, l = 0;
unsigned int x;
c = va_arg(ap, int);
x = c >= 0? c : -c;
do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
if (c < 0) buf[l++] = '-';
str_enlarge(s, l);
for (i = l - 1; i >= 0; --i) s->s[s->l++] = buf[i];
} else if (*p == 's') {
char *r = va_arg(ap, char*);
str_copy(s, r, r + strlen(r));
} else if (*p == 'c') {
str_enlarge(s, 1);
s->s[s->l++] = va_arg(ap, int);
} else abort();
q = p + 1;
}
}
if (p > q) str_copy(s, q, p);
va_end(ap);
s->s[s->l] = 0;
}
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r)
{
s->l = 0;
mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t%s\t%d\t%d\t%d", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev], mi->seq[r->rid].name, mi->seq[r->rid].len, r->rs, r->re);
if (r->p) mm_sprintf_lite(s, "\t%d\t%d\t255", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
else mm_sprintf_lite(s, "\t%d\t%d\t255", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs);
mm_sprintf_lite(s, "\tcm:i:%d", r->cnt);
if (r->p) mm_sprintf_lite(s, "\ts1:i:%d", r->score);
if (r->parent == which) mm_sprintf_lite(s, "\ts2:i:%d", r->subsc);
if (r->p) {
uint32_t k;
mm_sprintf_lite(s, "\tNM:i:%d\tAS:i:%d\tnn:i:%d\tcg:Z:", r->p->n_diff, r->p->score, r->p->n_ambi);
for (k = 0; k < r->p->n_cigar; ++k)
mm_sprintf_lite(s, "%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
}
}

20
map.c
View File

@ -4,7 +4,6 @@
#include "kvec.h"
#include "kalloc.h"
#include "sdust.h"
#include "minimap.h"
#include "mmpriv.h"
#include "bseq.h"
@ -320,6 +319,7 @@ typedef struct {
const mm_mapopt_t *opt;
bseq_file_t *fp;
const mm_idx_t *mi;
kstring_t str;
} pipeline_t;
typedef struct {
@ -368,22 +368,8 @@ static void *worker_pipeline(void *shared, int step, void *in)
bseq1_t *t = &s->seq[i];
for (j = 0; j < s->n_reg[i]; ++j) {
mm_reg1_t *r = &s->reg[i][j];
printf("%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
if (mi->seq[r->rid].name) fputs(mi->seq[r->rid].name, stdout);
else printf("%d", r->rid + 1);
printf("\t%d\t%d\t%d", mi->seq[r->rid].len, r->rs, r->re);
if (r->p) printf("\t%d\t%d\t255", r->p->blen - r->p->n_ambi - r->p->n_diff, r->p->blen);
else printf("\t%d\t%d\t255", r->score, r->re - r->rs > r->qe - r->qs? r->re - r->rs : r->qe - r->qs);
printf("\tcm:i:%d", r->cnt);
if (r->p) printf("\ts1:i:%d", r->score);
if (r->parent == j) printf("\ts2:i:%d", r->subsc);
if (r->p) {
uint32_t k;
printf("\tNM:i:%d\tAS:i:%d\tnn:i:%d\tcg:Z:", r->p->n_diff, r->p->score, r->p->n_ambi);
for (k = 0; k < r->p->n_cigar; ++k)
printf("%d%c", r->p->cigar[k]>>4, "MID"[r->p->cigar[k]&0xf]);
}
putchar('\n');
mm_write_paf(&p->str, mi, t, j, r);
puts(p->str.s);
free(r->p);
}
free(s->reg[i]);

View File

@ -2,6 +2,19 @@
#define MMPRIV2_H
#include "minimap.h"
#include "bseq.h"
#ifndef kroundup32
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
#endif
#ifndef KSTRING_T
#define KSTRING_T kstring_t
typedef struct __kstring_t {
unsigned l, m;
char *s;
} kstring_t;
#endif
#ifdef __cplusplus
extern "C" {
@ -14,8 +27,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end);
void radix_sort_64(uint64_t *beg, uint64_t *end);
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, bseq1_t *t, int which, mm_reg1_t *r);
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_sc, int n, mm128_t *a, uint64_t **_u, void *km);
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
#ifdef __cplusplus