r188: renamed bseq* to mm_bseq*
to avoid naming collisions between minimap2 and bwa/fermi-lite/etc
This commit is contained in:
parent
b9b0b6f49c
commit
71c988f6ab
20
bseq.c
20
bseq.c
|
|
@ -7,43 +7,43 @@
|
|||
#include "kseq.h"
|
||||
KSEQ_INIT(gzFile, gzread)
|
||||
|
||||
struct bseq_file_s {
|
||||
struct mm_bseq_file_s {
|
||||
int is_eof;
|
||||
gzFile fp;
|
||||
kseq_t *ks;
|
||||
};
|
||||
|
||||
bseq_file_t *bseq_open(const char *fn)
|
||||
mm_bseq_file_t *mm_bseq_open(const char *fn)
|
||||
{
|
||||
bseq_file_t *fp;
|
||||
mm_bseq_file_t *fp;
|
||||
gzFile f;
|
||||
f = fn && strcmp(fn, "-")? gzopen(fn, "r") : gzdopen(fileno(stdin), "r");
|
||||
if (f == 0) return 0;
|
||||
fp = (bseq_file_t*)calloc(1, sizeof(bseq_file_t));
|
||||
fp = (mm_bseq_file_t*)calloc(1, sizeof(mm_bseq_file_t));
|
||||
fp->fp = f;
|
||||
fp->ks = kseq_init(fp->fp);
|
||||
return fp;
|
||||
}
|
||||
|
||||
void bseq_close(bseq_file_t *fp)
|
||||
void mm_bseq_close(mm_bseq_file_t *fp)
|
||||
{
|
||||
kseq_destroy(fp->ks);
|
||||
gzclose(fp->fp);
|
||||
free(fp);
|
||||
}
|
||||
|
||||
bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_)
|
||||
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_)
|
||||
{
|
||||
int size = 0, m, n;
|
||||
bseq1_t *seqs;
|
||||
mm_bseq1_t *seqs;
|
||||
kseq_t *ks = fp->ks;
|
||||
m = n = 0; seqs = 0;
|
||||
while (kseq_read(ks) >= 0) {
|
||||
bseq1_t *s;
|
||||
mm_bseq1_t *s;
|
||||
assert(ks->seq.l <= INT32_MAX);
|
||||
if (n >= m) {
|
||||
m = m? m<<1 : 256;
|
||||
seqs = (bseq1_t*)realloc(seqs, m * sizeof(bseq1_t));
|
||||
seqs = (mm_bseq1_t*)realloc(seqs, m * sizeof(mm_bseq1_t));
|
||||
}
|
||||
s = &seqs[n];
|
||||
s->name = strdup(ks->name.s);
|
||||
|
|
@ -58,7 +58,7 @@ bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_)
|
|||
return seqs;
|
||||
}
|
||||
|
||||
int bseq_eof(bseq_file_t *fp)
|
||||
int mm_bseq_eof(mm_bseq_file_t *fp)
|
||||
{
|
||||
return fp->is_eof;
|
||||
}
|
||||
|
|
|
|||
22
bseq.h
22
bseq.h
|
|
@ -3,19 +3,27 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
struct bseq_file_s;
|
||||
typedef struct bseq_file_s bseq_file_t;
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct mm_bseq_file_s;
|
||||
typedef struct mm_bseq_file_s mm_bseq_file_t;
|
||||
|
||||
typedef struct {
|
||||
int l_seq, rid;
|
||||
char *name, *seq, *qual;
|
||||
} bseq1_t;
|
||||
} mm_bseq1_t;
|
||||
|
||||
bseq_file_t *bseq_open(const char *fn);
|
||||
void bseq_close(bseq_file_t *fp);
|
||||
bseq1_t *bseq_read(bseq_file_t *fp, int chunk_size, int with_qual, int *n_);
|
||||
int bseq_eof(bseq_file_t *fp);
|
||||
mm_bseq_file_t *mm_bseq_open(const char *fn);
|
||||
void mm_bseq_close(mm_bseq_file_t *fp);
|
||||
mm_bseq1_t *mm_bseq_read(mm_bseq_file_t *fp, int chunk_size, int with_qual, int *n_);
|
||||
int mm_bseq_eof(mm_bseq_file_t *fp);
|
||||
|
||||
extern unsigned char seq_nt4_table[256];
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
4
format.c
4
format.c
|
|
@ -62,7 +62,7 @@ static inline void write_tags(kstring_t *s, const mm_reg1_t *r)
|
|||
if (r->p) mm_sprintf_lite(s, "\tNM:i:%d\tms:i:%d\tAS:i:%d\tnn:i:%d", r->p->n_diff, r->p->dp_max, r->p->dp_score, r->p->n_ambi);
|
||||
}
|
||||
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r)
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r)
|
||||
{
|
||||
s->l = 0;
|
||||
mm_sprintf_lite(s, "%s\t%d\t%d\t%d\t%c\t", t->name, t->l_seq, r->qs, r->qe, "+-"[r->rev]);
|
||||
|
|
@ -105,7 +105,7 @@ static void sam_write_sq(kstring_t *s, char *seq, int l, int rev, int comp)
|
|||
} else str_copy(s, seq, seq + l);
|
||||
}
|
||||
|
||||
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r)
|
||||
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r)
|
||||
{
|
||||
int flag = 0;
|
||||
s->l = 0;
|
||||
|
|
|
|||
16
index.c
16
index.c
|
|
@ -190,13 +190,13 @@ static void mm_idx_post(mm_idx_t *mi, int n_threads)
|
|||
typedef struct {
|
||||
int mini_batch_size, keep_name;
|
||||
uint64_t batch_size, sum_len;
|
||||
bseq_file_t *fp;
|
||||
mm_bseq_file_t *fp;
|
||||
mm_idx_t *mi;
|
||||
} pipeline_t;
|
||||
|
||||
typedef struct {
|
||||
int n_seq;
|
||||
bseq1_t *seq;
|
||||
mm_bseq1_t *seq;
|
||||
mm128_v a;
|
||||
} step_t;
|
||||
|
||||
|
|
@ -217,7 +217,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
step_t *s;
|
||||
if (p->sum_len > p->batch_size) return 0;
|
||||
s = (step_t*)calloc(1, sizeof(step_t));
|
||||
s->seq = bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch
|
||||
s->seq = mm_bseq_read(p->fp, p->mini_batch_size, 0, &s->n_seq); // read a mini-batch
|
||||
if (s->seq) {
|
||||
uint32_t old_m, m;
|
||||
uint64_t sum_len, old_max_len, max_len;
|
||||
|
|
@ -261,7 +261,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
} else if (step == 1) { // step 1: compute sketch
|
||||
step_t *s = (step_t*)in;
|
||||
for (i = 0; i < s->n_seq; ++i) {
|
||||
bseq1_t *t = &s->seq[i];
|
||||
mm_bseq1_t *t = &s->seq[i];
|
||||
mm_sketch(0, t->seq, t->l_seq, p->mi->w, p->mi->k, t->rid, p->mi->is_hpc, &s->a);
|
||||
free(t->seq); free(t->name);
|
||||
}
|
||||
|
|
@ -275,7 +275,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
return 0;
|
||||
}
|
||||
|
||||
mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name)
|
||||
mm_idx_t *mm_idx_gen(mm_bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name)
|
||||
{
|
||||
pipeline_t pl;
|
||||
memset(&pl, 0, sizeof(pipeline_t));
|
||||
|
|
@ -299,12 +299,12 @@ mm_idx_t *mm_idx_gen(bseq_file_t *fp, int w, int k, int b, int is_hpc, int mini_
|
|||
|
||||
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads) // a simpler interface
|
||||
{
|
||||
bseq_file_t *fp;
|
||||
mm_bseq_file_t *fp;
|
||||
mm_idx_t *mi;
|
||||
fp = bseq_open(fn);
|
||||
fp = mm_bseq_open(fn);
|
||||
if (fp == 0) return 0;
|
||||
mi = mm_idx_gen(fp, w, k, MM_IDX_DEF_B, is_hpc, 1<<18, n_threads, UINT64_MAX, 1);
|
||||
bseq_close(fp);
|
||||
mm_bseq_close(fp);
|
||||
return mi;
|
||||
}
|
||||
|
||||
|
|
|
|||
8
ksw2.h
8
ksw2.h
|
|
@ -13,6 +13,10 @@
|
|||
#define KSW_EZ_EXTZ_ONLY 0x40 // only perform extension
|
||||
#define KSW_EZ_REV_CIGAR 0x80 // reverse CIGAR in the output
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint32_t max:31, zdropped:1;
|
||||
int max_q, max_t; // max extension coordinate
|
||||
|
|
@ -23,10 +27,6 @@ typedef struct {
|
|||
uint32_t *cigar;
|
||||
} ksw_extz_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* NW-like extension
|
||||
*
|
||||
|
|
|
|||
10
main.c
10
main.c
|
|
@ -10,7 +10,7 @@
|
|||
#include "minimap.h"
|
||||
#include "mmpriv.h"
|
||||
|
||||
#define MM_VERSION "2.0-r187-dirty"
|
||||
#define MM_VERSION "2.0-r188-dirty"
|
||||
|
||||
void liftrlimit()
|
||||
{
|
||||
|
|
@ -66,7 +66,7 @@ int main(int argc, char *argv[])
|
|||
int i, c, k = 17, w = -1, bucket_bits = MM_IDX_DEF_B, n_threads = 3, keep_name = 1, is_idx, is_hpc = 0, long_idx;
|
||||
int minibatch_size = 200000000;
|
||||
uint64_t batch_size = 4000000000ULL;
|
||||
bseq_file_t *fp = 0;
|
||||
mm_bseq_file_t *fp = 0;
|
||||
char *fnw = 0, *s;
|
||||
FILE *fpr = 0, *fpw = 0;
|
||||
|
||||
|
|
@ -203,12 +203,12 @@ int main(int argc, char *argv[])
|
|||
return 1;
|
||||
}
|
||||
if (is_idx) fpr = fopen(argv[optind], "rb");
|
||||
else fp = bseq_open(argv[optind]);
|
||||
else fp = mm_bseq_open(argv[optind]);
|
||||
if (fnw) fpw = fopen(fnw, "wb");
|
||||
for (;;) {
|
||||
mm_idx_t *mi = 0;
|
||||
if (fpr) mi = mm_idx_load(fpr);
|
||||
else if (!bseq_eof(fp))
|
||||
else if (!mm_bseq_eof(fp))
|
||||
mi = mm_idx_gen(fp, w, k, bucket_bits, is_hpc, minibatch_size, n_threads, batch_size, keep_name);
|
||||
if (mi == 0) break;
|
||||
if (mm_verbose >= 3)
|
||||
|
|
@ -227,7 +227,7 @@ int main(int argc, char *argv[])
|
|||
}
|
||||
if (fpw) fclose(fpw);
|
||||
if (fpr) fclose(fpr);
|
||||
if (fp) bseq_close(fp);
|
||||
if (fp) mm_bseq_close(fp);
|
||||
|
||||
fprintf(stderr, "[M::%s] Version: %s\n", __func__, MM_VERSION);
|
||||
fprintf(stderr, "[M::%s] CMD:", __func__);
|
||||
|
|
|
|||
12
map.c
12
map.c
|
|
@ -281,7 +281,7 @@ mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, m
|
|||
typedef struct {
|
||||
int mini_batch_size, n_processed, n_threads;
|
||||
const mm_mapopt_t *opt;
|
||||
bseq_file_t *fp;
|
||||
mm_bseq_file_t *fp;
|
||||
const mm_idx_t *mi;
|
||||
kstring_t str;
|
||||
} pipeline_t;
|
||||
|
|
@ -289,7 +289,7 @@ typedef struct {
|
|||
typedef struct {
|
||||
const pipeline_t *p;
|
||||
int n_seq;
|
||||
bseq1_t *seq;
|
||||
mm_bseq1_t *seq;
|
||||
int *n_reg;
|
||||
mm_reg1_t **reg;
|
||||
mm_tbuf_t **buf;
|
||||
|
|
@ -311,7 +311,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
int with_qual = (!!(p->opt->flag & MM_F_OUT_SAM) && !(p->opt->flag & MM_F_NO_QUAL));
|
||||
step_t *s;
|
||||
s = (step_t*)calloc(1, sizeof(step_t));
|
||||
s->seq = bseq_read(p->fp, p->mini_batch_size, with_qual, &s->n_seq);
|
||||
s->seq = mm_bseq_read(p->fp, p->mini_batch_size, with_qual, &s->n_seq);
|
||||
if (s->seq) {
|
||||
s->p = p;
|
||||
for (i = 0; i < s->n_seq; ++i)
|
||||
|
|
@ -332,7 +332,7 @@ static void *worker_pipeline(void *shared, int step, void *in)
|
|||
for (i = 0; i < p->n_threads; ++i) mm_tbuf_destroy(s->buf[i]);
|
||||
free(s->buf);
|
||||
for (i = 0; i < s->n_seq; ++i) {
|
||||
bseq1_t *t = &s->seq[i];
|
||||
mm_bseq1_t *t = &s->seq[i];
|
||||
for (j = 0; j < s->n_reg[i]; ++j) {
|
||||
mm_reg1_t *r = &s->reg[i][j];
|
||||
if (p->opt->flag & MM_F_OUT_SAM) mm_write_sam(&p->str, mi, t, r);
|
||||
|
|
@ -360,7 +360,7 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int
|
|||
{
|
||||
pipeline_t pl;
|
||||
memset(&pl, 0, sizeof(pipeline_t));
|
||||
pl.fp = bseq_open(fn);
|
||||
pl.fp = mm_bseq_open(fn);
|
||||
if (pl.fp == 0) return -1;
|
||||
pl.opt = opt, pl.mi = idx;
|
||||
pl.n_threads = n_threads, pl.mini_batch_size = mini_batch_size;
|
||||
|
|
@ -371,6 +371,6 @@ int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int
|
|||
}
|
||||
kt_pipeline(n_threads == 1? 1 : 2, worker_pipeline, &pl, 3);
|
||||
free(pl.str.s);
|
||||
bseq_close(pl.fp);
|
||||
mm_bseq_close(pl.fp);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
12
minimap.h
12
minimap.h
|
|
@ -15,6 +15,10 @@
|
|||
|
||||
#define MM_IDX_MAGIC "MMI\2"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
uint64_t x, y;
|
||||
} mm128_t;
|
||||
|
|
@ -100,22 +104,18 @@ extern double mm_realtime0;
|
|||
struct mm_tbuf_s;
|
||||
typedef struct mm_tbuf_s mm_tbuf_t;
|
||||
|
||||
struct bseq_file_s;
|
||||
struct mm_bseq_file_s;
|
||||
|
||||
#define mm_seq4_set(s, i, c) ((s)[(i)>>3] |= (uint32_t)(c) << (((i)&7)<<2))
|
||||
#define mm_seq4_get(s, i) ((s)[(i)>>3] >> (((i)&7)<<2) & 0xf)
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// compute minimizers
|
||||
void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
|
||||
|
||||
// minimizer indexing
|
||||
mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc);
|
||||
void mm_idx_destroy(mm_idx_t *mi);
|
||||
mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name);
|
||||
mm_idx_t *mm_idx_gen(struct mm_bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name);
|
||||
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
||||
void mm_idx_stat(const mm_idx_t *idx);
|
||||
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
||||
|
|
|
|||
12
mmpriv.h
12
mmpriv.h
|
|
@ -16,6 +16,10 @@
|
|||
#define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef KSTRING_T
|
||||
#define KSTRING_T kstring_t
|
||||
typedef struct __kstring_t {
|
||||
|
|
@ -24,10 +28,6 @@ typedef struct __kstring_t {
|
|||
} kstring_t;
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
double cputime(void);
|
||||
double realtime(void);
|
||||
|
||||
|
|
@ -35,8 +35,8 @@ void radix_sort_128x(mm128_t *beg, mm128_t *end);
|
|||
void radix_sort_64(uint64_t *beg, uint64_t *end);
|
||||
uint32_t ks_ksmall_uint32_t(size_t n, uint32_t arr[], size_t kk);
|
||||
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r);
|
||||
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const bseq1_t *t, const mm_reg1_t *r);
|
||||
void mm_write_paf(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r);
|
||||
void mm_write_sam(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, const mm_reg1_t *r);
|
||||
int mm_chain_dp(int max_dist, int bw, int max_skip, int min_cnt, int min_sc, int64_t n, mm128_t *a, uint64_t **_u, void *km);
|
||||
mm_reg1_t *mm_align_skeleton(void *km, const mm_mapopt_t *opt, const mm_idx_t *mi, int qlen, const char *qstr, int *n_regs_, mm_reg1_t *regs, mm128_t *a);
|
||||
|
||||
|
|
|
|||
6
sdust.h
6
sdust.h
|
|
@ -3,13 +3,13 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
struct sdust_buf_s;
|
||||
typedef struct sdust_buf_s sdust_buf_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
struct sdust_buf_s;
|
||||
typedef struct sdust_buf_s sdust_buf_t;
|
||||
|
||||
// the simple interface
|
||||
uint64_t *sdust(void *km, const uint8_t *seq, int l_seq, int T, int W, int *n);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue