2017-06-06 22:16:33 +08:00
|
|
|
#ifndef MINIMAP2_H
|
|
|
|
|
#define MINIMAP2_H
|
2017-04-07 03:37:34 +08:00
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
|
|
|
|
|
// Default value 14 for an index parameter named `b` (cf. mm_idx_t::b and the `b` argument of mm_idx_init/mm_idx_gen).
// NOTE(review): the exact meaning of `b` is not visible in this header -- confirm against the implementation.
#define MM_IDX_DEF_B 14
|
|
|
|
|
|
2017-06-26 10:05:20 +08:00
|
|
|
// Bit flags stored in mm_mapopt_t::flag. Each flag occupies its own bit,
// so several flags can be combined with bitwise OR.
#define MM_F_NO_SELF 0x01
#define MM_F_AVA     0x02
#define MM_F_CIGAR   0x04
#define MM_F_OUT_SAM 0x08
|
2017-04-07 03:37:34 +08:00
|
|
|
|
2017-06-07 02:19:50 +08:00
|
|
|
// Four magic bytes: 'M', 'M', 'I', 0x02 (plus the implicit terminating NUL of the literal).
// NOTE(review): presumably the signature written/checked by mm_idx_dump()/mm_idx_load() -- confirm.
#define MM_IDX_MAGIC "MMI\2"
|
|
|
|
|
|
2017-04-07 03:37:34 +08:00
|
|
|
// A pair of 64-bit unsigned words; element type of mm128_v.
typedef struct {
	uint64_t x, y;
} mm128_t;
|
|
|
|
|
|
|
|
|
|
// Array of mm128_t elements reachable through `a`.
// NOTE(review): n/m look like a used-count/capacity pair -- confirm against the code that fills it.
typedef struct {
	size_t n;
	size_t m;
	mm128_t *a;
} mm128_v;
|
|
|
|
|
// Array of uint64_t elements reachable through `a`.
// NOTE(review): n/m look like a used-count/capacity pair -- confirm against the code that fills it.
typedef struct {
	size_t n;
	size_t m;
	uint64_t *a;
} uint64_v;
|
|
|
|
|
// Array of uint32_t elements reachable through `a`.
// NOTE(review): n/m look like a used-count/capacity pair -- confirm against the code that fills it.
typedef struct {
	size_t n;
	size_t m;
	uint32_t *a;
} uint32_v;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
mm128_v a; // (minimizer, position) array
|
|
|
|
|
int32_t n; // size of the _p_ array
|
|
|
|
|
uint64_t *p; // position array for minimizers appearing >1 times
|
|
|
|
|
void *h; // hash table indexing _p_ and minimizers appearing once
|
|
|
|
|
} mm_idx_bucket_t;
|
|
|
|
|
|
|
|
|
|
// Per-sequence record of the index (mm_idx_t::seq points to these).
typedef struct {
	char *name;      // name of the db sequence
	uint64_t offset; // offset in mm_idx_t::S
	uint32_t len;    // length
} mm_idx_seq_t;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
2017-04-08 03:42:33 +08:00
|
|
|
int32_t b, w, k, is_hpc;
|
2017-04-08 03:30:30 +08:00
|
|
|
uint32_t n_seq; // number of reference sequences
|
|
|
|
|
mm_idx_seq_t *seq; // sequence name, length and offset
|
|
|
|
|
uint32_t *S; // 4-bit packed sequence
|
|
|
|
|
mm_idx_bucket_t *B; // index
|
2017-04-07 03:37:34 +08:00
|
|
|
} mm_idx_t;
|
|
|
|
|
|
2017-06-24 06:25:47 +08:00
|
|
|
// Variable-length record attached to a mapping via mm_reg1_t::p.
// `cigar` is a C99 flexible array member, so the record must be heap-allocated
// with room for the trailing elements: sizeof(mm_extra_t) + count * sizeof(uint32_t).
typedef struct {
	uint32_t capacity; // NOTE(review): presumably the allocated size of the record -- confirm unit
	int32_t dp_score, dp_max;
	uint32_t blen;
	uint32_t n_diff, n_ambi;
	uint32_t n_cigar;  // NOTE(review): presumably the number of valid entries in cigar[] -- confirm
	uint32_t cigar[];
} mm_extra_t;
|
2017-06-24 06:25:47 +08:00
|
|
|
|
2017-04-07 03:37:34 +08:00
|
|
|
typedef struct {
|
2017-06-28 22:35:21 +08:00
|
|
|
int32_t id;
|
2017-06-30 10:08:46 +08:00
|
|
|
uint32_t cnt:31, rev:1;
|
2017-04-07 03:37:34 +08:00
|
|
|
uint32_t rid:31, rep:1;
|
2017-06-09 03:28:19 +08:00
|
|
|
int32_t score;
|
2017-04-07 03:37:34 +08:00
|
|
|
int32_t qs, qe, rs, re;
|
2017-06-09 03:28:19 +08:00
|
|
|
int32_t parent, subsc;
|
2017-06-24 01:44:45 +08:00
|
|
|
int32_t as;
|
2017-06-30 11:48:35 +08:00
|
|
|
uint32_t mapq:8, split:2, sam_pri:1, n_sub:21; // TODO: n_sub is not used for now
|
2017-06-24 10:42:15 +08:00
|
|
|
mm_extra_t *p;
|
2017-04-07 03:37:34 +08:00
|
|
|
} mm_reg1_t;
|
|
|
|
|
|
|
|
|
|
// Mapping options; passed to mm_map() and mm_map_file(), set up by mm_mapopt_init().
typedef struct {
	float max_occ_frac;
	float mid_occ_frac;
	int sdust_thres; // score threshold for SDUST; 0 to disable
	int flag;        // see MM_F_* macros

	int bw;          // bandwidth
	int max_gap;     // break a chain if there are no minimizers in a max_gap window
	int max_chain_skip;
	int min_cnt;
	int min_chain_score;
	float min_seedcov_ratio;

	float pri_ratio;
	float mask_level;

	int max_join_long, max_join_short;
	int min_join_flank_sc;

	int a, b, q, e;  // matching score, mismatch, gap-open and gap-ext penalties
	int zdrop;
	int min_dp_max;
	int min_ksw_len;

	int max_occ;
	int mid_occ;
} mm_mapopt_t;
|
|
|
|
|
|
|
|
|
|
// Global variables; declared here only, their definitions are not in this header.
extern int mm_verbose;      // NOTE(review): presumably the verbosity level -- confirm
extern double mm_realtime0; // NOTE(review): presumably a wall-clock reference time -- confirm
|
|
|
|
|
|
|
|
|
|
// Opaque buffer type: the struct body is not defined in this header, so callers
// can only hold pointers to it (obtained from mm_tbuf_init(), released with mm_tbuf_destroy()).
struct mm_tbuf_s;
typedef struct mm_tbuf_s mm_tbuf_t;
|
|
|
|
|
|
2017-04-08 03:30:30 +08:00
|
|
|
// Forward declaration only; the struct body is not defined in this header.
// It is used here solely as a pointer argument of mm_idx_gen().
struct bseq_file_s;
|
|
|
|
|
|
2017-06-24 02:11:56 +08:00
|
|
|
// Put the 4-bit code `c` at position `i` of the packed array `s`: element i/8, bit offset 4*(i%8).
// The value is OR-ed in, so the target nibble must be zero beforehand. `i` is evaluated twice.
#define mm_seq4_set(s, i, c) ((s)[(i) >> 3] |= ((uint32_t)(c) << (((i) & 7) << 2)))
|
|
|
|
|
// Read the 4-bit code at position `i` of the packed array `s`: element i/8, bit offset 4*(i%8).
// `i` is evaluated twice.
#define mm_seq4_get(s, i) (((s)[(i) >> 3] >> (((i) & 7) << 2)) & 0xf)
|
2017-04-08 03:30:30 +08:00
|
|
|
|
2017-04-07 03:37:34 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// compute minimizers
|
|
|
|
|
void mm_sketch(void *km, const char *str, int len, int w, int k, uint32_t rid, int is_hpc, mm128_v *p);
|
|
|
|
|
|
|
|
|
|
// minimizer indexing
|
2017-04-08 03:42:33 +08:00
|
|
|
mm_idx_t *mm_idx_init(int w, int k, int b, int is_hpc);
|
2017-04-07 03:37:34 +08:00
|
|
|
void mm_idx_destroy(mm_idx_t *mi);
|
2017-04-08 03:30:30 +08:00
|
|
|
mm_idx_t *mm_idx_gen(struct bseq_file_s *fp, int w, int k, int b, int is_hpc, int mini_batch_size, int n_threads, uint64_t batch_size, int keep_name);
|
2017-04-26 19:36:46 +08:00
|
|
|
uint32_t mm_idx_cal_max_occ(const mm_idx_t *mi, float f);
|
2017-04-26 22:52:28 +08:00
|
|
|
void mm_idx_stat(const mm_idx_t *idx);
|
2017-04-07 03:37:34 +08:00
|
|
|
const uint64_t *mm_idx_get(const mm_idx_t *mi, uint64_t minier, int *n);
|
2017-06-24 01:44:45 +08:00
|
|
|
int mm_idx_getseq(const mm_idx_t *mi, uint32_t rid, uint32_t st, uint32_t en, uint8_t *seq);
|
2017-04-07 03:37:34 +08:00
|
|
|
|
2017-04-08 03:30:30 +08:00
|
|
|
mm_idx_t *mm_idx_build(const char *fn, int w, int k, int is_hpc, int n_threads);
|
2017-04-07 03:37:34 +08:00
|
|
|
|
|
|
|
|
// minimizer index I/O
|
|
|
|
|
void mm_idx_dump(FILE *fp, const mm_idx_t *mi);
|
|
|
|
|
mm_idx_t *mm_idx_load(FILE *fp);
|
|
|
|
|
|
|
|
|
|
// mapping
|
|
|
|
|
void mm_mapopt_init(mm_mapopt_t *opt);
|
2017-04-26 19:36:46 +08:00
|
|
|
void mm_mapopt_update(mm_mapopt_t *opt, const mm_idx_t *mi);
|
2017-04-07 03:37:34 +08:00
|
|
|
mm_tbuf_t *mm_tbuf_init(void);
|
|
|
|
|
void mm_tbuf_destroy(mm_tbuf_t *b);
|
2017-06-08 02:18:32 +08:00
|
|
|
mm_reg1_t *mm_map(const mm_idx_t *mi, int l_seq, const char *seq, int *n_regs, mm_tbuf_t *b, const mm_mapopt_t *opt, const char *name);
|
2017-04-07 03:37:34 +08:00
|
|
|
|
|
|
|
|
int mm_map_file(const mm_idx_t *idx, const char *fn, const mm_mapopt_t *opt, int n_threads, int tbatch_size);
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2017-06-06 22:16:33 +08:00
|
|
|
#endif // MINIMAP2_H
|