2013-02-01 02:59:48 +08:00
|
|
|
#ifndef BWAMEM_H_
|
|
|
|
|
#define BWAMEM_H_
|
|
|
|
|
|
|
|
|
|
#include "bwt.h"
|
2013-02-08 02:13:43 +08:00
|
|
|
#include "bntseq.h"
|
2013-02-12 23:36:15 +08:00
|
|
|
#include "utils.h"
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-13 05:15:26 +08:00
|
|
|
// NOTE(review): presumably a scaling coefficient for the mapping-quality (MAPQ)
// computation; it is not used anywhere in this header -- confirm against the implementation.
#define MEM_MAPQ_COEF 40.0
|
|
|
|
|
|
2013-02-02 03:38:44 +08:00
|
|
|
/*
 * Opaque iterator handle. The structure is only forward-declared here, so
 * code including this header can hold and pass `smem_i *` but cannot inspect it.
 */
struct __smem_i;
typedef struct __smem_i smem_i;
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-01 04:55:22 +08:00
|
|
|
/*
 * A single seed.
 * NOTE(review): field meanings are inferred from their names -- confirm:
 *   rbeg  presumably the start coordinate on the reference (64-bit)
 *   qbeg  presumably the start offset in the query
 *   len   presumably the seed length
 */
typedef struct {
	int64_t rbeg;
	int32_t qbeg, len;
} mem_seed_t;
|
2013-02-01 04:55:22 +08:00
|
|
|
|
2013-02-01 02:59:48 +08:00
|
|
|
/*
 * Option set passed (as `const mem_opt_t *opt`) to the mem_* functions
 * declared in this header.
 * NOTE(review): fields without a trailing comment are undocumented in the
 * original; any meaning given below with "presumably" is inferred from the
 * field name and must be confirmed against the implementation.
 */
typedef struct {
	int a, b, q, r, w; // presumably match score, mismatch penalty, gap open, gap extension, band width -- TODO confirm
	int split_width;
	int min_seed_len, max_occ, max_chain_gap;
	int n_threads, chunk_size;
	int pe_dir, is_pe;
	int is_hard; // if to use hard clip
	float mask_level, chain_drop_ratio;
	int max_ins; // maximum insert size
	int8_t mat[25]; // scoring matrix; mat[0] == 0 if unset
} mem_opt_t;
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-01 04:55:22 +08:00
|
|
|
typedef struct {
|
|
|
|
|
int n, m;
|
|
|
|
|
int64_t pos;
|
2013-02-02 05:39:50 +08:00
|
|
|
mem_seed_t *seeds;
|
|
|
|
|
} mem_chain_t;
|
|
|
|
|
|
|
|
|
|
/*
 * An alignment region.
 * NOTE(review): rb/re and qb/qe are presumably the begin/end coordinates on
 * the reference and on the query respectively; `seedcov` is undocumented in
 * the original -- confirm against the implementation.
 */
typedef struct {
	int64_t rb, re;
	int score, qb, qe, seedcov, sub, csub; // sub: suboptimal score; csub: suboptimal inside the chain
	int sub_n; // approximate number of suboptimal hits
	int secondary; // non-negative if the hit is secondary
} mem_alnreg_t;
|
2013-02-01 04:55:22 +08:00
|
|
|
|
2013-02-11 23:59:38 +08:00
|
|
|
/*
 * Paired-end statistics record; mem_pestat() fills an array of four of these.
 * NOTE(review): presumably low/high bound the accepted insert size, `failed`
 * flags that the estimate could not be made, and avg/std are the mean and
 * standard deviation of the insert size -- confirm against the implementation.
 */
typedef struct {
	int low, high, failed;
	double avg, std;
} mem_pestat_t;
|
|
|
|
|
|
2013-02-12 04:29:03 +08:00
|
|
|
/*
 * A reported hit.
 * NOTE(review): rb/re and qb/qe are presumably begin/end coordinates on the
 * reference and the query; `flag` and `qual` are undocumented in the
 * original -- confirm against the implementation.
 */
typedef struct {
	int64_t rb, re;
	int qb, qe, flag, qual;
	// optional info
	int score, sub;
	int64_t mb, me; // mb: mate start; -1 if single-end; -2 if mate unmapped
} bwahit_t;
|
|
|
|
|
|
2013-02-12 23:36:15 +08:00
|
|
|
/*
 * Array of mem_chain_t; this is the type mem_chain() returns by value.
 * NOTE(review): presumably n is the number of elements in use and m the
 * allocated capacity of `a` -- confirm against the implementation.
 */
typedef struct {
	size_t n, m;
	mem_chain_t *a;
} mem_chain_v;
|
|
|
|
|
/*
 * Array of mem_alnreg_t; mem_pestat() takes a pointer to these.
 * NOTE(review): presumably n is the number of elements in use and m the
 * allocated capacity of `a` -- confirm against the implementation.
 */
typedef struct {
	size_t n, m;
	mem_alnreg_t *a;
} mem_alnreg_v;
|
2013-02-08 02:13:43 +08:00
|
|
|
|
2013-02-11 23:59:38 +08:00
|
|
|
// Declaration only; the variable itself is defined in another translation unit.
// NOTE(review): presumably a verbosity level for diagnostic output -- confirm.
extern int mem_verbose;
|
|
|
|
|
|
2013-02-01 02:59:48 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
smem_i *smem_itr_init(const bwt_t *bwt);
|
|
|
|
|
void smem_itr_destroy(smem_i *itr);
|
2013-02-02 03:20:38 +08:00
|
|
|
void smem_set_query(smem_i *itr, int len, const uint8_t *query);
|
2013-02-09 05:56:28 +08:00
|
|
|
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width);
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-02 05:39:50 +08:00
|
|
|
mem_opt_t *mem_opt_init(void);
|
2013-02-05 01:37:38 +08:00
|
|
|
void mem_fill_scmat(int a, int b, int8_t mat[25]);
|
2013-02-01 02:59:48 +08:00
|
|
|
|
2013-02-08 02:13:43 +08:00
|
|
|
mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq);
|
|
|
|
|
int mem_chain_flt(const mem_opt_t *opt, int n_chn, mem_chain_t *chains);
|
|
|
|
|
void mem_chain2aln(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_t *a);
|
2013-02-07 02:59:32 +08:00
|
|
|
uint32_t *mem_gen_cigar(const mem_opt_t *opt, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
|
2013-02-01 05:26:05 +08:00
|
|
|
|
2013-02-08 02:13:43 +08:00
|
|
|
int mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int n, bseq1_t *seqs);
|
|
|
|
|
|
2013-02-11 23:59:38 +08:00
|
|
|
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]);
|
|
|
|
|
|
2013-02-01 02:59:48 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2013-02-15 01:59:32 +08:00
|
|
|
/*
 * Classify the relative placement of two positions and measure their distance.
 *
 * A position p is treated as being on the second strand when p >= l_pac.
 * b2 is first expressed on the strand of b1: it is left unchanged when both
 * positions are on the same strand and mirrored to (2*l_pac - 1 - b2) otherwise.
 *
 * @param l_pac  length of the forward reference; positions >= l_pac are on the other strand
 * @param b1     position of read 1
 * @param b2     position of read 2
 * @param dist   out: absolute distance between b1 and the projected b2; must not be NULL
 * @return 0  same strand,      projected b2 >  b1
 *         1  different strand, projected b2 >  b1
 *         2  different strand, projected b2 <= b1
 *         3  same strand,      projected b2 <= b1
 */
static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t *dist)
{
	int r1 = (b1 >= l_pac), r2 = (b2 >= l_pac);
	int same_strand = (r1 == r2);
	// p2 is the coordinate of read 2 on the read 1 strand
	int64_t p2 = same_strand ? b2 : (l_pac << 1) - 1 - b2;
	int after = (p2 > b1);

	*dist = after ? p2 - b1 : b1 - p2;
	// bit 0 records a strand difference; XOR with 3 flips both bits when p2 does not follow b1
	return (same_strand ? 0 : 1) ^ (after ? 0 : 3);
}
|
|
|
|
|
|
2013-02-01 02:59:48 +08:00
|
|
|
#endif
|