139 lines
4.4 KiB
C
139 lines
4.4 KiB
C
/* The MIT License
|
||
|
||
Copyright (c) 2018- Dana-Farber Cancer Institute
|
||
2009-2018 Broad Institute, Inc.
|
||
2008-2009 Genome Research Ltd. (GRL)
|
||
|
||
Permission is hereby granted, free of charge, to any person obtaining
|
||
a copy of this software and associated documentation files (the
|
||
"Software"), to deal in the Software without restriction, including
|
||
without limitation the rights to use, copy, modify, merge, publish,
|
||
distribute, sublicense, and/or sell copies of the Software, and to
|
||
permit persons to whom the Software is furnished to do so, subject to
|
||
the following conditions:
|
||
|
||
The above copyright notice and this permission notice shall be
|
||
included in all copies or substantial portions of the Software.
|
||
|
||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
||
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
||
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||
SOFTWARE.
|
||
*/
|
||
#ifndef BWA_H_
|
||
#define BWA_H_
|
||
|
||
#include <stdint.h>
|
||
#include "bntseq.h"
|
||
#include "kstring.h"
|
||
#include "bwt.h"
|
||
#include "fmt_idx.h"
|
||
#include "kvec.h"
|
||
#include "ksw.h"
|
||
#include "ksw_align_avx.h"
|
||
#include "mate_sw.h"
|
||
|
||
/*
 * Index component bit flags.  Each flag occupies its own bit so they can be
 * OR-ed together.  Presumably these are what the `which` argument of
 * bwa_idx_load() / bwa_idx_load_from_disk() takes -- confirm in the
 * implementation.
 */
#define BWA_IDX_BWT 0x1
#define BWA_IDX_BNS 0x2
#define BWA_IDX_PAC 0x4
#define BWA_IDX_FMT 0x8
#define BWA_IDX_ALL 0xF /* all four component bits above */

/* 0x10000 == 65536.  NOTE(review): what this size controls is not visible in
 * this header -- confirm against the users of the macro. */
#define BWA_CTL_SIZE 0x10000

/* BWT construction algorithm selectors; presumably the values accepted by the
 * `algo_type` argument of bwa_idx_build() -- confirm. */
#define BWTALGO_AUTO 0
#define BWTALGO_RB2 1
#define BWTALGO_BWTSW 2
#define BWTALGO_IS 3

/* Debug flag bit; presumably tested against the global `bwa_dbg` -- confirm. */
#define BWA_DBG_QNAME 0x1
|
||
|
||
typedef struct {
|
||
bwt_t *bwt; // FM-index
|
||
FMTIndex *fmt;// FMT-index
|
||
bntseq_t *bns; // information on the reference sequences
|
||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||
|
||
int is_shm;
|
||
int64_t l_mem;
|
||
uint8_t *mem;
|
||
} bwaidx_t;
|
||
|
||
#if 0
/*
 * Disabled (compiled out) draft types for mate Smith-Waterman bookkeeping.
 * Kept for reference only.
 */

// A read that needs mate SW
typedef struct {
	int is_rev;      // whether seq is on the reverse-complement strand
	int xtra;
	int64_t rb;      // reference start position (inclusive)
	int64_t re;      // reference end position (exclusive)
	int64_t seq_id;  // seq id within the current data chunk
	kswr_avx_t aln;
} matesw_data_t;

// Growable array of matesw_data_t (n / m / a members)
typedef struct {
	size_t n, m;
	matesw_data_t *a;
} matesw_data_v;

// kvec of pointers to matesw_data_t
typedef kvec_t(matesw_data_t *) matesw_ptr_v;
#endif
|
||
|
||
typedef struct {
|
||
int l_seq, id;
|
||
int m_name, m_comment, m_seq, m_qual;
|
||
char *name, *comment, *seq, *qual;
|
||
// msw_task_ptr_v msw;
|
||
msw_seq_task_v msw_task;
|
||
// int_v msw;
|
||
// kstring_t sam;
|
||
} bseq1_t;
|
||
|
||
typedef struct {
|
||
kstring_t sam;
|
||
} seq_sam_t;
|
||
|
||
/*
 * Globals declared here and defined in exactly one translation unit elsewhere.
 * NOTE(review): bwa_verbose / bwa_dbg presumably hold the verbosity level and
 * the BWA_DBG_* debug bits, and bwa_rg_id the current read-group ID -- confirm.
 */
extern int bwa_verbose;
extern int bwa_dbg;
extern char bwa_rg_id[256];
|
||
|
||
#ifdef __cplusplus
|
||
extern "C" {
|
||
#endif
|
||
|
||
//bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||
void bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment, int64_t *size_, int *m_, bseq1_t **seqs_ptr);
|
||
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]);
|
||
|
||
void bwa_fill_scmat(int a, int b, int8_t mat[25]);
|
||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||
|
||
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size);
|
||
|
||
char *bwa_idx_infer_prefix(const char *hint);
|
||
bwt_t *bwa_idx_load_bwt(const char *hint);
|
||
|
||
bwaidx_t *bwa_idx_load_from_shm(const char *hint);
|
||
bwaidx_t *bwa_idx_load_from_disk(const char *hint, int which);
|
||
bwaidx_t *bwa_fmtidx_load_from_shm(const char *hint);
|
||
bwaidx_t *bwa_idx_load(const char *hint, int which);
|
||
void bwa_idx_destroy(bwaidx_t *idx);
|
||
int bwa_idx2mem(bwaidx_t *idx);
|
||
int bwa_fmtidx2mem(bwaidx_t *idx);
|
||
int bwa_ertidx2mem(bwaidx_t *idx);
|
||
int bwa_mem2idx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx);
|
||
int bwa_mem2fmtidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx);
|
||
int bwa_mem2ertidx(int64_t l_mem, uint8_t *mem, bwaidx_t *idx);
|
||
|
||
void bwa_print_sam_hdr(const bntseq_t *bns, const char *hdr_line);
|
||
char *bwa_set_rg(const char *s);
|
||
char *bwa_insert_header(const char *s, char *hdr);
|
||
|
||
#ifdef __cplusplus
|
||
}
|
||
#endif
|
||
|
||
#endif
|