/* The MIT License

   Copyright (c) 2018-     Dana-Farber Cancer Institute
                 2009-2018 Broad Institute, Inc.
                 2008-2009 Genome Research Ltd. (GRL)

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
|
|
|
|
|
|
|
|
|
|
#ifndef BWT_BNTSEQ_H
|
|
|
|
|
#define BWT_BNTSEQ_H
|
|
|
|
|
|
2014-04-11 08:54:27 +08:00
|
|
|
#include <assert.h>
|
2011-01-14 09:52:12 +08:00
|
|
|
#include <stdint.h>
|
2013-02-11 23:59:38 +08:00
|
|
|
#include <stdio.h>
|
2011-01-14 09:52:12 +08:00
|
|
|
#include <zlib.h>
|
|
|
|
|
|
|
|
|
|
/*
 * Unsigned 8-bit byte type. The BWA_UBYTE guard lets several headers provide
 * this same typedef without producing a redefinition when they are included
 * together.
 */
#ifndef BWA_UBYTE
#define BWA_UBYTE
typedef uint8_t ubyte_t;
#endif
|
|
|
|
|
|
|
|
|
|
/*
 * Annotation record for a single reference sequence.
 *
 * Field order and widths are kept exactly as declared; code outside this
 * header may depend on the layout.
 * NOTE(review): the per-field meanings below are inferred from the field
 * names only -- confirm against the code that fills this struct.
 */
typedef struct {
	int64_t offset;    /* presumably the start of this sequence in the concatenated reference */
	int32_t len;       /* presumably the sequence length in bases */
	int32_t n_ambs;    /* presumably the number of ambiguous regions in this sequence */
	uint32_t gi;       /* numeric identifier; semantics not visible here */
	int32_t is_alt;    /* presumably non-zero for an ALT contig -- TODO confirm */
	char *name, *anno; /* sequence name and annotation strings; ownership not visible here */
} bntann1_t;
|
|
|
|
|
|
|
|
|
|
/*
 * Record describing one ambiguous region ("hole") of the reference.
 *
 * NOTE(review): field meanings are inferred from their names -- confirm
 * against the code that populates this struct.
 */
typedef struct {
	int64_t offset; /* presumably the start position of the region */
	int32_t len;    /* presumably the length of the region */
	char amb;       /* presumably the ambiguous character occupying the region */
} bntamb1_t;
|
|
|
|
|
|
|
|
|
|
typedef struct {
|
|
|
|
|
int64_t l_pac;
|
|
|
|
|
int32_t n_seqs;
|
|
|
|
|
uint32_t seed;
|
2023-12-24 17:23:14 +08:00
|
|
|
bntann1_t *anns; // n_seqs elements 染色体
|
2011-01-14 09:52:12 +08:00
|
|
|
int32_t n_holes;
|
2023-12-24 17:23:14 +08:00
|
|
|
bntamb1_t *ambs; // n_holes elements 非AGCT字符
|
2011-01-14 09:52:12 +08:00
|
|
|
FILE *fp_pac;
|
|
|
|
|
} bntseq_t;
|
|
|
|
|
|
|
|
|
|
/*
 * 256-entry lookup table indexed by an unsigned byte value; the definition
 * lives in another translation unit.
 * NOTE(review): presumably maps ASCII nucleotide characters to small numeric
 * codes -- confirm against the definition.
 */
extern unsigned char nst_nt4_table[256];
|
|
|
|
|
|
|
|
|
|
#ifdef __cplusplus
|
|
|
|
|
extern "C" {
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
void bns_dump(const bntseq_t *bns, const char *prefix);
|
|
|
|
|
bntseq_t *bns_restore(const char *prefix);
|
|
|
|
|
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
|
|
|
|
|
void bns_destroy(bntseq_t *bns);
|
2011-10-22 00:03:14 +08:00
|
|
|
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix, int for_only);
|
2013-02-26 11:49:15 +08:00
|
|
|
int bns_pos2rid(const bntseq_t *bns, int64_t pos_f);
|
2011-10-22 00:03:14 +08:00
|
|
|
int bns_cnt_ambi(const bntseq_t *bns, int64_t pos_f, int len, int *ref_id);
|
2013-02-02 05:39:50 +08:00
|
|
|
uint8_t *bns_get_seq(int64_t l_pac, const uint8_t *pac, int64_t beg, int64_t end, int64_t *len);
|
2014-04-11 08:54:27 +08:00
|
|
|
uint8_t *bns_fetch_seq(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid);
|
|
|
|
|
int bns_intv2rid(const bntseq_t *bns, int64_t rb, int64_t re);
|
2024-03-07 18:23:21 +08:00
|
|
|
void bns_fetch_seq_no_alloc(const bntseq_t *bns, const uint8_t *pac, int64_t *beg, int64_t mid, int64_t *end, int *rid, size_t *m_seq, uint8_t **seqp);
|
2011-01-14 09:52:12 +08:00
|
|
|
#ifdef __cplusplus
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
|
2011-10-25 23:22:08 +08:00
|
|
|
static inline int64_t bns_depos(const bntseq_t *bns, int64_t pos, int *is_rev)
|
|
|
|
|
{
|
2011-10-26 01:00:41 +08:00
|
|
|
return (*is_rev = (pos >= bns->l_pac))? (bns->l_pac<<1) - 1 - pos : pos;
|
2011-10-25 23:22:08 +08:00
|
|
|
}
|
|
|
|
|
|
2011-01-14 09:52:12 +08:00
|
|
|
#endif
|