hybrid-index和bwt结果一致
This commit is contained in:
parent
caf01ce0f3
commit
70979c1b60
|
|
@ -8,6 +8,7 @@ bwamem-lite
|
|||
test_index/
|
||||
index/
|
||||
orig_index/
|
||||
output/
|
||||
run.sh
|
||||
debug.sh
|
||||
hybalign
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"mem",
|
||||
"-t",
|
||||
|
|
@ -17,9 +17,9 @@
|
|||
"-M",
|
||||
"-R",
|
||||
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
|
||||
"~/data/fmt_ref/human_g1k_v37_decoy.fasta",
|
||||
"./b1.fq",
|
||||
"./b2.fq",
|
||||
"/home/zzh/work/bioinfo/hyb-align/index/human_g1k_v37_decoy.fasta",
|
||||
//"./b1.fq",
|
||||
//"./b2.fq",
|
||||
//"./b1.fq",
|
||||
//"~/data/dataset/real/D1/n1.fq",
|
||||
//"~/data/dataset/real/D1/n2.fq",
|
||||
|
|
@ -29,11 +29,11 @@
|
|||
//"~/data/dataset/real/D3/n2.fq",
|
||||
//"~/data/dataset/real/D1/n1.fq.gz",
|
||||
//"~/data/dataset/real/D1/n2.fq.gz",
|
||||
//"~/data/dataset/real/D3/1w1.fq",
|
||||
//"~/data/dataset/real/D3/1w2.fq",
|
||||
"~/data/dataset/real/D3/1w1.fq",
|
||||
"~/data/dataset/real/D3/1w2.fq",
|
||||
"-o",
|
||||
"/dev/null",
|
||||
//"-Z",
|
||||
// "-g",
|
||||
],
|
||||
"cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间
|
||||
},
|
||||
|
|
@ -42,7 +42,7 @@
|
|||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"index",
|
||||
"~/data/reference/human_g1k_v37_decoy.fasta"
|
||||
|
|
@ -54,7 +54,7 @@
|
|||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"buildkmer",
|
||||
"~/data/reference/human_g1k_v37_decoy.fasta.256.64.fmt",
|
||||
|
|
@ -67,7 +67,7 @@
|
|||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"shm",
|
||||
"-Z",
|
||||
|
|
@ -80,7 +80,7 @@
|
|||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"pac2bref",
|
||||
"~/data1/fmt_ref/human_g1k_v37_decoy.fasta"
|
||||
|
|
@ -102,18 +102,15 @@
|
|||
"cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间
|
||||
},
|
||||
{
|
||||
"name": "train hybrid index",
|
||||
"name": "fa2pac",
|
||||
"preLaunchTask": "Build",
|
||||
"type": "cppdbg",
|
||||
"request": "launch",
|
||||
"program": "${workspaceRoot}/hbwa",
|
||||
"program": "${workspaceRoot}/hybalign",
|
||||
"args": [
|
||||
"trainhybrid",
|
||||
"-t",
|
||||
"1",
|
||||
"fa2pac",
|
||||
"-f",
|
||||
"~/data/fmt_ref/human_g1k_v37_decoy.fasta",
|
||||
"~/data/dataset/real/D1/n1.fq.gz",
|
||||
"~/data/dataset/real/D1/n2.fq.gz"
|
||||
],
|
||||
"cwd": "${workspaceFolder}", // 当前工作路径:当前文件所在的工作空间
|
||||
},
|
||||
|
|
|
|||
|
|
@ -70,6 +70,7 @@
|
|||
"share_mem.h": "c",
|
||||
"kseq.h": "c",
|
||||
"ostream": "c",
|
||||
"streambuf": "c"
|
||||
"streambuf": "c",
|
||||
"kbtree.h": "c"
|
||||
}
|
||||
}
|
||||
7
Makefile
7
Makefile
|
|
@ -1,10 +1,11 @@
|
|||
CC= gcc
|
||||
#CC= clang --analyze
|
||||
CFLAGS= -g -Wall -Wno-unused-function -O3
|
||||
CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O3
|
||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
HYBOBJS= hyb_bwa.o hyb_utils.o hyb_seeding_1.o hyb_seeding_2.o hyb_seeding_3.o hyb_create_idx.o debug.o profiling.o
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC) -DUSE_AVX2_EXT -DSHOW_PERF -DDEBUG_FILE_OUTPUT
|
||||
HYBOBJS= hyb_bwa.o hyb_utils.o hyb_seeding_1.o hyb_seeding_2.o hyb_seeding_3.o hyb_create_idx.o debug.o profiling.o share_mem.o yarn.o \
|
||||
ksw_extend2_avx2.o ksw_extend2_avx2_u8.o
|
||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
|
||||
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
|
||||
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
|
|
|
|||
2
bntseq.c
2
bntseq.c
|
|
@ -346,7 +346,9 @@ int bwa_fa2pac(int argc, char *argv[])
|
|||
return 1;
|
||||
}
|
||||
fp = xzopen(argv[optind], "r");
|
||||
start_async_read(fp);
|
||||
bns_fasta2bntseq(fp, (optind+1 < argc)? argv[optind+1] : argv[optind], for_only);
|
||||
stop_async_read(fp);
|
||||
err_gzclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
236
bwa.c
236
bwa.c
|
|
@ -24,16 +24,19 @@
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <zlib.h>
|
||||
#include <assert.h>
|
||||
#include "bntseq.h"
|
||||
#include "bwa.h"
|
||||
#include "ksw.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "kstring.h"
|
||||
#include "ksw.h"
|
||||
#include "kvec.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef USE_MALLOC_WRAPPERS
|
||||
# include "malloc_wrap.h"
|
||||
|
|
@ -57,28 +60,193 @@ static inline void trim_readno(kstring_t *s)
|
|||
s->l -= 2, s->s[s->l] = 0;
|
||||
}
|
||||
|
||||
static inline char *dupkstring(const kstring_t *str, int dupempty)
|
||||
{
|
||||
char *s = (str->l > 0 || dupempty)? malloc(str->l + 1) : NULL;
|
||||
if (!s) return NULL;
|
||||
|
||||
memcpy(s, str->s, str->l);
|
||||
s[str->l] = '\0';
|
||||
return s;
|
||||
static inline void dupkstring(const kstring_t* str, int dupempty, char** dstp, int* sm) {
|
||||
if (!dupempty && str->l == 0) {
|
||||
if (*dstp) free(*dstp);
|
||||
*dstp = 0; *sm = 0;
|
||||
} else if (*dstp == 0 || *sm < str->l) {
|
||||
*sm = str->l;
|
||||
*dstp = (char*)realloc(*dstp, str->l + 1);
|
||||
}
|
||||
char* s = *dstp;
|
||||
if (!s) return;
|
||||
memcpy(s, str->s, str->l);
|
||||
s[str->l] = '\0';
|
||||
}
|
||||
|
||||
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s)
|
||||
static inline void kseq2bseq1(const kseq_t *ks, bseq1_t *s, int copy_comment)
|
||||
{ // TODO: it would be better to allocate one chunk of memory, but probably it does not matter in practice
|
||||
s->name = dupkstring(&ks->name, 1);
|
||||
s->comment = dupkstring(&ks->comment, 0);
|
||||
s->seq = dupkstring(&ks->seq, 1);
|
||||
s->qual = dupkstring(&ks->qual, 0);
|
||||
s->l_seq = ks->seq.l;
|
||||
dupkstring(&ks->name, 1, &s->name, &s->m_name);
|
||||
if (copy_comment) dupkstring(&ks->comment, 0, &s->comment, &s->m_comment);
|
||||
dupkstring(&ks->seq, 1, &s->seq, &s->m_seq);
|
||||
dupkstring(&ks->qual, 0, &s->qual, &s->m_qual);
|
||||
s->l_seq = ks->seq.l;
|
||||
}
|
||||
|
||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
||||
{
|
||||
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
||||
// Work item for thread_bseq_read(): describes one input stream and where its reads go,
// and receives the outcome of the parse.
typedef struct {
|
||||
kseq_t* ks; // parser state of the stream this worker reads from
|
||||
bseq1_t* seq; // base of the shared output array
|
||||
int start_pos; // index of the first slot this worker fills; it then advances by 2 per read
|
||||
int n_bound; // upper bound on the number of reads to parse
|
||||
int copy_comment; // forwarded to kseq2bseq1(): non-zero copies the read comment
|
||||
int ret_n; // out: number of reads parsed
|
||||
|
||||
int ret_size; // out: sum of l_seq over the parsed reads
|
||||
|
||||
int ret_status; // out: last kseq_read() return value (1 if no read was attempted)
|
||||
|
||||
int chunk_size; // stop once the accumulated sequence length reaches this value
|
||||
|
||||
} read_data_t;
|
||||
|
||||
static void* thread_bseq_read(void* data) {
|
||||
read_data_t* d = (read_data_t*)data;
|
||||
kseq_t* ks = d->ks;
|
||||
bseq1_t* seqs = d->seq;
|
||||
int copy_comment = d->copy_comment;
|
||||
int chunk_size = d->chunk_size;
|
||||
int cur_n = 0, cur_pos = d->start_pos, size = 0;
|
||||
int ret_status = 1;
|
||||
|
||||
while (cur_n < d->n_bound && (ret_status = kseq_read(ks)) >= 0) {
|
||||
trim_readno(&ks->name);
|
||||
kseq2bseq1(ks, seqs + cur_pos, copy_comment);
|
||||
seqs[cur_pos].id = cur_pos;
|
||||
size += seqs[cur_pos].l_seq;
|
||||
cur_pos += 2; cur_n += 1;
|
||||
if (size >= chunk_size) break;
|
||||
}
|
||||
d->ret_n = cur_n; d->ret_size = size; d->ret_status = ret_status;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Append the record currently held by parser `ksin` to the output array.
 * Relies on `seqs`, `n`, `size` and `copy_comment` being in scope at the
 * expansion site; `n` is advanced by one and `size` grows by the read length.
 * Wrapped in do/while(0) so the macro behaves as a single statement, including
 * under an unbraced if/else.
 */
#define READ_ONE_SEQ(ksin)                              \
    do {                                                \
        trim_readno(&(ksin)->name);                     \
        kseq2bseq1((ksin), &seqs[n], copy_comment);     \
        seqs[n].id = n;                                 \
        size += seqs[n++].l_seq;                        \
    } while (0)
|
||||
|
||||
// multi thread reading input seqs
|
||||
void bseq_read_pe_mt(int chunk_size, int* n_, void* ks1_, void* ks2_, int copy_comment, int64_t* size_, int* m_, bseq1_t** seqs_ptr) {
|
||||
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
||||
int size = 0, m = *m_, n = 0;
|
||||
bseq1_t* seqs = *seqs_ptr;
|
||||
read_data_t d[2];
|
||||
pthread_t tid[2];
|
||||
const int chunk_size_narrow = 4 * 1024 * 1024;
|
||||
const int init_n_reads = 20;
|
||||
if (m == 0) { // 还没开辟空间,要初始化
|
||||
seqs = (bseq1_t*)calloc(init_n_reads,
|
||||
sizeof(bseq1_t)); // 先读取20个reads,根据reads的长度和chunk size决定要读取多少条reads
|
||||
#if 1
|
||||
int ks1_ret = 0, ks2_ret = 0;
|
||||
int i = init_n_reads >> 1;
|
||||
while (i-- > 0) {
|
||||
ks1_ret = kseq_read(ks);
|
||||
if (ks1_ret < 0)
|
||||
break;
|
||||
ks2_ret = kseq_read(ks2);
|
||||
if (ks2_ret < 0) {
|
||||
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
|
||||
break;
|
||||
}
|
||||
READ_ONE_SEQ(ks);
|
||||
READ_ONE_SEQ(ks2);
|
||||
}
|
||||
if (ks1_ret < 0 || ks2_ret < 0) {
|
||||
if (size == 0 && kseq_read(ks2) >= 0) { // test if the 2nd file is finished
|
||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||
}
|
||||
*n_ = n;
|
||||
*seqs_ptr = seqs;
|
||||
*size_ = size;
|
||||
*m_ = n;
|
||||
return;
|
||||
}
|
||||
m = (chunk_size + size / init_n_reads - 1) / (size / init_n_reads);
|
||||
#else
|
||||
m = 50000;
|
||||
#endif
|
||||
seqs = (bseq1_t*)realloc(seqs, m * sizeof(bseq1_t));
|
||||
memset(seqs + n, 0, sizeof(bseq1_t) * (m - n));
|
||||
}
|
||||
|
||||
d[0].copy_comment = copy_comment;
|
||||
d[1].copy_comment = copy_comment;
|
||||
d[0].ks = ks;
|
||||
d[0].seq = &seqs[0];
|
||||
d[0].n_bound = (m >> 1) - (n >> 1);
|
||||
d[0].start_pos = n;
|
||||
d[1].ks = ks2;
|
||||
d[1].seq = &seqs[0];
|
||||
d[1].n_bound = (m >> 1) - (n >> 1);
|
||||
d[1].start_pos = n + 1;
|
||||
d[0].chunk_size = d[1].chunk_size = (chunk_size - chunk_size_narrow - size) >> 1;
|
||||
|
||||
pthread_create(&tid[0], 0, thread_bseq_read, &d[0]);
|
||||
pthread_create(&tid[1], 0, thread_bseq_read, &d[1]);
|
||||
pthread_join(tid[0], 0);
|
||||
pthread_join(tid[1], 0);
|
||||
|
||||
size += d[0].ret_size + d[1].ret_size;
|
||||
|
||||
// 如果两个线程读入的reads数量不一致
|
||||
if (d[0].ret_n < d[1].ret_n) {
|
||||
int num_to_read = d[1].ret_n - d[0].ret_n;
|
||||
int offset = n + d[0].ret_n * 2;
|
||||
while (num_to_read-- > 0 && kseq_read(ks) >= 0) {
|
||||
trim_readno(&ks->name);
|
||||
kseq2bseq1(ks, &seqs[offset], copy_comment);
|
||||
seqs[offset].id = offset;
|
||||
size += seqs[offset].l_seq;
|
||||
offset += 2;
|
||||
}
|
||||
d[0].ret_n = d[1].ret_n;
|
||||
} else if (d[1].ret_n < d[0].ret_n) {
|
||||
int num_to_read = d[0].ret_n - d[1].ret_n;
|
||||
int offset = n + 1 + d[1].ret_n * 2;
|
||||
while (num_to_read-- > 0 && kseq_read(ks2) >= 0) {
|
||||
trim_readno(&ks2->name);
|
||||
kseq2bseq1(ks2, &seqs[offset], copy_comment);
|
||||
seqs[offset].id = offset;
|
||||
size += seqs[offset].l_seq;
|
||||
offset += 2;
|
||||
}
|
||||
d[1].ret_n = d[0].ret_n;
|
||||
}
|
||||
|
||||
n += d[0].ret_n + d[1].ret_n;
|
||||
|
||||
if (size == 0 && kseq_read(ks2) >= 0) { // test if the 2nd file is finished
|
||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||
} else if (size < chunk_size && d[0].ret_status > 0 && d[1].ret_status > 0) {
|
||||
while (kseq_read(ks) >= 0) {
|
||||
if (kseq_read(ks2) < 0) { // the 2nd file has fewer reads
|
||||
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
|
||||
break;
|
||||
}
|
||||
if (n >= m) {
|
||||
m = m ? m << 1 : 256;
|
||||
seqs = (bseq1_t*)realloc(seqs, m * sizeof(bseq1_t));
|
||||
memset(seqs + n, 0, (m - n) * sizeof(bseq1_t));
|
||||
}
|
||||
READ_ONE_SEQ(ks);
|
||||
READ_ONE_SEQ(ks2);
|
||||
if (size >= chunk_size && (n & 1) == 0)
|
||||
break;
|
||||
}
|
||||
if (size == 0) { // test if the 2nd file is finished
|
||||
if (kseq_read(ks2) >= 0)
|
||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||
}
|
||||
}
|
||||
*n_ = n;
|
||||
*size_ = size;
|
||||
if (m > *m_)
|
||||
*m_ = m;
|
||||
*seqs_ptr = seqs;
|
||||
}
|
||||
|
||||
void bseq_read(int chunk_size, int* n_, void* ks1_, void* ks2_, int copy_comment, int64_t* size_, int* m_, bseq1_t** seqs_ptr) {
|
||||
// using multi-thread reading
|
||||
if (ks2_) return bseq_read_pe_mt(chunk_size, n_, ks1_, ks2_, copy_comment, size_, m_, seqs_ptr);
|
||||
kseq_t *ks = (kseq_t*)ks1_, *ks2 = (kseq_t*)ks2_;
|
||||
int size = 0, m, n;
|
||||
bseq1_t *seqs;
|
||||
m = n = 0; seqs = 0;
|
||||
|
|
@ -91,24 +259,20 @@ bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_)
|
|||
m = m? m<<1 : 256;
|
||||
seqs = realloc(seqs, m * sizeof(bseq1_t));
|
||||
}
|
||||
trim_readno(&ks->name);
|
||||
kseq2bseq1(ks, &seqs[n]);
|
||||
seqs[n].id = n;
|
||||
size += seqs[n++].l_seq;
|
||||
if (ks2) {
|
||||
trim_readno(&ks2->name);
|
||||
kseq2bseq1(ks2, &seqs[n]);
|
||||
seqs[n].id = n;
|
||||
size += seqs[n++].l_seq;
|
||||
}
|
||||
READ_ONE_SEQ(ks);
|
||||
if (ks2) {
|
||||
READ_ONE_SEQ(ks2);
|
||||
}
|
||||
if (size >= chunk_size && (n&1) == 0) break;
|
||||
}
|
||||
if (size == 0) { // test if the 2nd file is finished
|
||||
if (ks2 && kseq_read(ks2) >= 0)
|
||||
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
|
||||
}
|
||||
*n_ = n;
|
||||
return seqs;
|
||||
*n_ = n;
|
||||
*size_ = size;
|
||||
if (m > *m_) *m_ = m;
|
||||
*seqs_ptr = seqs;
|
||||
}
|
||||
|
||||
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2])
|
||||
|
|
|
|||
23
bwa.h
23
bwa.h
|
|
@ -28,8 +28,11 @@
|
|||
#define BWA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "kstring.h"
|
||||
#include "hyb_idx.h"
|
||||
|
||||
#define BWA_IDX_BWT 0x1
|
||||
#define BWA_IDX_BNS 0x2
|
||||
|
|
@ -49,17 +52,24 @@ typedef struct {
|
|||
bwt_t *bwt; // FM-index
|
||||
bntseq_t *bns; // information on the reference sequences
|
||||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||
HybridIndex *hyb; // Hybrid index
|
||||
|
||||
int is_shm;
|
||||
int64_t l_mem;
|
||||
int is_shm;
|
||||
int64_t l_mem;
|
||||
uint8_t *mem;
|
||||
} bwaidx_t;
|
||||
|
||||
typedef struct {
|
||||
int l_seq, id;
|
||||
char *name, *comment, *seq, *qual, *sam;
|
||||
int l_seq, id;
|
||||
int m_name, m_comment, m_seq, m_qual;
|
||||
char *name, *comment, *seq, *qual;
|
||||
kstring_t sam;
|
||||
} bseq1_t;
|
||||
|
||||
typedef struct {
|
||||
kstring_t sam;
|
||||
} seq_sam_t;
|
||||
|
||||
extern int bwa_verbose, bwa_dbg;
|
||||
extern char bwa_rg_id[256];
|
||||
|
||||
|
|
@ -67,8 +77,9 @@ extern char bwa_rg_id[256];
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||||
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]);
|
||||
// bseq1_t *bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_);
|
||||
void bseq_read(int chunk_size, int* n_, void* ks1_, void* ks2_, int copy_comment, int64_t* size_, int* m_, bseq1_t** seqs_ptr);
|
||||
void bseq_classify(int n, bseq1_t *seqs, int m[2], bseq1_t *sep[2]);
|
||||
|
||||
void bwa_fill_scmat(int a, int b, int8_t mat[25]);
|
||||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||||
|
|
|
|||
683
bwamem.c
683
bwamem.c
|
|
@ -41,6 +41,7 @@
|
|||
#include "kvec.h"
|
||||
#include "ksort.h"
|
||||
#include "utils.h"
|
||||
#include "hyb_idx.h"
|
||||
|
||||
#ifdef USE_MALLOC_WRAPPERS
|
||||
# include "malloc_wrap.h"
|
||||
|
|
@ -110,7 +111,7 @@ mem_opt_t *mem_opt_init()
|
|||
o->use_bwt = 0;
|
||||
o->skip_entire_match = 0;
|
||||
o->batch_size = 256;
|
||||
|
||||
|
||||
return o;
|
||||
}
|
||||
|
||||
|
|
@ -121,17 +122,23 @@ mem_opt_t *mem_opt_init()
|
|||
#define intv_lt(a, b) ((a).info < (b).info)
|
||||
KSORT_INIT(mem_intv, bwtintv_t, intv_lt)
|
||||
|
||||
typedef struct {
|
||||
bwtintv_v mem, mem1, *tmpv[2];
|
||||
} smem_aux_t;
|
||||
|
||||
static smem_aux_t *smem_aux_init()
|
||||
{
|
||||
smem_aux_t *a;
|
||||
a = calloc(1, sizeof(smem_aux_t));
|
||||
a->tmpv[0] = calloc(1, sizeof(bwtintv_v));
|
||||
a->tmpv[1] = calloc(1, sizeof(bwtintv_v));
|
||||
return a;
|
||||
a->sw_buf = (buf_t*)calloc(1, sizeof(buf_t));
|
||||
a->seq_buf = (buf_t*)calloc(1, sizeof(buf_t));
|
||||
a->byte_seq = (byte_v*)calloc(1, sizeof(byte_v));
|
||||
a->reverse_seq = (byte_v*)calloc(1, sizeof(byte_v));
|
||||
a->for_bits = (byte_v*)calloc(1, sizeof(byte_v));
|
||||
a->back_bits = (byte_v*)calloc(1, sizeof(byte_v));
|
||||
kv_resize(uint8_t, *a->byte_seq, HYB_MAX_SEQ_LEN);
|
||||
kv_resize(uint8_t, *a->reverse_seq, HYB_MAX_SEQ_LEN);
|
||||
kv_resize(uint8_t, *a->for_bits, HYB_MAX_SEQ_LEN);
|
||||
kv_resize(uint8_t, *a->back_bits, HYB_MAX_SEQ_LEN);
|
||||
return a;
|
||||
}
|
||||
|
||||
static void smem_aux_destroy(smem_aux_t *a)
|
||||
|
|
@ -142,13 +149,41 @@ static void smem_aux_destroy(smem_aux_t *a)
|
|||
free(a);
|
||||
}
|
||||
|
||||
static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, const uint8_t *seq, smem_aux_t *a)
|
||||
{
|
||||
int i, k, x = 0, old_n;
|
||||
// 初始化线程需要的数据
|
||||
mem_worker_t* init_mem_worker(const mem_opt_t* opt, const bwt_t *bwt, const HybridIndex* hyb, const bntseq_t* bns, const uint8_t* pac) {
|
||||
int i = opt->n_threads, j;
|
||||
mem_worker_t *w = (mem_worker_t *)calloc(1, sizeof(mem_worker_t));
|
||||
w->opt = opt; w->bwt = bwt; w->hyb = hyb; w->bns = bns; w->pac = pac;
|
||||
w->calc_isize = 0; w->n = 0; w->regs = 0;
|
||||
w->aux = (smem_aux_t**)malloc(i * sizeof(smem_aux_t*));
|
||||
w->smem_arr = (smem_v **)malloc(i * sizeof(smem_v *));
|
||||
w->chain_arr = (mem_chain_v **)malloc(i * sizeof(mem_chain_v *));
|
||||
w->isize_arr = (uint64_v **)malloc(i * sizeof(uint64_v *));
|
||||
w->seed_arr = (HybSeedArr **)malloc(i * sizeof(HybSeedArr*));
|
||||
|
||||
for (i = 0; i < opt->n_threads; ++i) {
|
||||
w->aux[i] = smem_aux_init();
|
||||
w->smem_arr[i] = (smem_v*)malloc(opt->batch_size * sizeof(smem_v));
|
||||
w->chain_arr[i] = (mem_chain_v*)malloc(opt->batch_size * sizeof(mem_chain_v));
|
||||
w->isize_arr[i] = (uint64_v *)calloc(4, sizeof(uint64_v));
|
||||
w->seed_arr[i] = (HybSeedArr *)malloc(opt->batch_size * sizeof(HybSeedArr));
|
||||
for (j = 0; j < opt->batch_size; ++j) {
|
||||
kv_init(w->smem_arr[i][j].mem);
|
||||
kv_init(w->smem_arr[i][j].pos_arr);
|
||||
kv_init(w->chain_arr[i][j]);
|
||||
kv_init(w->seed_arr[i][j]);
|
||||
}
|
||||
}
|
||||
return w;
|
||||
}
|
||||
|
||||
// seeding
|
||||
static void mem_collect_intv(const mem_opt_t* opt, const bwt_t* bwt, int len, const uint8_t* seq, smem_v* smem, smem_aux_t* a, int tid) {
|
||||
int i, k, x = 0, old_n;
|
||||
int start_width = 1;
|
||||
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
||||
a->mem.n = 0;
|
||||
// first pass: find all SMEMs
|
||||
smem->mem.n = 0;
|
||||
// first pass: find all SMEMs
|
||||
while (x < len) {
|
||||
if (seq[x] < 4) {
|
||||
x = bwt_smem1(bwt, len, seq, x, start_width, &a->mem1, a->tmpv);
|
||||
|
|
@ -156,21 +191,21 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
|
|||
bwtintv_t *p = &a->mem1.a[i];
|
||||
int slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||
if (slen >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, a->mem, *p);
|
||||
}
|
||||
kv_push(bwtintv_t, smem->mem, *p);
|
||||
}
|
||||
} else ++x;
|
||||
}
|
||||
// second pass: find MEMs inside a long SMEM
|
||||
old_n = a->mem.n;
|
||||
for (k = 0; k < old_n; ++k) {
|
||||
bwtintv_t *p = &a->mem.a[k];
|
||||
old_n = smem->mem.n;
|
||||
for (k = 0; k < old_n; ++k) {
|
||||
bwtintv_t *p = &smem->mem.a[k];
|
||||
int start = p->info>>32, end = (int32_t)p->info;
|
||||
if (end - start < split_len || p->x[2] > opt->split_width) continue;
|
||||
bwt_smem1(bwt, len, seq, (start + end)>>1, p->x[2]+1, &a->mem1, a->tmpv);
|
||||
for (i = 0; i < a->mem1.n; ++i)
|
||||
if ((uint32_t)a->mem1.a[i].info - (a->mem1.a[i].info>>32) >= opt->min_seed_len)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
}
|
||||
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
||||
}
|
||||
// third pass: LAST-like
|
||||
if (opt->max_mem_intv > 0) {
|
||||
x = 0;
|
||||
|
|
@ -179,39 +214,120 @@ static void mem_collect_intv(const mem_opt_t *opt, const bwt_t *bwt, int len, co
|
|||
if (1) {
|
||||
bwtintv_t m;
|
||||
x = bwt_seed_strategy1(bwt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
|
||||
if (m.x[2] > 0) kv_push(bwtintv_t, a->mem, m);
|
||||
if (m.x[2] > 0) kv_push(bwtintv_t, smem->mem, m);
|
||||
} else { // for now, we never come to this block which is slower
|
||||
x = bwt_smem1a(bwt, len, seq, x, start_width, opt->max_mem_intv, &a->mem1, a->tmpv);
|
||||
for (i = 0; i < a->mem1.n; ++i)
|
||||
kv_push(bwtintv_t, a->mem, a->mem1.a[i]);
|
||||
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
|
||||
}
|
||||
} else ++x;
|
||||
}
|
||||
}
|
||||
// sort
|
||||
ks_introsort(mem_intv, a->mem.n, a->mem.a);
|
||||
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
|
||||
}
|
||||
|
||||
/*
 * Collect the SMEM seeds of one read into smemv.
 *
 * opt   - alignment options; min_seed_len is the shortest usable query.
 * bwt   - FM-index searched by mem_collect_intv().
 * len   - query length.
 * seq   - query bases.
 * aux   - per-thread scratch buffers for the seeding passes.
 * smemv - output; its interval list is refilled and its position list is emptied.
 * tid   - worker thread id, forwarded to mem_collect_intv().
 *
 * A query shorter than the minimum seed length cannot match. Its output is
 * emptied explicitly so a reused smemv does not keep the previous read's seeds.
 */
void find_smem(const mem_opt_t* opt, const bwt_t* bwt, int len, const uint8_t* seq, smem_aux_t* aux, smem_v* smemv, int tid) {
    if (len >= opt->min_seed_len)
        mem_collect_intv(opt, bwt, len, seq, smemv, aux, tid);
    else
        smemv->mem.n = 0; // no match: drop anything left over from an earlier read
    smemv->pos_arr.n = 0;
}
|
||||
|
||||
// hybrid-index-based seeding
|
||||
// Strict ordering of HybSeed records: ascending seed_start, ties broken by ascending seed_end.
#define hyb_seed_lt(a, b) ((a).seed_start == (b).seed_start ? (a).seed_end < (b).seed_end : (a).seed_start < (b).seed_start)
|
||||
// Instantiate the ksort routines for HybSeed under the name hyb_seed (e.g. ks_introsort(hyb_seed, ...)).
KSORT_INIT(hyb_seed, HybSeed, hyb_seed_lt)
|
||||
|
||||
static void hyb_seeding(const mem_opt_t* opt, const HybridIndex* hyb, ReadSeq* read_seq, RangeArr* read_ranges, RangeArr* seeds_ranges,
|
||||
HybSeedArr* seeds, uint64_t seq_id, int tid) {
|
||||
int i = 0;
|
||||
int split_len = (int)(opt->min_seed_len * opt->split_factor + .499);
|
||||
seeds->n = 0;
|
||||
|
||||
// fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||
|
||||
if (seq_id == 4) {
|
||||
fprintf(stderr, "seq-id: %ld\n", seq_id);
|
||||
}
|
||||
|
||||
// 1. seeding-1: find all SMEMs
|
||||
PROF_START(seed_1);
|
||||
for (i = 0; i < read_ranges->n; ++i) {
|
||||
Range range = kv_A(*read_ranges, i);
|
||||
if (range.len < opt->min_seed_len)
|
||||
continue;
|
||||
seeds_ranges->a[i].start = seeds->n;
|
||||
hyb_first_seeding(hyb, read_seq, &range, opt->min_seed_len, seeds, tid);
|
||||
seeds_ranges->a[i].end = seeds->n;
|
||||
}
|
||||
|
||||
tprof[T_SEED_LEN][tid] += seeds->n;
|
||||
PROF_END(tprof[T_SEED_1][tid], seed_1);
|
||||
#if 1
|
||||
// 2. seeding-2: find MEMs inside a long SMEM
|
||||
PROF_START(seed_2);
|
||||
int pre_pivot = 0;
|
||||
int old_n = seeds->n;
|
||||
int pre_start = old_n, pre_end = old_n, pre_n = old_n;
|
||||
|
||||
for (i = 0; i < old_n; ++i) {
|
||||
HybSeed* seed = &kv_A(*seeds, i);
|
||||
int start = seed->seed_start, end = seed->seed_end;
|
||||
if (end - start < split_len || seed->ref_pos_arr.n > opt->split_width)
|
||||
continue;
|
||||
pre_n = seeds->n;
|
||||
if (seed->ref_pos_arr.n == 1) {
|
||||
pre_pivot = hyb_second_seeding(hyb, read_seq, start, end, seed->read_start, seed->read_end, seed->ref_pos_arr.a[0],
|
||||
seed->ref_pos_arr.n + 1, pre_pivot, pre_start, pre_end, opt->min_seed_len, seeds, tid);
|
||||
pre_start = pre_n;
|
||||
pre_end = seeds->n;
|
||||
} else {
|
||||
hyb_second_seeding(hyb, read_seq, start, end, seed->read_start, seed->read_end, seed->ref_pos_arr.a[0], seed->ref_pos_arr.n + 1, 0, 0, 0,
|
||||
opt->min_seed_len, seeds, tid);
|
||||
}
|
||||
}
|
||||
PROF_END(tprof[T_SEED_2][tid], seed_2);
|
||||
#endif
|
||||
|
||||
#if 1
|
||||
// 3. seeding-3: LAST-like
|
||||
old_n = seeds->n;
|
||||
PROF_START(seed_3);
|
||||
if (opt->max_mem_intv > 0) {
|
||||
for (i = 0; i < read_ranges->n; ++i) {
|
||||
Range range = kv_A(*read_ranges, i);
|
||||
if (range.len < opt->min_seed_len)
|
||||
continue;
|
||||
Range seeds_range = kv_A(*seeds_ranges, i);
|
||||
hyb_third_seeding(hyb, read_seq, &range, &seeds_range, opt->min_seed_len, opt->max_mem_intv, seeds, tid);
|
||||
}
|
||||
}
|
||||
PROF_END(tprof[T_SEED_3][tid], seed_3);
|
||||
#endif
|
||||
#if 0
|
||||
{
|
||||
FILE *fp = gf[1];
|
||||
int j;
|
||||
// fprintf(fp, "%ld ", seq_id);
|
||||
for (i = 0; i < seeds->n; ++i) {
|
||||
HybSeed *seed = &kv_A(*seeds, i);
|
||||
fprintf(fp, "s:%d e:%d n:%ld ", seed->seed_start, seed->seed_end, seed->ref_pos_arr.n);
|
||||
for (j = 0; j < seed->ref_pos_arr.n; ++j) {
|
||||
fprintf(fp, "%ld ", seed->ref_pos_arr.a[j]);
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
}
|
||||
fprintf(fp, "\n");
|
||||
// fprintf(fp, "seq_id:%ld\n", seq_id);
|
||||
}
|
||||
#endif
|
||||
ks_introsort(hyb_seed, kv_size(*seeds), seeds->a);
|
||||
}
|
||||
|
||||
/************
|
||||
* Chaining *
|
||||
************/
|
||||
|
||||
typedef struct {
|
||||
int64_t rbeg;
|
||||
int32_t qbeg, len;
|
||||
int score;
|
||||
} mem_seed_t; // unaligned memory
|
||||
|
||||
typedef struct {
|
||||
int n, m, first, rid;
|
||||
uint32_t w:29, kept:2, is_alt:1;
|
||||
float frac_rep;
|
||||
int64_t pos;
|
||||
mem_seed_t *seeds;
|
||||
} mem_chain_t;
|
||||
|
||||
typedef struct { size_t n, m; mem_chain_t *a; } mem_chain_v;
|
||||
|
||||
#include "kbtree.h"
|
||||
|
||||
#define chain_cmp(a, b) (((b).pos < (a).pos) - ((a).pos < (b).pos))
|
||||
|
|
@ -279,30 +395,25 @@ void mem_print_chain(const bntseq_t *bns, mem_chain_v *chn)
|
|||
}
|
||||
}
|
||||
|
||||
mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, int len, const uint8_t *seq, void *buf)
|
||||
{
|
||||
int i, b, e, l_rep;
|
||||
void generate_chain(const mem_opt_t* opt, const bwt_t* bwt, const bntseq_t* bns, int len, const uint8_t* seq, bwtintv_v mem, mem_chain_v* chain, int tid) {
|
||||
int i, b, e, l_rep;
|
||||
int64_t l_pac = bns->l_pac;
|
||||
mem_chain_v chain;
|
||||
kbtree_t(chn) *tree;
|
||||
smem_aux_t *aux;
|
||||
chain->n = 0;
|
||||
|
||||
kv_init(chain);
|
||||
if (len < opt->min_seed_len) return chain; // if the query is shorter than the seed length, no match
|
||||
if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match
|
||||
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
||||
|
||||
aux = buf? (smem_aux_t*)buf : smem_aux_init();
|
||||
mem_collect_intv(opt, bwt, len, seq, aux);
|
||||
for (i = 0, b = e = l_rep = 0; i < aux->mem.n; ++i) { // compute frac_rep
|
||||
bwtintv_t *p = &aux->mem.a[i];
|
||||
for (i = 0, b = e = l_rep = 0; i < mem.n; ++i) { // compute frac_rep
|
||||
bwtintv_t *p = &mem.a[i];
|
||||
int sb = (p->info>>32), se = (uint32_t)p->info;
|
||||
if (p->x[2] <= opt->max_occ) continue;
|
||||
if (sb > e) l_rep += e - b, b = sb, e = se;
|
||||
else e = e > se? e : se;
|
||||
}
|
||||
l_rep += e - b;
|
||||
for (i = 0; i < aux->mem.n; ++i) {
|
||||
bwtintv_t *p = &aux->mem.a[i];
|
||||
for (i = 0; i < mem.n; ++i) {
|
||||
bwtintv_t *p = &mem.a[i];
|
||||
int step, count, slen = (uint32_t)p->info - (p->info>>32); // seed length
|
||||
int64_t k;
|
||||
// if (slen < opt->min_seed_len) continue; // ignore if too short or too repetitive
|
||||
|
|
@ -330,19 +441,78 @@ mem_chain_v mem_chain(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bn
|
|||
}
|
||||
}
|
||||
}
|
||||
if (buf == 0) smem_aux_destroy(aux);
|
||||
if (chain->m < kb_size(tree)) {
|
||||
kv_resize(mem_chain_t, *chain, kb_size(tree));
|
||||
}
|
||||
|
||||
kv_resize(mem_chain_t, chain, kb_size(tree));
|
||||
#define traverse_func(p_) (chain->a[chain->n++] = *(p_))
|
||||
__kb_traverse(mem_chain_t, tree, traverse_func);
|
||||
#undef traverse_func
|
||||
for (i = 0; i < chain->n; ++i) chain->a[i].frac_rep = (float)l_rep / len;
|
||||
if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
|
||||
kb_destroy(chn, tree);
|
||||
}
|
||||
|
||||
#define traverse_func(p_) (chain.a[chain.n++] = *(p_))
|
||||
__kb_traverse(mem_chain_t, tree, traverse_func);
|
||||
#undef traverse_func
|
||||
void hyb_generate_chain(const mem_opt_t *opt, const HybridIndex *hyb, const bntseq_t *bns, int len, const uint8_t *seq,
|
||||
HybSeedArr *seeds, mem_chain_v *chain, int tid) {
|
||||
int i, b, e, l_rep;
|
||||
int64_t l_pac = bns->l_pac;
|
||||
kbtree_t(chn) * tree;
|
||||
chain->n = 0;
|
||||
if (len < opt->min_seed_len) return; // if the query is shorter than the seed length, no match
|
||||
tree = kb_init(chn, KB_DEFAULT_SIZE);
|
||||
for (i = 0, b = e = l_rep = 0; i < seeds->n; ++i) { // compute frac_rep
|
||||
HybSeed *seed = &kv_A(*seeds, i);
|
||||
int sb = seed->seed_start, se = seed->seed_end;
|
||||
if (seed->ref_pos_arr.n <= opt->max_occ) continue;
|
||||
if (sb > e) l_rep += e - b, b = sb, e = se;
|
||||
else e = e > se ? e : se;
|
||||
}
|
||||
l_rep += e - b;
|
||||
for (i = 0; i < seeds->n; ++i) {
|
||||
HybSeed *seed = &kv_A(*seeds, i);
|
||||
int step, count; // seed length
|
||||
int64_t k;
|
||||
step = seed->ref_pos_arr.n > opt->max_occ ? seed->ref_pos_arr.n / opt->max_occ : 1;
|
||||
for (k = count = 0; k < seed->ref_pos_arr.n && count < opt->max_occ; k += step, ++count) {
|
||||
mem_chain_t tmp, *lower, *upper;
|
||||
mem_seed_t s;
|
||||
int rid, to_add = 0;
|
||||
s.rbeg = tmp.pos = seed->ref_pos_arr.a[k];
|
||||
s.qbeg = seed->seed_start;
|
||||
s.len = seed->seed_end - seed->seed_start;
|
||||
s.score = s.len;
|
||||
rid = bns_intv2rid(bns, s.rbeg, s.rbeg + s.len);
|
||||
if (rid < 0)
|
||||
continue; // bridging multiple reference sequences or the forward-reverse boundary; TODO: split the seed;
|
||||
// don't discard it!!!
|
||||
if (kb_size(tree)) {
|
||||
kb_intervalp(chn, tree, &tmp, &lower, &upper); // find the closest chain
|
||||
if (!lower || !test_and_merge(opt, l_pac, lower, &s, rid))
|
||||
to_add = 1;
|
||||
} else
|
||||
to_add = 1;
|
||||
if (to_add) { // add the seed as a new chain
|
||||
tmp.n = 1;
|
||||
tmp.m = 4;
|
||||
tmp.seeds = (mem_seed_t *)calloc(tmp.m, sizeof(mem_seed_t));
|
||||
tmp.seeds[0] = s;
|
||||
tmp.rid = rid;
|
||||
tmp.is_alt = !!bns->anns[rid].is_alt;
|
||||
kb_putp(chn, tree, &tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (chain->m < kb_size(tree)) {
|
||||
kv_resize(mem_chain_t, *chain, kb_size(tree));
|
||||
}
|
||||
#define traverse_func(p_) (chain->a[chain->n++] = *(p_))
|
||||
__kb_traverse(mem_chain_t, tree, traverse_func);
|
||||
#undef traverse_func
|
||||
|
||||
for (i = 0; i < chain.n; ++i) chain.a[i].frac_rep = (float)l_rep / len;
|
||||
if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
|
||||
|
||||
kb_destroy(chn, tree);
|
||||
return chain;
|
||||
for (i = 0; i < chain->n; ++i) chain->a[i].frac_rep = (float)l_rep / len;
|
||||
if (bwa_verbose >= 4) printf("* fraction of repetitive seeds: %.3f\n", (float)l_rep / len);
|
||||
kb_destroy(chn, tree);
|
||||
}
|
||||
|
||||
/********************
|
||||
|
|
@ -660,15 +830,16 @@ static inline int cal_max_gap(const mem_opt_t *opt, int qlen)
|
|||
|
||||
#define MAX_BAND_TRY 2
|
||||
|
||||
void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av)
|
||||
void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const uint8_t *query, const mem_chain_t *c, mem_alnreg_v *av, void *buf, int tid)
|
||||
{
|
||||
int i, k, rid, max_off[2], aw[2]; // aw: actual bandwidth used in extension
|
||||
int64_t l_pac = bns->l_pac, rmax[2], tmp, max = 0;
|
||||
const mem_seed_t *s;
|
||||
uint8_t *rseq = 0;
|
||||
uint64_t *srt;
|
||||
smem_aux_t* aux = (smem_aux_t*)buf;
|
||||
|
||||
if (c->n == 0) return;
|
||||
if (c->n == 0) return;
|
||||
// get the max possible span
|
||||
rmax[0] = l_pac<<1; rmax[1] = 0;
|
||||
for (i = 0; i < c->n; ++i) {
|
||||
|
|
@ -744,23 +915,30 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
|
||||
if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name);
|
||||
if (s->qbeg) { // left extension
|
||||
uint8_t *rs, *qs;
|
||||
int qle, tle, gtle, gscore;
|
||||
qs = malloc(s->qbeg);
|
||||
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
|
||||
tmp = s->rbeg - rmax[0];
|
||||
rs = malloc(tmp);
|
||||
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
||||
#ifndef USE_AVX2_EXT
|
||||
uint8_t *rs, *qs;
|
||||
qs = malloc(s->qbeg);
|
||||
for (i = 0; i < s->qbeg; ++i) qs[i] = query[s->qbeg - 1 - i];
|
||||
rs = malloc(tmp);
|
||||
for (i = 0; i < tmp; ++i) rs[i] = rseq[tmp - 1 - i];
|
||||
#endif
|
||||
for (i = 0; i < MAX_BAND_TRY; ++i) {
|
||||
int prev = a->score;
|
||||
aw[0] = opt->w << i;
|
||||
if (bwa_verbose >= 4) {
|
||||
int j;
|
||||
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rs[j]]); putchar('\n');
|
||||
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)qs[j]]); putchar('\n');
|
||||
printf("*** Left ref: "); for (j = 0; j < tmp; ++j) putchar("ACGTN"[(int)rseq[tmp - 1 - j]]); putchar('\n');
|
||||
printf("*** Left query: "); for (j = 0; j < s->qbeg; ++j) putchar("ACGTN"[(int)query[s->qbeg - 1 - j]]); putchar('\n');
|
||||
}
|
||||
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||
#ifndef USE_AVX2_EXT
|
||||
a->score = ksw_extend2(s->qbeg, qs, tmp, rs, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0]);
|
||||
#else
|
||||
a->score = ksw_extend2_avx2(s->qbeg, query, tmp, rseq, 1, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b,
|
||||
aw[0], opt->pen_clip5, opt->zdrop, s->len * opt->a, &qle, &tle, >le, &gscore, &max_off[0], aux->sw_buf);
|
||||
#endif
|
||||
if (bwa_verbose >= 4) { printf("*** Left extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[0], max_off[0]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[0] < (aw[0]>>1) + (aw[0]>>2)) break;
|
||||
}
|
||||
// check whether we prefer to reach the end of the query
|
||||
|
|
@ -771,7 +949,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
a->qb = 0, a->rb = s->rbeg - gtle;
|
||||
a->truesc = gscore;
|
||||
}
|
||||
free(qs); free(rs);
|
||||
#ifndef USE_AVX2_EXT
|
||||
free(qs); free(rs);
|
||||
#endif
|
||||
} else a->score = a->truesc = s->len * opt->a, a->qb = 0, a->rb = s->rbeg;
|
||||
|
||||
if (s->qbeg + s->len != l_query) { // right extension
|
||||
|
|
@ -787,7 +967,11 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
|
|||
printf("*** Right ref: "); for (j = 0; j < rmax[1] - rmax[0] - re; ++j) putchar("ACGTN"[(int)rseq[re+j]]); putchar('\n');
|
||||
printf("*** Right query: "); for (j = 0; j < l_query - qe; ++j) putchar("ACGTN"[(int)query[qe+j]]); putchar('\n');
|
||||
}
|
||||
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
#ifndef USE_AVX2_EXT
|
||||
a->score = ksw_extend2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1]);
|
||||
#else
|
||||
a->score = ksw_extend2_avx2(l_query - qe, query + qe, rmax[1] - rmax[0] - re, rseq + re, 0, 5, opt->mat, opt->o_del, opt->e_del, opt->o_ins, opt->e_ins, opt->a, opt->b, aw[1], opt->pen_clip3, opt->zdrop, sc0, &qle, &tle, >le, &gscore, &max_off[1], aux->sw_buf);
|
||||
#endif
|
||||
if (bwa_verbose >= 4) { printf("*** Right extension: prev_score=%d; score=%d; bandwidth=%d; max_off_diagonal_dist=%d\n", prev, a->score, aw[1], max_off[1]); fflush(stdout); }
|
||||
if (a->score == prev || max_off[1] < (aw[1]>>1) + (aw[1]>>2)) break;
|
||||
}
|
||||
|
|
@ -1035,10 +1219,9 @@ void mem_reorder_primary5(int T, mem_alnreg_v *a)
|
|||
}
|
||||
|
||||
// TODO (future plan): group hits into a uint64_t[] array. This will be cleaner and more flexible
|
||||
void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m)
|
||||
{
|
||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
|
||||
kstring_t str;
|
||||
void mem_reg2sam(const mem_opt_t* opt, const bntseq_t* bns, const uint8_t* pac, bseq1_t* s, mem_alnreg_v* a, int extra_flag, const mem_aln_t* m, seq_sam_t* ss) {
|
||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, mem_alnreg_v *a, int l_query, const char *query);
|
||||
// kstring_t str;
|
||||
kvec_t(mem_aln_t) aa;
|
||||
int k, l;
|
||||
char **XA = 0;
|
||||
|
|
@ -1046,8 +1229,9 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
|
|||
if (!(opt->flag & MEM_F_ALL))
|
||||
XA = mem_gen_alt(opt, bns, pac, a, s->l_seq, s->seq);
|
||||
kv_init(aa);
|
||||
str.l = str.m = 0; str.s = 0;
|
||||
for (k = l = 0; k < a->n; ++k) {
|
||||
// str.l = str.m = 0; str.s = 0;
|
||||
ss->sam.l = 0;
|
||||
for (k = l = 0; k < a->n; ++k) {
|
||||
mem_alnreg_t *p = &a->a[k];
|
||||
mem_aln_t *q;
|
||||
if (p->score < opt->T) continue;
|
||||
|
|
@ -1069,58 +1253,19 @@ void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac,
|
|||
mem_aln_t t;
|
||||
t = mem_reg2aln(opt, bns, pac, s->l_seq, s->seq, 0);
|
||||
t.flag |= extra_flag;
|
||||
mem_aln2sam(opt, bns, &str, s, 1, &t, 0, m);
|
||||
} else {
|
||||
for (k = 0; k < aa.n; ++k)
|
||||
mem_aln2sam(opt, bns, &str, s, aa.n, aa.a, k, m);
|
||||
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||
mem_aln2sam(opt, bns, &ss->sam, s, 1, &t, 0, m);
|
||||
} else {
|
||||
for (k = 0; k < aa.n; ++k) mem_aln2sam(opt, bns, &ss->sam, s, aa.n, aa.a, k, m);
|
||||
for (k = 0; k < aa.n; ++k) free(aa.a[k].cigar);
|
||||
free(aa.a);
|
||||
}
|
||||
s->sam = str.s;
|
||||
// s->sam = str.s;
|
||||
if (XA) {
|
||||
for (k = 0; k < a->n; ++k) free(XA[k]);
|
||||
free(XA);
|
||||
}
|
||||
}
|
||||
|
||||
mem_alnreg_v mem_align1_core(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, char *seq, void *buf)
|
||||
{
|
||||
int i;
|
||||
mem_chain_v chn;
|
||||
mem_alnreg_v regs;
|
||||
|
||||
for (i = 0; i < l_seq; ++i) // convert to 2-bit encoding if we have not done so
|
||||
seq[i] = seq[i] < 4? seq[i] : nst_nt4_table[(int)seq[i]];
|
||||
|
||||
chn = mem_chain(opt, bwt, bns, l_seq, (uint8_t*)seq, buf);
|
||||
chn.n = mem_chain_flt(opt, chn.n, chn.a);
|
||||
mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chn.n, chn.a);
|
||||
if (bwa_verbose >= 4) mem_print_chain(bns, &chn);
|
||||
|
||||
kv_init(regs);
|
||||
for (i = 0; i < chn.n; ++i) {
|
||||
mem_chain_t *p = &chn.a[i];
|
||||
if (bwa_verbose >= 4) err_printf("* ---> Processing chain(%d) <---\n", i);
|
||||
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, ®s);
|
||||
free(chn.a[i].seeds);
|
||||
}
|
||||
free(chn.a);
|
||||
regs.n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regs.n, regs.a);
|
||||
if (bwa_verbose >= 4) {
|
||||
err_printf("* %ld chains remain after removing duplicated chains\n", regs.n);
|
||||
for (i = 0; i < regs.n; ++i) {
|
||||
mem_alnreg_t *p = ®s.a[i];
|
||||
printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < regs.n; ++i) {
|
||||
mem_alnreg_t *p = ®s.a[i];
|
||||
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
|
||||
p->is_alt = 1;
|
||||
}
|
||||
return regs;
|
||||
}
|
||||
|
||||
mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, int l_query, const char *query_, const mem_alnreg_t *ar)
|
||||
{
|
||||
mem_aln_t a;
|
||||
|
|
@ -1193,77 +1338,267 @@ mem_aln_t mem_reg2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *
|
|||
return a;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
const mem_opt_t *opt;
|
||||
const bwt_t *bwt;
|
||||
const bntseq_t *bns;
|
||||
const uint8_t *pac;
|
||||
const mem_pestat_t *pes;
|
||||
smem_aux_t **aux;
|
||||
bseq1_t *seqs;
|
||||
mem_alnreg_v *regs;
|
||||
int64_t n_processed;
|
||||
} worker_t;
|
||||
|
||||
static void worker1(void *data, long i, int tid)
|
||||
{
|
||||
worker_t *w = (worker_t*)data;
|
||||
if (!(w->opt->flag&MEM_F_PE)) {
|
||||
if (bwa_verbose >= 4) printf("=====> Processing read '%s' <=====\n", w->seqs[i].name);
|
||||
w->regs[i] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i].l_seq, w->seqs[i].seq, w->aux[tid]);
|
||||
} else {
|
||||
if (bwa_verbose >= 4) printf("=====> Processing read '%s'/1 <=====\n", w->seqs[i<<1|0].name);
|
||||
w->regs[i<<1|0] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|0].l_seq, w->seqs[i<<1|0].seq, w->aux[tid]);
|
||||
if (bwa_verbose >= 4) printf("=====> Processing read '%s'/2 <=====\n", w->seqs[i<<1|1].name);
|
||||
w->regs[i<<1|1] = mem_align1_core(w->opt, w->bwt, w->bns, w->pac, w->seqs[i<<1|1].l_seq, w->seqs[i<<1|1].seq, w->aux[tid]);
|
||||
}
|
||||
/*
 * Sub-optimal score for the top hit r->a[0]: the score of the first other
 * region whose query interval overlaps the top hit by at least
 * opt->mask_level times the shorter of the two interval lengths.
 * Returns opt->min_seed_len * opt->a when no such region exists (this also
 * covers r->n <= 1, where the loop body never runs).
 */
static inline int cal_sub(const mem_opt_t* opt, mem_alnreg_v* r) {
    int j;
    for (j = 1; j < r->n; ++j) {  // choose unique alignment
        int b_max = r->a[j].qb > r->a[0].qb ? r->a[j].qb : r->a[0].qb;
        int e_min = r->a[j].qe < r->a[0].qe ? r->a[j].qe : r->a[0].qe;
        if (e_min > b_max) {  // have overlap
            int min_l = r->a[j].qe - r->a[j].qb < r->a[0].qe - r->a[0].qb ? r->a[j].qe - r->a[j].qb : r->a[0].qe - r->a[0].qb;
            if (e_min - b_max >= min_l * opt->mask_level)
                break;  // significant overlap
        }
    }
    return j < r->n ? r->a[j].score : opt->min_seed_len * opt->a;
}
|
||||
|
||||
static void worker2(void *data, long i, int tid)
|
||||
/*
 * Infer the relative orientation of two mates and their distance.
 *
 * Coordinates >= l_pac lie on the reverse strand of the doubled reference.
 * b2 is first projected onto the strand of b1; *dist receives the absolute
 * distance between b1 and that projection.
 *
 * Return value (0..3): bit 0 is set when the mates are on different strands;
 * the value is then XOR-ed with 3 when the projected b2 is not greater than b1.
 */
static inline int mem_infer_dir(int64_t l_pac, int64_t b1, int64_t b2, int64_t* dist) {
    int64_t p2;
    int r1 = (b1 >= l_pac), r2 = (b2 >= l_pac);
    p2 = r1 == r2 ? b2 : (l_pac << 1) - 1 - b2;  // p2 is the coordinate of read 2 on the read 1 strand
    *dist = p2 > b1 ? p2 - b1 : b1 - p2;
    return (r1 == r2 ? 0 : 1) ^ (p2 > b1 ? 0 : 3);
}
|
||||
|
||||
// mem主要流程
|
||||
void mem_core_process(const mem_opt_t* opt, const bwt_t* bwt, const HybridIndex* hyb, const bntseq_t* bns, const uint8_t* pac, bseq1_t* seq_arr,
|
||||
int nseq, smem_aux_t* aux, void* seed_arr, mem_chain_v* chain_arr, mem_alnreg_v* reg_arr, int calc_isize, int64_t l_pac,
|
||||
uint64_v* isize, int tid) {
|
||||
int i, j, l_seq;
|
||||
mem_chain_v* chnp;
|
||||
mem_alnreg_v* regp;
|
||||
char* seq;
|
||||
|
||||
if (opt->use_bwt) {
|
||||
smem_v *smem_arr = (smem_v*)seed_arr;
|
||||
// 1. seeding
|
||||
PROF_START(seed_all);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
for (j = 0; j < l_seq; ++j) {
|
||||
seq[j] = seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]];
|
||||
}
|
||||
find_smem(opt, bwt, l_seq, (uint8_t*)seq, aux, &smem_arr[i], tid);
|
||||
}
|
||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
||||
|
||||
// 2. chain
|
||||
PROF_START(chain_all);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
chnp = chain_arr + i;
|
||||
PROF_START(gen_chain);
|
||||
generate_chain(opt, bwt, bns, l_seq, (uint8_t*)seq, smem_arr[i].mem, chnp, tid);
|
||||
PROF_END(tprof[T_GEN_CHAIN][tid], gen_chain);
|
||||
PROF_START(flt_chain);
|
||||
chnp->n = mem_chain_flt(opt, chnp->n, chnp->a);
|
||||
PROF_END(tprof[T_FLT_CHAIN][tid], flt_chain);
|
||||
PROF_START(flt_chained_seeds);
|
||||
mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a);
|
||||
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
|
||||
if (bwa_verbose >= 4) mem_print_chain(bns, chnp);
|
||||
}
|
||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
||||
} else {
|
||||
HybSeedArr* seeds = (HybSeedArr*)seed_arr;
|
||||
// 1. seeding
|
||||
PROF_START(seed_all);
|
||||
RangeArr read_ranges = {0};
|
||||
RangeArr seeds_ranges = {0};
|
||||
Range init_range = {0};
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
uint8_t* reverse_seq = aux->reverse_seq->a;
|
||||
uint8_t* for_bits = aux->for_bits->a;
|
||||
uint8_t* back_bits = aux->back_bits->a;
|
||||
|
||||
read_ranges.n = 0;
|
||||
seeds_ranges.n = 0;
|
||||
int last_N = -1;
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
|
||||
for (j = 0; j < l_seq; ++j) {
|
||||
seq[j] = (uint8_t)(seq[j] < 4 ? seq[j] : nst_nt4_table[(int)seq[j]]);
|
||||
if (seq[j] >= 4) { // N
|
||||
reverse_seq[l_seq - 1 - j] = seq[j];
|
||||
if (last_N + 1 < j) {
|
||||
const Range range = {last_N + 1, j, j - last_N - 1};
|
||||
kv_push(Range, read_ranges, range);
|
||||
kv_push(Range, seeds_ranges, init_range);
|
||||
}
|
||||
last_N = j;
|
||||
} else {
|
||||
reverse_seq[l_seq - 1 - j] = 3 - seq[j];
|
||||
}
|
||||
}
|
||||
if (last_N + 1 < j) {
|
||||
const Range range = {last_N + 1, j, j - last_N - 1};
|
||||
kv_push(Range, read_ranges, range);
|
||||
kv_push(Range, seeds_ranges, init_range);
|
||||
}
|
||||
create_seq_fb_bits((uint8_t*)seq, l_seq, for_bits, back_bits);
|
||||
ReadSeq read_seq = {l_seq, (uint8_t*)seq, reverse_seq, for_bits, back_bits, aux->seq_id};
|
||||
++aux->seq_id;
|
||||
hyb_seeding(opt, hyb, &read_seq, &read_ranges, &seeds_ranges, &seeds[i], aux->seq_id, tid);
|
||||
}
|
||||
kv_destroy(read_ranges);
|
||||
kv_destroy(seeds_ranges);
|
||||
PROF_END(tprof[T_SEED_ALL][tid], seed_all);
|
||||
|
||||
// 2. chain
|
||||
PROF_START(chain_all);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
chnp = chain_arr + i;
|
||||
PROF_START(gen_chain);
|
||||
hyb_generate_chain(opt, hyb, bns, l_seq, (uint8_t*)seq, &seeds[i], chnp, tid);
|
||||
PROF_END(tprof[T_GEN_CHAIN][tid], gen_chain);
|
||||
PROF_START(flt_chain);
|
||||
chnp->n = mem_chain_flt(opt, chnp->n, chnp->a);
|
||||
PROF_END(tprof[T_FLT_CHAIN][tid], flt_chain);
|
||||
PROF_START(flt_chained_seeds);
|
||||
mem_flt_chained_seeds(opt, bns, pac, l_seq, (uint8_t*)seq, chnp->n, chnp->a);
|
||||
PROF_END(tprof[T_FLT_CHANNED_SEEDS][tid], flt_chained_seeds);
|
||||
if (bwa_verbose >= 4)
|
||||
mem_print_chain(bns, chnp);
|
||||
}
|
||||
PROF_END(tprof[T_CHAIN_ALL][tid], chain_all);
|
||||
}
|
||||
|
||||
// 3. align
|
||||
PROF_START(aln_all);
|
||||
for (i = 0; i < nseq; ++i) {
|
||||
seq = seq_arr[i].seq;
|
||||
l_seq = seq_arr[i].l_seq;
|
||||
chnp = chain_arr + i;
|
||||
regp = reg_arr + i;
|
||||
kv_init(*regp);
|
||||
|
||||
for (j = 0; j < chnp->n; ++j) {
|
||||
mem_chain_t* p = &chnp->a[j];
|
||||
if (bwa_verbose >= 4)
|
||||
err_printf("* ---> Processing chain(%d) <---\n", j);
|
||||
mem_chain2aln(opt, bns, pac, l_seq, (uint8_t*)seq, p, regp, aux, tid);
|
||||
free(chnp->a[j].seeds);
|
||||
}
|
||||
|
||||
free(chnp->a);
|
||||
chnp->m = 0;
|
||||
chnp->a = 0;
|
||||
regp->n = mem_sort_dedup_patch(opt, bns, pac, (uint8_t*)seq, regp->n, regp->a);
|
||||
if (bwa_verbose >= 4) {
|
||||
err_printf("* %ld chains remain after removing duplicated chains\n", regp->n);
|
||||
for (j = 0; j < regp->n; ++j) {
|
||||
mem_alnreg_t* p = ®p->a[j];
|
||||
printf("** %d, [%d,%d) <=> [%ld,%ld)\n", p->score, p->qb, p->qe, (long)p->rb, (long)p->re);
|
||||
}
|
||||
}
|
||||
for (j = 0; j < regp->n; ++j) {
|
||||
mem_alnreg_t* p = ®p->a[j];
|
||||
if (p->rid >= 0 && bns->anns[p->rid].is_alt)
|
||||
p->is_alt = 1;
|
||||
}
|
||||
}
|
||||
PROF_END(tprof[T_ALN_ALL][tid], aln_all);
|
||||
|
||||
// 4. calc insert size
|
||||
#define MIN_RATIO 0.8
|
||||
if (calc_isize) {
|
||||
PROF_START(ins_size);
|
||||
for (i = 0; i < nseq >> 1; ++i) {
|
||||
int dir;
|
||||
int64_t is;
|
||||
mem_alnreg_v* r[2];
|
||||
r[0] = (mem_alnreg_v*)®_arr[i << 1 | 0];
|
||||
r[1] = (mem_alnreg_v*)®_arr[i << 1 | 1];
|
||||
if (r[0]->n == 0 || r[1]->n == 0)
|
||||
continue;
|
||||
if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score)
|
||||
continue;
|
||||
if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score)
|
||||
continue;
|
||||
if (r[0]->a[0].rid != r[1]->a[0].rid)
|
||||
continue; // not on the same chr
|
||||
dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is);
|
||||
if (is && is <= opt->max_ins)
|
||||
kv_push(uint64_t, isize[dir], is);
|
||||
}
|
||||
PROF_END(tprof[T_INS_SIZE][tid], ins_size);
|
||||
}
|
||||
}
|
||||
|
||||
static void worker_smem_align(void *data, long i, int tid)
|
||||
{
|
||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2]);
|
||||
mem_worker_t *w = (mem_worker_t*)data;
|
||||
int start = i * w->opt->batch_size;
|
||||
int end = MIN(start + w->opt->batch_size, w->n_reads);
|
||||
mem_core_process(w->opt, w->bwt, w->hyb, w->bns, w->pac, w->seqs + start, end - start, w->aux[tid], w->smem_arr[tid], w->chain_arr[tid], w->regs + start,
|
||||
w->calc_isize, w->bns->l_pac, w->isize_arr[tid], tid);
|
||||
}
|
||||
|
||||
static void worker_sam(void *data, long i, int tid)
|
||||
{
|
||||
extern int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], seq_sam_t ss[2], int tid);
|
||||
extern void mem_reg2ovlp(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a);
|
||||
worker_t *w = (worker_t*)data;
|
||||
mem_worker_t *w = (mem_worker_t*)data;
|
||||
if (!(w->opt->flag&MEM_F_PE)) {
|
||||
if (bwa_verbose >= 4) printf("=====> Finalizing read '%s' <=====\n", w->seqs[i].name);
|
||||
mem_mark_primary_se(w->opt, w->regs[i].n, w->regs[i].a, w->n_processed + i);
|
||||
if (w->opt->flag & MEM_F_PRIMARY5) mem_reorder_primary5(w->opt->T, &w->regs[i]);
|
||||
mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0);
|
||||
free(w->regs[i].a);
|
||||
mem_reg2sam(w->opt, w->bns, w->pac, &w->seqs[i], &w->regs[i], 0, 0, &w->sams[i]);
|
||||
free(w->regs[i].a);
|
||||
} else {
|
||||
if (bwa_verbose >= 4) printf("=====> Finalizing read pair '%s' <=====\n", w->seqs[i<<1|0].name);
|
||||
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1]);
|
||||
mem_sam_pe(w->opt, w->bns, w->pac, w->pes, (w->n_processed>>1) + i, &w->seqs[i<<1], &w->regs[i<<1], &w->sams[i<<1], tid);
|
||||
free(w->regs[i<<1|0].a); free(w->regs[i<<1|1].a);
|
||||
}
|
||||
}
|
||||
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0)
|
||||
{
|
||||
extern void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
|
||||
worker_t w;
|
||||
void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed, int n, bseq1_t* seqs, const mem_pestat_t* pes0, seq_sam_t* sams) {
|
||||
extern void kt_for(int n_threads, void (*func)(void*,long,int), void *data, long n);
|
||||
mem_pestat_t pes[4];
|
||||
double ctime, rtime;
|
||||
int i;
|
||||
int n_batch = (n + opt->batch_size - 1) / opt->batch_size;
|
||||
|
||||
ctime = cputime(); rtime = realtime();
|
||||
global_bns = bns;
|
||||
w.regs = malloc(n * sizeof(mem_alnreg_v));
|
||||
w.opt = opt; w.bwt = bwt; w.bns = bns; w.pac = pac;
|
||||
w.seqs = seqs; w.n_processed = n_processed;
|
||||
w.pes = &pes[0];
|
||||
w.aux = malloc(opt->n_threads * sizeof(smem_aux_t*));
|
||||
for (i = 0; i < opt->n_threads; ++i)
|
||||
w.aux[i] = smem_aux_init();
|
||||
kt_for(opt->n_threads, worker1, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // find mapping positions
|
||||
for (i = 0; i < opt->n_threads; ++i)
|
||||
smem_aux_destroy(w.aux[i]);
|
||||
free(w.aux);
|
||||
ctime = cputime(); rtime = realtime();
|
||||
global_bns = w->bns;
|
||||
|
||||
w->opt = opt;
|
||||
if (w->n < n) {
|
||||
w->n = n;
|
||||
w->regs = (mem_alnreg_v*)realloc(w->regs, n * sizeof(mem_alnreg_v));
|
||||
}
|
||||
w->seqs = seqs;
|
||||
w->n_processed = n_processed;
|
||||
w->sams = sams;
|
||||
w->n_reads = n;
|
||||
w->pes = &pes[0];
|
||||
|
||||
if ((opt->flag & MEM_F_PE) && !pes0) { // infer insert sizes if not provided
|
||||
int i, j;
|
||||
w->calc_isize = 1;
|
||||
for (i = 0; i < opt->n_threads; ++i)
|
||||
for (j = 0; j < 4; ++j) w->isize_arr[i][j].n = 0;
|
||||
}
|
||||
|
||||
PROF_START(kernel);
|
||||
kt_for(opt->n_threads, worker_smem_align, w, n_batch); // find mapping positions
|
||||
PROF_END(gprof[G_MEM_KERNEL], kernel);
|
||||
|
||||
PROF_START(pestat);
|
||||
if (opt->flag&MEM_F_PE) { // infer insert sizes if not provided
|
||||
if (pes0) memcpy(pes, pes0, 4 * sizeof(mem_pestat_t)); // if pes0 != NULL, set the insert-size distribution as pes0
|
||||
else mem_pestat(opt, bns->l_pac, n, w.regs, pes); // otherwise, infer the insert size distribution from data
|
||||
else mem_pestat(opt, w->bns->l_pac, n, w->isize_arr, pes); // otherwise, infer the insert size distribution from data
|
||||
}
|
||||
kt_for(opt->n_threads, worker2, &w, (opt->flag&MEM_F_PE)? n>>1 : n); // generate alignment
|
||||
free(w.regs);
|
||||
PROF_END(gprof[G_MEM_PESTAT], pestat);
|
||||
|
||||
PROF_START(mem_sam);
|
||||
kt_for(opt->n_threads, worker_sam, w, (opt->flag & MEM_F_PE) ? n >> 1 : n); // generate alignment
|
||||
PROF_END(gprof[G_MEM_SAM], mem_sam);
|
||||
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] Processed %d reads in %.3f CPU sec, %.3f real sec\n", __func__, n, cputime() - ctime, realtime() - rtime);
|
||||
}
|
||||
|
|
|
|||
72
bwamem.h
72
bwamem.h
|
|
@ -27,9 +27,11 @@
|
|||
#ifndef BWAMEM_H_
|
||||
#define BWAMEM_H_
|
||||
|
||||
#include "bwt.h"
|
||||
#include "bntseq.h"
|
||||
#include "bwa.h"
|
||||
#include "bwt.h"
|
||||
#include "hyb_idx.h"
|
||||
#include "utils.h"
|
||||
|
||||
#define MEM_MAPQ_COEF 30.0
|
||||
#define MEM_MAPQ_MAX 60
|
||||
|
|
@ -126,9 +128,68 @@ typedef struct { // This struct is only used for the convenience of API.
|
|||
int score, sub, alt_sc;
|
||||
} mem_aln_t;
|
||||
|
||||
/* One exact-match seed: a stretch of the query matched to the reference. */
typedef struct {
	int64_t rbeg;       // start of the seed on the reference
	int32_t qbeg, len;  // start on the query and seed length
	int score;          // seed score
} mem_seed_t; // unaligned memory
|
||||
|
||||
typedef struct {
|
||||
int n, m, first, rid;
|
||||
uint32_t w : 29, kept : 2, is_alt : 1;
|
||||
float frac_rep;
|
||||
int64_t pos;
|
||||
mem_seed_t* seeds;
|
||||
} mem_chain_t;
|
||||
|
||||
typedef struct {
|
||||
size_t n, m;
|
||||
mem_chain_t* a;
|
||||
} mem_chain_v;
|
||||
|
||||
typedef kvec_t(uint8_t) byte_v;
|
||||
typedef kvec_t(byte_v) byte_vv;
|
||||
|
||||
typedef struct {
|
||||
bwtintv_v mem, mem1, *tmpv[2];
|
||||
buf_t *sw_buf, *seq_buf;
|
||||
byte_v* byte_seq;
|
||||
byte_v* reverse_seq;
|
||||
byte_v* for_bits;
|
||||
byte_v* back_bits;
|
||||
uint64_t seq_id;
|
||||
} smem_aux_t;
|
||||
|
||||
typedef struct {
|
||||
bwtintv_v mem;
|
||||
uint64_v pos_arr;
|
||||
} smem_v;
|
||||
|
||||
typedef struct {
|
||||
int calc_isize;
|
||||
const mem_opt_t* opt;
|
||||
const bwt_t* bwt;
|
||||
const HybridIndex* hyb;
|
||||
const bntseq_t* bns;
|
||||
const uint8_t* pac;
|
||||
const mem_pestat_t* pes;
|
||||
smem_aux_t** aux;
|
||||
bseq1_t* seqs;
|
||||
seq_sam_t* sams;
|
||||
smem_v** smem_arr;
|
||||
HybSeedArr** seed_arr;
|
||||
mem_chain_v** chain_arr;
|
||||
mem_alnreg_v* regs;
|
||||
uint64_v** isize_arr;
|
||||
int64_t n_processed;
|
||||
int64_t n;
|
||||
int64_t n_reads;
|
||||
} mem_worker_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
mem_worker_t *init_mem_worker(const mem_opt_t *opt, const bwt_t *bwt, const HybridIndex *hyb, const bntseq_t *bns, const uint8_t *pac);
|
||||
|
||||
smem_i *smem_itr_init(const bwt_t *bwt);
|
||||
void smem_itr_destroy(smem_i *itr);
|
||||
|
|
@ -161,9 +222,10 @@ extern "C" {
|
|||
* @param pes0 insert-size info; if NULL, infer from data; if not NULL, it should be an array with 4 elements,
|
||||
* corresponding to each FF, FR, RF and RR orientation. See mem_pestat() for more info.
|
||||
*/
|
||||
void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
|
||||
// void mem_process_seqs(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int64_t n_processed, int n, bseq1_t *seqs, const mem_pestat_t *pes0);
|
||||
void mem_process_seqs(const mem_opt_t* opt, mem_worker_t* w, int64_t n_processed, int n, bseq1_t* seqs, const mem_pestat_t* pes0, seq_sam_t* sams);
|
||||
|
||||
/**
|
||||
/**
|
||||
* Find the aligned regions for one query sequence
|
||||
*
|
||||
* Note that this routine does not generate CIGAR. CIGAR should be
|
||||
|
|
@ -207,10 +269,10 @@ extern "C" {
|
|||
* @param regs region array of size $n; 2i-th and (2i+1)-th elements constitute a pair
|
||||
* @param pes inferred insert size distribution (output)
|
||||
*/
|
||||
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4]);
|
||||
void mem_pestat(const mem_opt_t* opt, int64_t l_pac, int n, uint64_v** isize_arr, mem_pestat_t pes[4]);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -101,14 +101,14 @@ const bwtintv_v *smem_next(smem_i *itr)
|
|||
|
||||
/*
 * Single-read alignment entry point.
 *
 * In this revision the call into mem_align1_core() (and the following
 * mem_mark_primary_se()) is disabled, so the function always returns an
 * empty region vector {0,0,0} and never reads or modifies seq_.
 * The private copy of the sequence that only existed to feed the disabled
 * call has been dropped: it was an unchecked malloc() followed by memcpy().
 */
mem_alnreg_v mem_align1(const mem_opt_t *opt, const bwt_t *bwt, const bntseq_t *bns, const uint8_t *pac, int l_seq, const char *seq_)
{
	mem_alnreg_v ar = {0,0,0};
	return ar;
}
|
||||
|
|
|
|||
|
|
@ -69,26 +69,19 @@ static int cal_sub(const mem_opt_t *opt, mem_alnreg_v *r)
|
|||
return j < r->n? r->a[j].score : opt->min_seed_len * opt->a;
|
||||
}
|
||||
|
||||
void mem_pestat(const mem_opt_t *opt, int64_t l_pac, int n, const mem_alnreg_v *regs, mem_pestat_t pes[4])
|
||||
{
|
||||
int i, d, max;
|
||||
void mem_pestat(const mem_opt_t* opt, int64_t l_pac, int n, uint64_v** isize_arr, mem_pestat_t pes[4]) {
|
||||
int i, j, d, max;
|
||||
uint64_v isize[4];
|
||||
memset(pes, 0, 4 * sizeof(mem_pestat_t));
|
||||
memset(isize, 0, sizeof(kvec_t(int)) * 4);
|
||||
for (i = 0; i < n>>1; ++i) {
|
||||
int dir;
|
||||
int64_t is;
|
||||
mem_alnreg_v *r[2];
|
||||
r[0] = (mem_alnreg_v*)®s[i<<1|0];
|
||||
r[1] = (mem_alnreg_v*)®s[i<<1|1];
|
||||
if (r[0]->n == 0 || r[1]->n == 0) continue;
|
||||
if (cal_sub(opt, r[0]) > MIN_RATIO * r[0]->a[0].score) continue;
|
||||
if (cal_sub(opt, r[1]) > MIN_RATIO * r[1]->a[0].score) continue;
|
||||
if (r[0]->a[0].rid != r[1]->a[0].rid) continue; // not on the same chr
|
||||
dir = mem_infer_dir(l_pac, r[0]->a[0].rb, r[1]->a[0].rb, &is);
|
||||
if (is && is <= opt->max_ins) kv_push(uint64_t, isize[dir], is);
|
||||
}
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
|
||||
for (i = 0; i < opt->n_threads; ++i) {
|
||||
for (d = 0; d < 4; ++d) {
|
||||
for (j = 0; j < isize_arr[i][d].n; ++j) {
|
||||
kv_push(uint64_t, isize[d], isize_arr[i][d].a[j]);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[M::%s] # candidate unique pairs for (FF, FR, RF, RR): (%ld, %ld, %ld, %ld)\n", __func__, isize[0].n, isize[1].n, isize[2].n, isize[3].n);
|
||||
for (d = 0; d < 4; ++d) { // TODO: this block is nearly identical to the one in bwtsw2_pair.c. It would be better to merge these two.
|
||||
mem_pestat_t *r = &pes[d];
|
||||
uint64_v *q = &isize[d];
|
||||
|
|
@ -273,11 +266,11 @@ void mem_reorder_primary5(int T, mem_alnreg_v *a);
|
|||
|
||||
#define raw_mapq(diff, a) ((int)(6.02 * (diff) / (a) + .499))
|
||||
|
||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2])
|
||||
int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_pestat_t pes[4], uint64_t id, bseq1_t s[2], mem_alnreg_v a[2], seq_sam_t ss[2], int tid)
|
||||
{
|
||||
extern int mem_mark_primary_se(const mem_opt_t *opt, int n, mem_alnreg_t *a, int64_t id);
|
||||
extern int mem_approx_mapq_se(const mem_opt_t *opt, const mem_alnreg_t *a);
|
||||
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m);
|
||||
extern void mem_reg2sam(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, bseq1_t *s, mem_alnreg_v *a, int extra_flag, const mem_aln_t *m, seq_sam_t *ss);
|
||||
extern char **mem_gen_alt(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, const mem_alnreg_v *a, int l_query, const char *query);
|
||||
|
||||
int n = 0, i, j, z[2], o, subo, n_sub, extra_flag = 1, n_pri[2], n_aa[2];
|
||||
|
|
@ -288,7 +281,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
memset(h, 0, sizeof(mem_aln_t) * 2);
|
||||
memset(g, 0, sizeof(mem_aln_t) * 2);
|
||||
n_aa[0] = n_aa[1] = 0;
|
||||
if (!(opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment
|
||||
PROF_START(matesw);
|
||||
if (!(opt->flag & MEM_F_NO_RESCUE)) { // then perform SW for the best alignment
|
||||
mem_alnreg_v b[2];
|
||||
kv_init(b[0]); kv_init(b[1]);
|
||||
for (i = 0; i < 2; ++i)
|
||||
|
|
@ -300,7 +294,8 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
n += mem_matesw(opt, bns, pac, pes, &b[i].a[j], s[!i].l_seq, (uint8_t*)s[!i].seq, &a[!i]);
|
||||
free(b[0].a); free(b[1].a);
|
||||
}
|
||||
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
|
||||
PROF_END(tprof[T_SAM_MATESW][tid], matesw);
|
||||
n_pri[0] = mem_mark_primary_se(opt, a[0].n, a[0].a, id<<1|0);
|
||||
n_pri[1] = mem_mark_primary_se(opt, a[1].n, a[1].a, id<<1|1);
|
||||
if (opt->flag & MEM_F_PRIMARY5) {
|
||||
mem_reorder_primary5(opt->T, &a[0]);
|
||||
|
|
@ -363,8 +358,10 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
} else XA[0] = XA[1] = 0;
|
||||
// write SAM
|
||||
for (i = 0; i < 2; ++i) {
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]);
|
||||
h[i].mapq = q_se[i];
|
||||
PROF_START(reg2aln);
|
||||
h[i] = mem_reg2aln(opt, bns, pac, s[i].l_seq, s[i].seq, &a[i].a[z[i]]);
|
||||
PROF_END(tprof[T_SAM_REG2ALN][tid], reg2aln);
|
||||
h[i].mapq = q_se[i];
|
||||
h[i].flag |= 0x40<<i | extra_flag;
|
||||
h[i].XA = XA[i]? XA[i][z[i]] : 0;
|
||||
aa[i][n_aa[i]++] = h[i];
|
||||
|
|
@ -377,12 +374,12 @@ int mem_sam_pe(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac, co
|
|||
aa[i][n_aa[i]++] = g[i];
|
||||
}
|
||||
}
|
||||
for (i = 0; i < n_aa[0]; ++i)
|
||||
mem_aln2sam(opt, bns, &str, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
|
||||
s[0].sam = strdup(str.s); str.l = 0;
|
||||
for (i = 0; i < n_aa[1]; ++i)
|
||||
mem_aln2sam(opt, bns, &str, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
|
||||
s[1].sam = str.s;
|
||||
ss[0].sam.l = 0;
|
||||
for (i = 0; i < n_aa[0]; ++i)
|
||||
mem_aln2sam(opt, bns, &ss[0].sam, &s[0], n_aa[0], aa[0], i, &h[1]); // write read1 hits
|
||||
ss[1].sam.l = 0;
|
||||
for (i = 0; i < n_aa[1]; ++i)
|
||||
mem_aln2sam(opt, bns, &ss[1].sam, &s[1], n_aa[1], aa[1], i, &h[0]); // write read2 hits
|
||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
// free
|
||||
for (i = 0; i < 2; ++i) {
|
||||
|
|
@ -411,9 +408,9 @@ no_pairing:
|
|||
d = mem_infer_dir(bns->l_pac, a[0].a[0].rb, a[1].a[0].rb, &dist);
|
||||
if (!pes[d].failed && dist >= pes[d].low && dist <= pes[d].high) extra_flag |= 2;
|
||||
}
|
||||
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41|extra_flag, &h[1]);
|
||||
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81|extra_flag, &h[0]);
|
||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
mem_reg2sam(opt, bns, pac, &s[0], &a[0], 0x41 | extra_flag, &h[1], &ss[0]);
|
||||
mem_reg2sam(opt, bns, pac, &s[1], &a[1], 0x81 | extra_flag, &h[0], &ss[1]);
|
||||
if (strcmp(s[0].name, s[1].name) != 0) err_fatal(__func__, "paired reads have different names: \"%s\", \"%s\"\n", s[0].name, s[1].name);
|
||||
free(h[0].cigar); free(h[1].cigar);
|
||||
return n;
|
||||
}
|
||||
|
|
|
|||
12
bwtindex.c
12
bwtindex.c
|
|
@ -266,6 +266,7 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s
|
|||
|
||||
{ // nucleotide indexing
|
||||
gzFile fp = xzopen(fa, "r");
|
||||
start_async_read(fp);
|
||||
t = clock();
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
||||
|
|
@ -280,8 +281,9 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s
|
|||
//exit(0);
|
||||
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
err_gzclose(fp);
|
||||
}
|
||||
stop_async_read(fp);
|
||||
err_gzclose(fp);
|
||||
}
|
||||
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
||||
{
|
||||
strcpy(str, prefix); strcat(str, ".pac");
|
||||
|
|
@ -310,11 +312,13 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s
|
|||
}
|
||||
{
|
||||
gzFile fp = xzopen(fa, "r");
|
||||
t = clock();
|
||||
start_async_read(fp);
|
||||
t = clock();
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
||||
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
err_gzclose(fp);
|
||||
stop_async_read(fp);
|
||||
err_gzclose(fp);
|
||||
}
|
||||
{
|
||||
bwt_t *bwt;
|
||||
|
|
|
|||
13
bwtsw2_aux.c
13
bwtsw2_aux.c
|
|
@ -732,8 +732,10 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
|||
uint8_t *pac;
|
||||
bsw2seq_t *_seq;
|
||||
bseq1_t *bseq;
|
||||
int64_t seq_size = 0;
|
||||
int m = 0;
|
||||
|
||||
pac = calloc(bns->l_pac/4+1, 1);
|
||||
pac = calloc(bns->l_pac/4+1, 1);
|
||||
for (l = 0; l < bns->n_seqs; ++l)
|
||||
err_printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
|
||||
err_fread_noeof(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||
|
|
@ -745,13 +747,14 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
|||
ks2 = kseq_init(fp2);
|
||||
is_pe = 1;
|
||||
} else fp2 = 0, ks2 = 0, is_pe = 0;
|
||||
while ((bseq = bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2)) != 0) {
|
||||
bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2, 1, &seq_size, &m, &bseq);
|
||||
while (n > 0) {
|
||||
int size = 0;
|
||||
if (n > _seq->max) {
|
||||
_seq->max = n;
|
||||
kroundup32(_seq->max);
|
||||
_seq->seq = realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t));
|
||||
}
|
||||
_seq->seq = (bsw2seq1_t*)realloc(_seq->seq, _seq->max * sizeof(bsw2seq1_t));
|
||||
}
|
||||
_seq->n = n;
|
||||
for (i = 0; i < n; ++i) {
|
||||
bseq1_t *b = &bseq[i];
|
||||
|
|
@ -761,8 +764,8 @@ void bsw2_aln(const bsw2opt_t *opt, const bntseq_t *bns, bwt_t * const target, c
|
|||
size += p->l;
|
||||
}
|
||||
fprintf(stderr, "[bsw2_aln] read %d sequences/pairs (%d bp) ...\n", n, size);
|
||||
free(bseq);
|
||||
process_seqs(_seq, opt, bns, pac, target, is_pe);
|
||||
bseq_read(opt->chunk_size * opt->n_threads, &n, ks, ks2, 1, &seq_size, &m, &bseq);
|
||||
}
|
||||
// free
|
||||
free(pac);
|
||||
|
|
|
|||
2
debug.h
2
debug.h
|
|
@ -10,7 +10,7 @@
|
|||
|
||||
////////////////// for debug and test //////////////////////////
|
||||
|
||||
#define DEBUG_FILE_OUTPUT // 打开gfp1-4文件,并记录debug信息
|
||||
// #define DEBUG_FILE_OUTPUT // 打开gfp1-4文件,并记录debug信息
|
||||
// #define COUNT_SEED_LENGTH // 记录seed匹配数量降低到1时的长度,以及最终扩展的长度
|
||||
// #define GET_FULL_MATCH_READ // 获取完全匹配的reads
|
||||
// #define COUNT_CALC_NUM // 统计BSW的剪枝后的计算量和未剪枝前的计算量
|
||||
|
|
|
|||
447
fastmap.c
447
fastmap.c
|
|
@ -24,20 +24,27 @@
|
|||
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
*/
|
||||
#include <zlib.h>
|
||||
#include <ctype.h>
|
||||
#include <limits.h>
|
||||
#include <math.h>
|
||||
#include <pthread.h>
|
||||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <unistd.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include "bntseq.h"
|
||||
#include "bwa.h"
|
||||
#include "bwamem.h"
|
||||
#include "kvec.h"
|
||||
#include "utils.h"
|
||||
#include "bntseq.h"
|
||||
#include "debug.h"
|
||||
#include "hyb_idx.h"
|
||||
#include "kseq.h"
|
||||
#include "kvec.h"
|
||||
#include "profiling.h"
|
||||
#include "share_mem.h"
|
||||
#include "utils.h"
|
||||
#include "yarn.h"
|
||||
KSEQ_DECLARE(gzFile)
|
||||
|
||||
extern unsigned char nst_nt4_table[256];
|
||||
|
|
@ -45,83 +52,242 @@ extern unsigned char nst_nt4_table[256];
|
|||
void *kopen(const char *fn, int *_fd);
|
||||
int kclose(void *a);
|
||||
void kt_pipeline(int n_threads, void *(*func)(void*, int, void*), void *shared_data, int n_steps);
|
||||
HybridIndex* bwa_hyb_idx_load_from_shm(const char* idx_prefix);
|
||||
HybridIndex* bwa_hyb_idx_load_from_disk(const char* idx_prefix);
|
||||
|
||||
typedef struct {
|
||||
kseq_t *ks, *ks2;
|
||||
mem_opt_t *opt;
|
||||
mem_pestat_t *pes0;
|
||||
int64_t n_processed;
|
||||
int copy_comment, actual_chunk_size;
|
||||
bwaidx_t *idx;
|
||||
} ktp_aux_t;
|
||||
|
||||
typedef struct {
|
||||
ktp_aux_t *aux;
|
||||
int n_seqs;
|
||||
bseq1_t *seqs;
|
||||
int n_seqs;
|
||||
int n_sams;
|
||||
int m_seqs;
|
||||
int m_sams;
|
||||
bseq1_t* seqs;
|
||||
seq_sam_t* sams;
|
||||
} ktp_data_t;
|
||||
|
||||
static void *process(void *shared, int step, void *_data)
|
||||
{
|
||||
ktp_aux_t *aux = (ktp_aux_t*)shared;
|
||||
ktp_data_t *data = (ktp_data_t*)_data;
|
||||
int i;
|
||||
if (step == 0) {
|
||||
ktp_data_t *ret;
|
||||
int64_t size = 0;
|
||||
ret = calloc(1, sizeof(ktp_data_t));
|
||||
ret->seqs = bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2);
|
||||
if (ret->seqs == 0) {
|
||||
free(ret);
|
||||
return 0;
|
||||
}
|
||||
if (!aux->copy_comment)
|
||||
for (i = 0; i < ret->n_seqs; ++i) {
|
||||
free(ret->seqs[i].comment);
|
||||
ret->seqs[i].comment = 0;
|
||||
}
|
||||
for (i = 0; i < ret->n_seqs; ++i) size += ret->seqs[i].l_seq;
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size);
|
||||
return ret;
|
||||
} else if (step == 1) {
|
||||
const mem_opt_t *opt = aux->opt;
|
||||
const bwaidx_t *idx = aux->idx;
|
||||
if (opt->flag & MEM_F_SMARTPE) {
|
||||
bseq1_t *sep[2];
|
||||
int n_sep[2];
|
||||
mem_opt_t tmp_opt = *opt;
|
||||
bseq_classify(data->n_seqs, data->seqs, n_sep, sep);
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]);
|
||||
if (n_sep[0]) {
|
||||
tmp_opt.flag &= ~MEM_F_PE;
|
||||
mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, n_sep[0], sep[0], 0);
|
||||
for (i = 0; i < n_sep[0]; ++i)
|
||||
data->seqs[sep[0][i].id].sam = sep[0][i].sam;
|
||||
}
|
||||
if (n_sep[1]) {
|
||||
tmp_opt.flag |= MEM_F_PE;
|
||||
mem_process_seqs(&tmp_opt, idx->bwt, idx->bns, idx->pac, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0);
|
||||
for (i = 0; i < n_sep[1]; ++i)
|
||||
data->seqs[sep[1][i].id].sam = sep[1][i].sam;
|
||||
}
|
||||
free(sep[0]); free(sep[1]);
|
||||
} else mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, aux->n_processed, data->n_seqs, data->seqs, aux->pes0);
|
||||
aux->n_processed += data->n_seqs;
|
||||
return data;
|
||||
} else if (step == 2) {
|
||||
for (i = 0; i < data->n_seqs; ++i) {
|
||||
if (data->seqs[i].sam) err_fputs(data->seqs[i].sam, stdout);
|
||||
free(data->seqs[i].name); free(data->seqs[i].comment);
|
||||
free(data->seqs[i].seq); free(data->seqs[i].qual); free(data->seqs[i].sam);
|
||||
}
|
||||
free(data->seqs); free(data);
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
typedef struct {
|
||||
kseq_t *ks, *ks2;
|
||||
mem_opt_t* opt;
|
||||
mem_pestat_t* pes0;
|
||||
int64_t n_processed;
|
||||
int copy_comment, actual_chunk_size;
|
||||
bwaidx_t* idx;
|
||||
mem_worker_t* w;
|
||||
int data_idx; // pingpong buffer index
|
||||
ktp_data_t* data;
|
||||
int wbuf_size;
|
||||
char* wbuf;
|
||||
volatile int read_complete;
|
||||
volatile int calc_complete;
|
||||
long read_idx;
|
||||
long calc_idx;
|
||||
long write_idx;
|
||||
} ktp_aux_t;
|
||||
|
||||
///////////////////// new parallel pipeline ///////////////////
|
||||
|
||||
// read
|
||||
static inline void* read_data(ktp_aux_t* aux, ktp_data_t* data) {
|
||||
PROF_START(read);
|
||||
ktp_data_t* ret = aux->data + aux->data_idx;
|
||||
aux->data_idx = !aux->data_idx;
|
||||
int64_t size = 0;
|
||||
bseq_read(aux->actual_chunk_size, &ret->n_seqs, aux->ks, aux->ks2, aux->copy_comment, &size, &ret->m_seqs, &ret->seqs);
|
||||
PROF_END(gprof[G_READ], read);
|
||||
if (ret->n_seqs == 0) {
|
||||
return 0;
|
||||
}
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] read %d sequences (%ld bp)...\n", __func__, ret->n_seqs, (long)size);
|
||||
return ret;
|
||||
}
|
||||
|
||||
// calculate
|
||||
static inline void* calc_data(ktp_aux_t* aux, ktp_data_t* data) {
|
||||
PROF_START(compute);
|
||||
const mem_opt_t* opt = aux->opt;
|
||||
if (data->n_sams != data->n_seqs) {
|
||||
if (data->m_sams < data->m_seqs) {
|
||||
data->m_sams = data->m_seqs;
|
||||
data->sams = (seq_sam_t*)realloc(data->sams, data->m_sams * sizeof(seq_sam_t));
|
||||
memset(data->sams + data->n_sams, 0, (data->m_sams - data->n_sams) * sizeof(seq_sam_t));
|
||||
}
|
||||
data->n_sams = data->n_seqs;
|
||||
}
|
||||
if (opt->flag & MEM_F_SMARTPE) {
|
||||
// 这里应该是把pair-end数据都放在一个文件里了,需要先区分,这里没有内存优化,涉及较多的开辟和释放
|
||||
int i;
|
||||
bseq1_t* sep[2];
|
||||
seq_sam_t* ss[2];
|
||||
int n_sep[2];
|
||||
mem_opt_t tmp_opt = *opt;
|
||||
bseq_classify(data->n_seqs, data->seqs, n_sep, sep);
|
||||
ss[0] = (seq_sam_t*)calloc(0, n_sep[0] * sizeof(seq_sam_t));
|
||||
ss[1] = (seq_sam_t*)calloc(0, n_sep[1] * sizeof(seq_sam_t));
|
||||
if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] %d single-end sequences; %d paired-end sequences\n", __func__, n_sep[0], n_sep[1]);
|
||||
if (n_sep[0]) {
|
||||
tmp_opt.flag &= ~MEM_F_PE;
|
||||
mem_process_seqs(&tmp_opt, aux->w, aux->n_processed, n_sep[0], sep[0], 0, ss[0]);
|
||||
for (i = 0; i < n_sep[0]; ++i) data->sams[sep[0][i].id].sam = ss[0][i].sam;
|
||||
}
|
||||
if (n_sep[1]) {
|
||||
tmp_opt.flag |= MEM_F_PE;
|
||||
mem_process_seqs(&tmp_opt, aux->w, aux->n_processed + n_sep[0], n_sep[1], sep[1], aux->pes0, ss[1]);
|
||||
for (i = 0; i < n_sep[1]; ++i) data->sams[sep[1][i].id].sam = ss[1][i].sam;
|
||||
}
|
||||
free(sep[0]);
|
||||
free(sep[1]);
|
||||
free(ss[0]);
|
||||
free(ss[1]);
|
||||
} else
|
||||
mem_process_seqs(opt, aux->w, aux->n_processed, data->n_seqs, data->seqs, aux->pes0, data->sams);
|
||||
aux->n_processed += data->n_seqs;
|
||||
PROF_END(gprof[G_COMPUTE], compute);
|
||||
|
||||
return data;
|
||||
}
|
||||
|
||||
// write
|
||||
static inline void* write_data(ktp_aux_t* aux, ktp_data_t* data) {
|
||||
int i;
|
||||
PROF_START(write);
|
||||
int buf_written = 0;
|
||||
for (i = 0; i < data->n_sams; ++i) {
|
||||
const int slen = data->sams[i].sam.l;
|
||||
if (slen && (buf_written + slen) < aux->wbuf_size) {
|
||||
memcpy(&aux->wbuf[buf_written], data->sams[i].sam.s, slen);
|
||||
buf_written += slen;
|
||||
} else if (buf_written > 0) {
|
||||
err_fwrite(aux->wbuf, 1, buf_written, stdout);
|
||||
if ((buf_written + slen) >= aux->wbuf_size) {
|
||||
memcpy(&aux->wbuf[0], data->sams[i].sam.s, slen);
|
||||
buf_written = slen;
|
||||
} else {
|
||||
buf_written = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (buf_written > 0) {
|
||||
err_fwrite(aux->wbuf, 1, buf_written, stdout);
|
||||
}
|
||||
PROF_END(gprof[G_WRITE], write);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// io 异步,读和写不能同时
|
||||
static void* process(void* shared, int step, void* _data) {
|
||||
ktp_aux_t* aux = (ktp_aux_t*)shared;
|
||||
ktp_data_t* data = (ktp_data_t*)_data;
|
||||
if (step == 0) {
|
||||
return read_data(aux, data);
|
||||
} else if (step == 1) {
|
||||
return calc_data(aux, data);
|
||||
} else if (step == 2) {
|
||||
return write_data(aux, data);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
////////////// pipeline in which reading and writing can run concurrently
|
||||
static lock_t* input_have = NULL;
|
||||
static lock_t* output_have = NULL;
|
||||
|
||||
static void* thread_read(void* data) {
|
||||
ktp_aux_t* aux = (ktp_aux_t*)data;
|
||||
while (1) {
|
||||
POSSESS(input_have);
|
||||
WAIT_FOR(input_have, NOT_TO_BE, 0);
|
||||
RELEASE(input_have);
|
||||
if (read_data(aux, aux->data) == 0) {
|
||||
POSSESS(input_have);
|
||||
aux->read_complete = 1;
|
||||
TWIST(input_have, BY, -1);
|
||||
break;
|
||||
}
|
||||
POSSESS(input_have);
|
||||
aux->read_idx++;
|
||||
TWIST(input_have, BY, -1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void* thread_calc(void* data) {
|
||||
ktp_aux_t* aux = (ktp_aux_t*)data;
|
||||
int d_idx = 0;
|
||||
int add_idx = 0;
|
||||
while (1) {
|
||||
POSSESS(input_have);
|
||||
WAIT_FOR(input_have, NOT_TO_BE, 2);
|
||||
RELEASE(input_have);
|
||||
|
||||
POSSESS(output_have);
|
||||
WAIT_FOR(output_have, NOT_TO_BE, 2);
|
||||
RELEASE(output_have);
|
||||
|
||||
if (aux->calc_idx < aux->read_idx) {
|
||||
calc_data(aux, aux->data + d_idx);
|
||||
d_idx = !d_idx;
|
||||
add_idx = 1;
|
||||
}
|
||||
if (aux->read_complete) {
|
||||
POSSESS(output_have);
|
||||
if (add_idx)
|
||||
aux->calc_idx++;
|
||||
aux->calc_complete = 1;
|
||||
TWIST(output_have, BY, 1); // 最后要唤醒写线程
|
||||
break; // 计算完了
|
||||
}
|
||||
POSSESS(output_have);
|
||||
if (add_idx)
|
||||
aux->calc_idx++;
|
||||
TWIST(output_have, BY, 1);
|
||||
|
||||
POSSESS(input_have);
|
||||
TWIST(input_have, BY, 1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void* thread_write(void* data) {
|
||||
ktp_aux_t* aux = (ktp_aux_t*)data;
|
||||
int d_idx = 0;
|
||||
while (1) {
|
||||
POSSESS(output_have);
|
||||
WAIT_FOR(output_have, NOT_TO_BE, 0);
|
||||
RELEASE(output_have);
|
||||
if (aux->write_idx < aux->calc_idx) {
|
||||
write_data(aux, aux->data + d_idx);
|
||||
d_idx = !d_idx;
|
||||
aux->write_idx++;
|
||||
}
|
||||
if (aux->calc_complete) {
|
||||
if (aux->write_idx < aux->calc_idx)
|
||||
write_data(aux, aux->data + d_idx);
|
||||
break;
|
||||
}
|
||||
POSSESS(output_have);
|
||||
TWIST(output_have, BY, -1);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Runs the read / compute / write stages as three concurrent threads and
 * returns once all of them have finished. input_have is created with value 2
 * and output_have with value 0 before the threads start.
 */
static void new_pipeline(ktp_aux_t* aux) {
    void* (*const stages[3])(void*) = {thread_read, thread_calc, thread_write};
    pthread_t workers[3];
    int k;

    input_have = NEW_LOCK(2);
    output_have = NEW_LOCK(0);

    for (k = 0; k < 3; ++k) {
        pthread_create(&workers[k], 0, stages[k], aux);
    }
    for (k = 0; k < 3; ++k) {
        pthread_join(workers[k], 0);
    }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
|
||||
static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
|
||||
{
|
||||
if (opt0->a) { // matching score is changed
|
||||
|
|
@ -150,13 +316,28 @@ int main_mem(int argc, char *argv[])
|
|||
mem_pestat_t pes[4];
|
||||
ktp_aux_t aux;
|
||||
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
open_debug_files();
|
||||
#endif
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
#if USE_RDTSC
|
||||
uint64_t tmp_time = __rdtsc();
|
||||
sleep(1);
|
||||
proc_freq = __rdtsc() - tmp_time;
|
||||
#else
|
||||
proc_freq = 1000;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
PROF_START(all);
|
||||
memset(&aux, 0, sizeof(ktp_aux_t));
|
||||
memset(pes, 0, 4 * sizeof(mem_pestat_t));
|
||||
for (i = 0; i < 4; ++i) pes[i].failed = 1;
|
||||
|
||||
aux.opt = opt = mem_opt_init();
|
||||
memset(&opt0, 0, sizeof(mem_opt_t));
|
||||
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:b:we")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "51qpaMCSPVYjuk:c:v:s:r:t:R:A:B:O:E:U:w:L:d:T:Q:D:m:I:N:o:f:W:x:G:h:y:K:X:H:F:z:b:ge")) >= 0) {
|
||||
if (c == 'k') opt->min_seed_len = atoi(optarg), opt0.min_seed_len = 1;
|
||||
else if (c == '1') no_mt_io = 1;
|
||||
else if (c == 'x') mode = optarg;
|
||||
|
|
@ -256,7 +437,7 @@ int main_mem(int argc, char *argv[])
|
|||
__func__, pes[1].avg, pes[1].std, pes[1].high, pes[1].low);
|
||||
} else if (c == 'b')
|
||||
opt->batch_size = atoi(optarg) >> 1 << 1, opt->batch_size = opt->batch_size > 1 ? opt->batch_size : 256;
|
||||
else if (c == 'w')
|
||||
else if (c == 'g')
|
||||
opt->use_bwt = 1;
|
||||
else if (c == 'e') opt->skip_entire_match = 1;
|
||||
else return 1;
|
||||
|
|
@ -325,7 +506,7 @@ int main_mem(int argc, char *argv[])
|
|||
fprintf(stderr, " FR orientation only. [inferred]\n");
|
||||
fprintf(stderr, " -u output XB instead of XA; XB is XA with the alignment score and mapping quality added.\n");
|
||||
fprintf(stderr, " -b INT batch size of reads to process at one time [%d].\n", opt->batch_size);
|
||||
fprintf(stderr, " -w Use bwt index for seeding\n");
|
||||
fprintf(stderr, " -g Use bwt index for seeding\n");
|
||||
fprintf(stderr, " -e Skip the second and third seeding steps for entire matching reads.\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Note: Please read the man page for detailed description of the command line and options.\n");
|
||||
|
|
@ -334,6 +515,9 @@ int main_mem(int argc, char *argv[])
|
|||
return 1;
|
||||
}
|
||||
|
||||
if (opt->n_threads < 1) opt->n_threads = 1;
|
||||
if (opt->batch_size < 1) opt->batch_size = 256;
|
||||
|
||||
if (mode) {
|
||||
if (strcmp(mode, "intractg") == 0) {
|
||||
if (!opt0.o_del) opt->o_del = 16;
|
||||
|
|
@ -366,22 +550,48 @@ int main_mem(int argc, char *argv[])
|
|||
} else update_a(opt, &opt0);
|
||||
bwa_fill_scmat(opt->a, opt->b, opt->mat);
|
||||
|
||||
aux.idx = bwa_idx_load_from_shm(argv[optind]);
|
||||
if (aux.idx == 0) {
|
||||
if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0) return 1; // FIXME: memory leak
|
||||
} else if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] load the bwa index from shared memory\n", __func__);
|
||||
if (ignore_alt)
|
||||
PROF_START(load_idx);
|
||||
if (opt->use_bwt) {
|
||||
aux.idx = bwa_idx_load_from_shm(argv[optind]);
|
||||
if (aux.idx == 0) {
|
||||
if ((aux.idx = bwa_idx_load(argv[optind], BWA_IDX_ALL)) == 0)
|
||||
return 1; // FIXME: memory leak
|
||||
} else if (bwa_verbose >= 3)
|
||||
fprintf(stderr, "[M::%s] load the bwa index from shared memory\n", __func__);
|
||||
} else { // load hybrid-index
|
||||
// 加载除了hyb之外,其他的必要部分
|
||||
char fn[MAX_PATH];
|
||||
FILE* fp = NULL;
|
||||
uint64_t ref_len = 0;
|
||||
sprintf(fn, "%s.ref-len", argv[optind]);
|
||||
fp = xopen(fn, "r");
|
||||
err_check_false(fscanf(fp, "%ld", &ref_len), EOF);
|
||||
err_fclose(fp);
|
||||
|
||||
aux.idx = bwa_idx_load(argv[optind], BWA_IDX_BNS | BWA_IDX_PAC);
|
||||
//////////////////////////////
|
||||
aux.idx->hyb = bwa_hyb_idx_load_from_shm(argv[optind]);
|
||||
if (aux.idx->hyb == 0) {
|
||||
aux.idx->hyb = bwa_hyb_idx_load_from_disk(argv[optind]);
|
||||
} else {
|
||||
aux.idx->is_shm = 1;
|
||||
}
|
||||
aux.idx->hyb->ref_len = ref_len;
|
||||
}
|
||||
|
||||
if (ignore_alt)
|
||||
for (i = 0; i < aux.idx->bns->n_seqs; ++i)
|
||||
aux.idx->bns->anns[i].is_alt = 0;
|
||||
PROF_END(gprof[G_LOAD_IDX], load_idx);
|
||||
|
||||
ko = kopen(argv[optind + 1], &fd);
|
||||
ko = kopen(argv[optind + 1], &fd);
|
||||
if (ko == 0) {
|
||||
if (bwa_verbose >= 1) fprintf(stderr, "[E::%s] fail to open file `%s'.\n", __func__, argv[optind + 1]);
|
||||
return 1;
|
||||
}
|
||||
fp = gzdopen(fd, "r");
|
||||
aux.ks = kseq_init(fp);
|
||||
start_async_read(fp); // 采用双buffer技术将读取和解压overlap
|
||||
aux.ks = kseq_init(fp);
|
||||
if (optind + 2 < argc) {
|
||||
if (opt->flag&MEM_F_PE) {
|
||||
if (bwa_verbose >= 2)
|
||||
|
|
@ -393,23 +603,52 @@ int main_mem(int argc, char *argv[])
|
|||
return 1;
|
||||
}
|
||||
fp2 = gzdopen(fd2, "r");
|
||||
aux.ks2 = kseq_init(fp2);
|
||||
start_async_read(fp2);
|
||||
aux.ks2 = kseq_init(fp2);
|
||||
opt->flag |= MEM_F_PE;
|
||||
}
|
||||
}
|
||||
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
|
||||
aux.w = init_mem_worker(opt, aux.idx->bwt, aux.idx->hyb, aux.idx->bns, aux.idx->pac);
|
||||
aux.data = (ktp_data_t*)calloc(2, sizeof(ktp_data_t));
|
||||
// allocate write buffer
|
||||
aux.wbuf_size = 16777216;
|
||||
aux.wbuf = (char*)malloc(aux.wbuf_size);
|
||||
|
||||
bwa_print_sam_hdr(aux.idx->bns, hdr_line);
|
||||
aux.actual_chunk_size = fixed_chunk_size > 0? fixed_chunk_size : opt->chunk_size * opt->n_threads;
|
||||
kt_pipeline(no_mt_io? 1 : 2, process, &aux, 3);
|
||||
free(hdr_line);
|
||||
free(opt);
|
||||
bwa_idx_destroy(aux.idx);
|
||||
kseq_destroy(aux.ks);
|
||||
err_gzclose(fp); kclose(ko);
|
||||
|
||||
PROF_START(pipeline);
|
||||
if (no_mt_io) { // 不同时读写
|
||||
kt_pipeline(2, process, &aux, 3);
|
||||
} else {
|
||||
new_pipeline(&aux);
|
||||
}
|
||||
PROF_END(gprof[G_PIPELINE], pipeline);
|
||||
|
||||
// no need to free these
|
||||
// free(hdr_line);
|
||||
// free(opt);
|
||||
// bwa_idx_destroy(aux.idx);
|
||||
// kseq_destroy(aux.ks);
|
||||
|
||||
stop_async_read(fp);
|
||||
err_gzclose(fp); kclose(ko);
|
||||
if (aux.ks2) {
|
||||
kseq_destroy(aux.ks2);
|
||||
err_gzclose(fp2); kclose(ko2);
|
||||
// kseq_destroy(aux.ks2);
|
||||
stop_async_read(fp2);
|
||||
err_gzclose(fp2); kclose(ko2);
|
||||
}
|
||||
return 0;
|
||||
PROF_END(gprof[G_ALL], all);
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
display_stats(opt->n_threads);
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
close_files();
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main_fastmap(int argc, char *argv[])
|
||||
|
|
@ -447,7 +686,8 @@ int main_fastmap(int argc, char *argv[])
|
|||
}
|
||||
|
||||
fp = xzopen(argv[optind + 1], "r");
|
||||
seq = kseq_init(fp);
|
||||
start_async_read(fp);
|
||||
seq = kseq_init(fp);
|
||||
if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 1;
|
||||
itr = smem_itr_init(idx->bwt);
|
||||
smem_config(itr, min_intv, max_len, max_intv);
|
||||
|
|
@ -485,6 +725,7 @@ int main_fastmap(int argc, char *argv[])
|
|||
smem_itr_destroy(itr);
|
||||
bwa_idx_destroy(idx);
|
||||
kseq_destroy(seq);
|
||||
err_gzclose(fp);
|
||||
stop_async_read(fp);
|
||||
err_gzclose(fp);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
145
hyb_bwa.c
145
hyb_bwa.c
|
|
@ -38,6 +38,7 @@
|
|||
#include "utils.h"
|
||||
#include "kvec.h"
|
||||
#include "hyb_idx.h"
|
||||
#include "share_mem.h"
|
||||
|
||||
|
||||
#ifdef _DIVBWT
|
||||
|
|
@ -219,6 +220,61 @@ int bwa_bwt2kmer(int argc, char* argv[]) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// Re-pack the original pac so that bases are stored from the low bits to the high bits of each byte.
/*
 * old_pac     2-bit packed reference in the original layout, where base i sits
 *             at bit offset ((~i & 3) << 1) of byte i >> 2
 * l_pac       number of bases in old_pac
 * new_pac_fn  output path
 *
 * The output file holds l_pac / 4 + 1 re-packed bytes, then one zero byte when
 * l_pac is a multiple of 4, then one byte containing l_pac % 4. This byte
 * layout is identical to what the function wrote before.
 * Terminates the process if the buffer cannot be allocated or the file cannot
 * be opened or closed.
 */
void convert_to_hyb_pac(uint8_t* old_pac, uint64_t l_pac, const char* new_pac_fn) {
#define _gp(l) ((old_pac)[(l) >> 2] >> ((~(l) & 3) << 1) & 3)
    const uint64_t kPacByteNum = l_pac / 4 + 1;
    // Sized by the bytes actually written, so l_pac == 0 is valid as well.
    uint8_t* pac = (uint8_t*)calloc(kPacByteNum, 1);
    FILE* pacFp = fopen(new_pac_fn, "wb");
    uint8_t byte_bases = 0;
    uint8_t ct = 0;
    uint64_t i = 0;
    uint32_t j = 0;

    if (pac == NULL) {
        fprintf(stderr, "[E::%s] out of memory while converting the pac\n", __func__);
        exit(EXIT_FAILURE);
    }
    if (pacFp == NULL) {
        fprintf(stderr, "[E::%s] fail to open file `%s' for writing\n", __func__, new_pac_fn);
        exit(EXIT_FAILURE);
    }

    // Full groups of four bases: base i goes to bits 0-1, base i + 3 to bits 6-7.
    for (; i + 3 < l_pac; i += 4) {
        pac[i >> 2] = (uint8_t)(_gp(i) | (_gp(i + 1) << 2) | (_gp(i + 2) << 4) | (_gp(i + 3) << 6));
    }
    // Remaining 0-3 bases go into the last byte, which stays 0 when there are none.
    for (j = 0; i < l_pac; ++i, ++j) {
        byte_bases |= (uint8_t)(_gp(i) << (j * 2));
    }
    pac[l_pac >> 2] = byte_bases;

    err_fwrite(pac, 1, kPacByteNum, pacFp);
    if (l_pac % 4 == 0) {
        ct = 0;
        err_fwrite(&ct, 1, 1, pacFp);
    }
    ct = l_pac % 4;
    err_fwrite(&ct, 1, 1, pacFp);
    if (fclose(pacFp) != 0) {
        fprintf(stderr, "[E::%s] fail to close file `%s'\n", __func__, new_pac_fn);
        exit(EXIT_FAILURE);
    }
    free(pac);
}
|
||||
|
||||
// 将原pac文件转为hyb需要的格式(翻转byte)
|
||||
int bwa_pac2hybpac(int argc, char* argv[]) {
|
||||
if (optind + 1 > argc) {
|
||||
fprintf(stderr, "Usage: bwa pac2hybpac <in.prefix>\n\n");
|
||||
return 1;
|
||||
}
|
||||
char fn[MAX_PATH];
|
||||
FILE* fp;
|
||||
uint8_t* old_pac = NULL;
|
||||
uint64_t l_pac = 0;
|
||||
// fprintf(stderr, "here-1\n");
|
||||
snprintf(fn, MAX_PATH, "%s.pac", argv[optind]);
|
||||
_load_file_to_data(fn, old_pac);
|
||||
sprintf(fn, "%s.ref-len", argv[optind]);
|
||||
fp = xopen(fn, "r");
|
||||
err_check_false(fscanf(fp, "%ld", &l_pac), EOF);
|
||||
err_fclose(fp);
|
||||
sprintf(fn, "%s.hyb.pac", argv[optind]);
|
||||
// fprintf(stderr, "here-2\n");
|
||||
convert_to_hyb_pac(old_pac, l_pac, fn);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 创建hybrid index,并保存到文件
|
||||
int bwa_bwt2hyb(int argc, char* argv[]) {
|
||||
int hyb_idx_build_and_dump(int num_threads, bwt_t* bwt, const char* idx_prefix);
|
||||
|
|
@ -238,7 +294,7 @@ int bwa_bwt2hyb(int argc, char* argv[]) {
|
|||
}
|
||||
}
|
||||
if (optind + 1 > argc || error) {
|
||||
fprintf(stderr, "Usage: bwa bwt2hyb [Options] <bwt-prefix>\n\n");
|
||||
fprintf(stderr, "Usage: bwa bwt2hyb [Options] <in.prefix>\n\n");
|
||||
fprintf(stderr, "Options: -t INT number of threads for hybrid index building [%d]\n", num_threads);
|
||||
fprintf(stderr, "\n");
|
||||
return 1;
|
||||
|
|
@ -254,6 +310,93 @@ int bwa_bwt2hyb(int argc, char* argv[]) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
// 尝试从share memory中加载hybrid index
|
||||
HybridIndex* bwa_hyb_idx_load_from_shm(const char* idx_prefix) {
|
||||
char fn[MAX_PATH];
|
||||
uint8_t* ref_bits = (uint8_t*)shm_get_index(strcat(strcpy(fn, idx_prefix), HYB_PAC_SUFFIX));
|
||||
uint8_t* sa = (uint8_t*)shm_get_index(strcat(strcpy(fn, idx_prefix), HYB_SA_SUFFIX));
|
||||
uint8_t* kmer_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, idx_prefix), HYB_KMER_SUFFIX));
|
||||
uint8_t* index_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, idx_prefix), HYB_DATA_SUFFIX));
|
||||
if (!ref_bits || !sa || !kmer_data || !index_data) {
|
||||
return NULL;
|
||||
}
|
||||
HybridIndex* hyb = (HybridIndex*)calloc(1, sizeof(HybridIndex));
|
||||
hyb->ref_bits = ref_bits;
|
||||
hyb->sa = sa;
|
||||
hyb->kmer_data = kmer_data;
|
||||
hyb->index_data = index_data;
|
||||
return hyb;
|
||||
}
|
||||
|
||||
// 从硬盘中加载hybrid index
|
||||
HybridIndex* bwa_hyb_idx_load_from_disk(const char* idx_prefix) {
|
||||
char fn[MAX_PATH];
|
||||
FILE* fp = NULL;
|
||||
struct stat st;
|
||||
double sec_time;
|
||||
|
||||
#define __load_hybrid_idx_code(suffix, data) \
|
||||
sec_time = realtime(); \
|
||||
sprintf(fn, "%s%s", idx_prefix, suffix); \
|
||||
err_check_true(stat(fn, &st), 0); \
|
||||
fp = xopen(fn, "r"); \
|
||||
data = (uint8_t*)malloc(st.st_size); \
|
||||
err_fread_noeof(data, 1, st.st_size, fp); \
|
||||
err_fclose(fp); \
|
||||
fprintf(stderr, "%s, %0.2f GB, %0.2f s\n", fn, (double)st.st_size / 1024 / 1024 / 1024, realtime() - sec_time);
|
||||
|
||||
HybridIndex* hyb = (HybridIndex*)calloc(1, sizeof(HybridIndex));
|
||||
|
||||
__load_hybrid_idx_code(HYB_PAC_SUFFIX, hyb->ref_bits);
|
||||
// load hyb byte-sa
|
||||
__load_hybrid_idx_code(HYB_SA_SUFFIX, hyb->sa);
|
||||
// load hyb kmer data
|
||||
__load_hybrid_idx_code(HYB_KMER_SUFFIX, hyb->kmer_data);
|
||||
// load hyb index data
|
||||
__load_hybrid_idx_code(HYB_DATA_SUFFIX, hyb->index_data);
|
||||
|
||||
return hyb;
|
||||
}
|
||||
|
||||
// 在共享内存中处理hybrid index
|
||||
int main_shm_hyb(int argc, char* argv[]) {
|
||||
char c;
|
||||
int clear_shm = 0;
|
||||
int list_shm = 0;
|
||||
int error = 0;
|
||||
while ((c = getopt(argc, argv, "dl")) >= 0) {
|
||||
switch (c) {
|
||||
case 'd':
|
||||
clear_shm = 1;
|
||||
break;
|
||||
case 'l':
|
||||
list_shm = 1;
|
||||
break;
|
||||
default:
|
||||
error = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// fprintf(stderr, "%d %d\n", optind, argc);
|
||||
|
||||
if ((optind == argc && !clear_shm && !list_shm) || error) {
|
||||
fprintf(stderr, "Usage: bwa hybshm [-d|-l] [idx_prefix]\n\n");
|
||||
fprintf(stderr, "Options: -d destroy all hyb indices in shared memory\n");
|
||||
fprintf(stderr, " -l list names of indices in shared memory\n");
|
||||
fprintf(stderr, "\n");
|
||||
return 1;
|
||||
}
|
||||
if (list_shm) {
|
||||
return list_shm_hyb_indices();
|
||||
} else if (clear_shm) {
|
||||
return shm_clear_hyb();
|
||||
}
|
||||
return shm_keep_hyb(argv[optind]);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////// for test /////////////////////////////////////
|
||||
|
||||
// 创建正向的kmer
|
||||
uint64_t build_forward_kmer(const uint8_t* q, int qlen, int kmer_len, int* base_consumed) {
|
||||
uint64_t qbit = 0, i;
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ typedef struct {
|
|||
uint8_t* for_bits; // 正向序列 2-bit编码
|
||||
uint8_t* back_bits; // 反向互补序列 2-bit编码
|
||||
int id; // for test;
|
||||
char* seqstr;
|
||||
// char* seqstr;
|
||||
} ReadSeq;
|
||||
|
||||
typedef kvec_t(ReadSeq) ReadSeqArr;
|
||||
|
|
|
|||
314
hyb_seeding_1.c
314
hyb_seeding_1.c
|
|
@ -0,0 +1,314 @@
|
|||
#include "hyb_idx.h"
|
||||
#include "profiling.h"
|
||||
|
||||
/*
 * Handles a single candidate reference position for read offset x: extends the
 * initial match in both directions and, when the combined extension reaches
 * min_seed_len, records one seed via __add_seed_one_pos.
 *
 * Returns the larger of the extended match's end (x + left + 1) and its start
 * plus min_seed_len.
 * NOTE(review): read_range, seeds and seeds_cap are not referenced directly;
 * they are presumably consumed inside the __add_seed_one_pos macro -- confirm.
 */
static int handle_hits_1(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, const Range* rr, int x,
                         int rx, int init_match_len, uint64_t ref_pos, const int min_seed_len, HybSeedArr* seeds,
                         uint64_t* seeds_cap) {
    int n_ext_left = 0;
    int n_ext_right = 0;

    both_end_match(hyb, read_seq->len, rr, read_seq->back_bits, read_seq->for_bits, rx, init_match_len, ref_pos,
                   &n_ext_left, &n_ext_right);

    const int hit_begin = x - n_ext_right + 1;
    const int hit_end = x + n_ext_left + 1;

    if (n_ext_left + n_ext_right >= min_seed_len) {
        ref_pos = _rev_ref(hyb, ref_pos);
        __add_seed_one_pos(seed, ref_pos - n_ext_right + 1, hit_begin, hit_end);
    }
    return MAX(hit_end, hit_begin + min_seed_len);
}
|
||||
|
||||
/*
 * Handle a backward lookup that resolved to exactly two candidates
 * (suffix-array entries sa_pos and sa_pos + 1).
 *
 * Each candidate is extended with both_end_match(), giving one matched read
 * span per candidate. Three cases follow:
 *   - identical spans: if long enough, one seed carrying two reference
 *     positions is added; the positions are re-derived from a forward lookup
 *     at the span start so they come out in forward order;
 *   - crossing spans: both are offered via __check_add_seed_one_pos, the one
 *     with the smaller start first;
 *   - otherwise (one span contains the other): only the outer span is offered.
 *
 * Returns the read position from which the caller should resume scanning.
 */
static int handle_hits_2(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, const Range* rr, int x,
                         int rx, int init_match_len, uint64_t sa_pos, const int min_seed_len, HybSeedArr* seeds,
                         uint64_t* seeds_cap, int tid) {
    int ext_left[2] = {0}, ext_right[2] = {0};
    Range span[2] = {0};
    uint64_t ref_pos_arr[2] = {hyb_sa_to_ref_pos(hyb->sa, sa_pos), hyb_sa_to_ref_pos(hyb->sa, sa_pos + 1)};
    int k = 0;

    for (k = 0; k < 2; ++k) {
        both_end_match(hyb, read_seq->len, rr, read_seq->back_bits, read_seq->for_bits, rx, init_match_len, ref_pos_arr[k],
                       &ext_left[k], &ext_right[k]);
        _set_range(span[k], x - ext_right[k] + 1, x + ext_left[k] + 1);
    }

    if (_range_equal(span[0], span[1])) {
        if (span[0].end - span[0].start >= min_seed_len) {
            /* Forward lookup at the span start fixes the order of the two reference positions. */
            uint8_t type_hits = 0;
            uint64_t offset = 0;
            uint64_t sa_first = 0;

            get_kmer_data(hyb, read_seq->for_bits, span[0].start, &type_hits, &offset);
            if (type_hits == 2) {
                /* The k-mer entry itself already points at the SA pair. */
                sa_first = offset;
            } else {
                /* More than two k-mer hits: walk the index down to the leaf to find the SA start. */
                uint32_t seq_pos = span[0].start + HYB_KMER_LEN;
                uint32_t hits = type_hits;
                uint8_t cmp_ref = 0;
                get_leaf_node(hyb->index_data + offset, read_seq->for_bits, read_seq->seq, read_range->end, &seq_pos, &hits,
                              &sa_first, &cmp_ref, tid);
            }
            ref_pos_arr[0] = hyb_sa_to_ref_pos(hyb->sa, sa_first);
            ref_pos_arr[1] = hyb_sa_to_ref_pos(hyb->sa, sa_first + 1);
            __add_seed_one_pos(seed, ref_pos_arr[0], span[0].start, span[0].end);
            kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1]);
        }
    } else if (_range_cross(span[0], span[1])) {
        /* Crossing spans: emit both, smaller start first. */
        int lead = (span[0].start < span[1].start) ? 0 : 1;
        int trail = 1 - lead;
        __check_add_seed_one_pos(seed, _rev_ref(hyb, ref_pos_arr[lead]) - ext_right[lead] + 1, span[lead].start,
                                 span[lead].end);
        __check_add_seed_one_pos(seed, _rev_ref(hyb, ref_pos_arr[trail]) - ext_right[trail] + 1, span[trail].start,
                                 span[trail].end);
    } else {
        /* One span contains the other: keep only the outer one. */
        int outer = (span[0].start < span[1].start || span[0].end > span[1].end) ? 0 : 1;
        __check_add_seed_one_pos(seed, _rev_ref(hyb, ref_pos_arr[outer]) - ext_right[outer] + 1, span[outer].start,
                                 span[outer].end);
    }

    return MAX(MAX(span[0].end, span[1].end), MIN(span[0].start, span[1].start) + min_seed_len);
}
|
||||
|
||||
/*
 * Handle a position whose backward lookup still has more than two candidates:
 * restart with a forward lookup of the k-mer at read position x and extend to
 * the right.
 *
 * Depending on the forward k-mer's hit type, either both of its two
 * candidates are extended with right_end_match(), or the index is walked with
 * get_leaf_node() (optionally followed by a right_end_match() against the
 * reference). A seed starting at x is added when the match reaches
 * min_seed_len.
 *
 * Returns the read position from which the caller should resume scanning.
 */
static int handle_hits_much(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, int x,
                            const int min_seed_len, HybSeedArr* seeds, uint64_t* seeds_cap, int tid) {
    int max_reach = x + HYB_KMER_LEN;
    int right_match = 0;
    uint8_t type_hits = 0;
    uint64_t offset = 0;

    get_kmer_data(hyb, read_seq->for_bits, x, &type_hits, &offset);

    if (type_hits == 2) {
        int match_end[2] = {0};
        uint64_t ref_pos_arr[2] = {hyb_sa_to_ref_pos(hyb->sa, offset), hyb_sa_to_ref_pos(hyb->sa, offset + 1)};
        int k = 0;

        for (k = 0; k < 2; ++k) {
            right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, x, HYB_KMER_LEN,
                            ref_pos_arr[k], &right_match);
            match_end[k] = x + right_match;
        }
        max_reach = MAX(match_end[0], match_end[1]);

        if (max_reach - x >= min_seed_len) {
            /* The longer candidate owns the seed; on a tie candidate 0 goes first and candidate 1 is appended. */
            int best = (match_end[1] > match_end[0]) ? 1 : 0;
            __add_seed_one_pos(seed, ref_pos_arr[best], x, max_reach);
            if (match_end[0] == match_end[1]) {
                kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1]);
            }
        }
    } else {
        uint32_t seq_pos = x + HYB_KMER_LEN;
        uint32_t hits = type_hits;
        uint64_t sa_start = 0;
        uint8_t cmp_ref = 0;

        get_leaf_node(hyb->index_data + offset, read_seq->for_bits, read_seq->seq, read_range->end, &seq_pos, &hits, &sa_start,
                      &cmp_ref, tid);

        if (seq_pos == read_range->end || !cmp_ref) {
            /* The index walk alone decided the match end; every remaining SA entry is a hit. */
            max_reach = seq_pos;
            if (max_reach - x >= min_seed_len) {
                uint32_t h = 0;
                __add_seed_one_pos(seed, hyb_sa_to_ref_pos(hyb->sa, sa_start), x, max_reach);
                for (h = 1; h < hits; ++h) {
                    kv_push(uint64_t, seed->ref_pos_arr, hyb_sa_to_ref_pos(hyb->sa, sa_start + h));
                }
            }
        } else {
            /* The walk asked for a direct comparison against the reference. */
            uint64_t ref_pos = hyb_sa_to_ref_pos(hyb->sa, sa_start);
            right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, x, seq_pos - x, ref_pos,
                            &right_match);
            max_reach = x + right_match;
            if (right_match >= min_seed_len) {
                __add_seed_one_pos(seed, ref_pos, x, max_reach);
            }
        }
    }
    return MAX(max_reach, x + min_seed_len);
}
|
||||
|
||||
int seeding_from_start(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, const int min_seed_len,
|
||||
HybSeedArr* seeds, int tid) {
|
||||
// PROF_START(seed_1);
|
||||
uint64_t seeds_m = seeds->m;
|
||||
uint64_t* seeds_cap = &seeds_m; // 记录当前seeds的长度, 如果扩容,则需要初始化ref_pos_arr.m, n, a 为0
|
||||
int max_reach = read_range->start + HYB_KMER_LEN; // 返回的结果,最远匹配的read的位置
|
||||
int x = read_range->start; // 从read_range的起始位置开始匹配
|
||||
uint64_t i = 0;
|
||||
int right_match = 0;
|
||||
uint8_t type_hits = 0;
|
||||
uint64_t offset = 0;
|
||||
uint64_t ref_pos = 0;
|
||||
get_kmer_data(hyb, read_seq->for_bits, x, &type_hits, &offset);
|
||||
// PROF_END(tprof[T_SEED_1_0][tid], seed_1);
|
||||
if (type_hits == 0) {
|
||||
// tdat[TD_SEED_1_0][tid]++;
|
||||
} else if (type_hits == 1) {
|
||||
// tdat[TD_SEED_1_1][tid]++;
|
||||
// PROF_START(seed_1);
|
||||
right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, x, HYB_KMER_LEN, offset,
|
||||
&right_match);
|
||||
max_reach = x + right_match;
|
||||
if (max_reach - x >= min_seed_len) {
|
||||
__add_seed_one_pos(seed, offset, x, max_reach);
|
||||
seed->first_len = HYB_KMER_LEN;
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_1][tid], seed_1);
|
||||
} else if (type_hits == 2) {
|
||||
// tdat[TD_SEED_1_2][tid]++;
|
||||
// PROF_START(seed_1);
|
||||
int match_end[2] = {0};
|
||||
uint64_t ref_pos_arr[2] = {hyb_sa_to_ref_pos(hyb->sa, offset), hyb_sa_to_ref_pos(hyb->sa, offset + 1)};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, x, HYB_KMER_LEN,
|
||||
ref_pos_arr[i], &right_match);
|
||||
match_end[i] = x + right_match;
|
||||
}
|
||||
max_reach = MAX(match_end[0], match_end[1]);
|
||||
if (max_reach - x >= min_seed_len) {
|
||||
if (match_end[0] == match_end[1]) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0], x, max_reach);
|
||||
kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1]);
|
||||
seed->first_len = match_end[0];
|
||||
// seed->first_len = HYB_KMER_LEN;
|
||||
} else if (match_end[0] > match_end[1]) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0], x, match_end[0]);
|
||||
seed->first_len = match_end[1];
|
||||
// seed->first_len = HYB_KMER_LEN;
|
||||
} else {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[1], x, match_end[1]);
|
||||
seed->first_len = match_end[0];
|
||||
// seed->first_len = HYB_KMER_LEN;
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_2][tid], seed_1);
|
||||
} else {
|
||||
// tdat[TD_SEED_1_3][tid]++;
|
||||
uint32_t seq_pos = x + HYB_KMER_LEN;
|
||||
uint32_t hits = type_hits;
|
||||
uint64_t sa_start = 0;
|
||||
uint8_t cmp_ref = 0;
|
||||
// PROF_START(seed_1);
|
||||
get_leaf_node(hyb->index_data + offset, read_seq->for_bits, read_seq->seq, read_range->end, &seq_pos, &hits, &sa_start,
|
||||
&cmp_ref, tid);
|
||||
// PROF_END(tprof[T_SEED_1_3_1][tid], seed_1);
|
||||
if (seq_pos == read_range->end || !cmp_ref) {
|
||||
max_reach = seq_pos;
|
||||
if (max_reach - x >= min_seed_len) {
|
||||
__add_seed_one_pos(seed, hyb_sa_to_ref_pos(hyb->sa, sa_start), x, max_reach);
|
||||
if (hits == 1)
|
||||
seed->first_len = seq_pos - x;
|
||||
for (i = 1; i < hits; ++i) {
|
||||
kv_push(uint64_t, seed->ref_pos_arr, hyb_sa_to_ref_pos(hyb->sa, sa_start + i));
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ref_pos = hyb_sa_to_ref_pos(hyb->sa, sa_start);
|
||||
right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, x, seq_pos - x, ref_pos,
|
||||
&right_match);
|
||||
max_reach = x + right_match;
|
||||
if (right_match >= min_seed_len) {
|
||||
__add_seed_one_pos(seed, ref_pos, x, max_reach);
|
||||
seed->first_len = seq_pos - x;
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_3][tid], seed_1);
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_ALL][tid], seed_1);
|
||||
// PROF_END(tprof[T_SEED_1_1][tid], seed_1);
|
||||
return MAX(max_reach, x + min_seed_len);
|
||||
}
|
||||
|
||||
//////////////
|
||||
// 用hybrid-index来寻找smem(seeding-1),要求种子 hits >= min_hits_thres(>0)
|
||||
void hyb_first_seeding(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, const int min_seed_len,
|
||||
HybSeedArr* seeds, int tid) {
|
||||
int x = seeding_from_start(hyb, read_seq, read_range, min_seed_len, seeds, tid);
|
||||
int rx = 0; // 对应的反向位置
|
||||
Range rr = {read_seq->len - read_range->end, read_seq->len - read_range->start};
|
||||
uint64_t seeds_m = seeds->m;
|
||||
uint64_t* seeds_cap = &seeds_m; // 记录当前seeds的长度, 如果扩容,则需要初始化ref_pos_arr.m, n, a 为0
|
||||
uint8_t type_hits = 0;
|
||||
uint64_t offset = 0;
|
||||
int extra_tend = MAX(0, min_seed_len - HYB_KMER_LEN) + 1;
|
||||
// PROF_START(seed_1);
|
||||
while (x < read_range->end) {
|
||||
// 反向搜索, 此时x距离start超过16
|
||||
rx = read_seq->len - x - 1; // 反向位置, 因为正向包含x, 所以这里需要减1
|
||||
// PROF_START(seed_1);
|
||||
get_kmer_data(hyb, read_seq->back_bits, rx, &type_hits, &offset);
|
||||
// PROF_END(tprof[T_SEED_1_0][tid], seed_1);
|
||||
if (type_hits == 0) {
|
||||
x += extra_tend;
|
||||
// tdat[TD_SEED_1_0][tid]++;
|
||||
} else if (type_hits == 1) {
|
||||
// tdat[TD_SEED_1_1][tid]++;
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_1(hyb, read_seq, read_range, &rr, x, rx, HYB_KMER_LEN, offset, min_seed_len, seeds, seeds_cap);
|
||||
// PROF_END(tprof[T_SEED_1_1][tid], seed_1);
|
||||
} else if (type_hits == 2) {
|
||||
// tdat[TD_SEED_1_2][tid]++;
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_2(hyb, read_seq, read_range, &rr, x, rx, HYB_KMER_LEN, offset, min_seed_len, seeds, seeds_cap, tid);
|
||||
// PROF_END(tprof[T_SEED_1_2][tid], seed_1);
|
||||
} else {
|
||||
// tdat[TD_SEED_1_3][tid]++;
|
||||
// PROF_START(seed_1);
|
||||
uint32_t seq_pos = rx + HYB_KMER_LEN;
|
||||
uint32_t hits = type_hits;
|
||||
uint64_t sa_start = 0;
|
||||
uint8_t cmp_ref = 0;
|
||||
get_leaf_node(hyb->index_data + offset, read_seq->back_bits, read_seq->rseq, rr.end, &seq_pos, &hits, &sa_start,
|
||||
&cmp_ref, tid);
|
||||
// PROF_END(tprof[T_SEED_1_3_1][tid], seed_1);
|
||||
// tdat[(seq_pos - rx - HYB_KMER_LEN + 2) / 3][tid]++;
|
||||
// tdat[TD_SEED_1_0][tid]++;
|
||||
// if (hits == 1) {
|
||||
// tdat[TD_SEED_1_1][tid]++;
|
||||
// } else if (hits == 2) {
|
||||
// tdat[TD_SEED_1_2][tid]++;
|
||||
// } else if (hits == 3) {
|
||||
// tdat[TD_SEED_1_3][tid]++;
|
||||
// } else if (hits == 4) {
|
||||
// tdat[TD_SEED_1_4][tid]++;
|
||||
// } else {
|
||||
// tdat[TD_SEED_1_5][tid]++;
|
||||
// }
|
||||
if (seq_pos == rr.end || !cmp_ref) {
|
||||
if (hits == 1) {
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_1(hyb, read_seq, read_range, &rr, x, rx, seq_pos - rx, hyb_sa_to_ref_pos(hyb->sa, sa_start),
|
||||
min_seed_len, seeds, seeds_cap);
|
||||
// PROF_END(tprof[T_SEED_1_3_2][tid], seed_1);
|
||||
} else if (hits == 2) {
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_2(hyb, read_seq, read_range, &rr, x, rx, seq_pos - rx, sa_start, min_seed_len, seeds,
|
||||
seeds_cap, tid);
|
||||
// PROF_END(tprof[T_SEED_1_3_3][tid], seed_1);
|
||||
} else {
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_much(hyb, read_seq, read_range, x + rx - seq_pos + 1, min_seed_len, seeds, seeds_cap, tid);
|
||||
// PROF_END(tprof[T_SEED_1_3_4][tid], seed_1);
|
||||
}
|
||||
} else { // hits == 1
|
||||
// PROF_START(seed_1);
|
||||
x = handle_hits_1(hyb, read_seq, read_range, &rr, x, rx, seq_pos - rx, hyb_sa_to_ref_pos(hyb->sa, sa_start),
|
||||
min_seed_len, seeds, seeds_cap);
|
||||
// PROF_END(tprof[T_SEED_1_3_5][tid], seed_1);
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_3][tid], seed_1);
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_1_ALL][tid], seed_1);
|
||||
// PROF_END(tprof[T_SEED_1_0][tid], seed_1);
|
||||
}
|
||||
208
hyb_seeding_2.c
208
hyb_seeding_2.c
|
|
@ -0,0 +1,208 @@
|
|||
#include "hyb_idx.h"
|
||||
#include "profiling.h"
|
||||
|
||||
#define CALC_STAT 0
|
||||
/*
 * Walk hybrid-index nodes starting at idata for as long as the hit count
 * stays >= min_hits and the sequence position is before seq_end.
 *
 * The caller must initialise *seq_pos_p and *hits_p before the call. On
 * return *seq_pos_p, *hits_p and *sa_start_p describe where the walk stopped.
 * If the last step dropped the hit count below min_hits, the state from
 * before that step is restored and the node is re-parsed with
 * parse_one_hyb_node_min_hits().
 */
static void get_min_hits_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int min_hits,
                              uint32_t* seq_pos_p, uint32_t* hits_p, uint64_t* sa_start_p, int tid) {
    uint8_t cmp_ref = 0;
    int at_head = 1; /* cleared once the walk has moved past the first node */

    /* State before the most recent parse step. */
    uint8_t* last_node = idata;
    uint32_t last_seq_pos = *seq_pos_p;
    uint32_t last_hits = *hits_p;
    uint64_t last_sa_start = *sa_start_p;

    uint8_t* next_node = parse_first_hyb_node(idata, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, &cmp_ref, tid);
#if CALC_STAT
    if (next_node != NULL) {
        /* Histogram of the byte distance between consecutive nodes. */
        uint64_t dist = next_node - last_node;
        gdat[dist < 32 ? 0 : dist < 64 ? 1 : dist < 128 ? 2 : 3]++;
    }
#endif

    while (next_node != NULL && *hits_p >= min_hits && *seq_pos_p < seq_end) {
        last_node = next_node;
        last_seq_pos = *seq_pos_p;
        last_hits = *hits_p;
        last_sa_start = *sa_start_p;

        next_node = parse_one_hyb_node(last_node, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, &cmp_ref, tid);
        at_head = 0;
#if CALC_STAT
        if (next_node != NULL) {
            uint64_t dist = next_node - last_node;
            gdat[dist < 32 ? 0 : dist < 64 ? 1 : dist < 128 ? 2 : 3]++;
        }
#endif
    }

    if (*hits_p < min_hits) {
        /* Went one step too far: rewind and re-parse that node with the min_hits bound. */
        *seq_pos_p = last_seq_pos;
        *hits_p = last_hits;
        *sa_start_p = last_sa_start;
        parse_one_hyb_node_min_hits(last_node, seq_bits, seq_bp, seq_end, min_hits, at_head, seq_pos_p, sa_start_p, hits_p,
                                    tid);
    }
}
|
||||
|
||||
// for seeding-2 , 先反向,后正向
|
||||
int hyb_second_seeding(const HybridIndex* hyb, const ReadSeq* read_seq, int seed_start, int seed_end, int read_start,
|
||||
int read_end, uint64_t first_ref, int min_hits, int pre_pivot, int pre_start, int pre_end,
|
||||
const int min_seed_len, HybSeedArr* seeds, int tid) {
|
||||
uint64_t seeds_m = seeds->m;
|
||||
uint64_t* seeds_cap = &seeds_m;
|
||||
int pivot = (seed_start + seed_end) >> 1;
|
||||
int x = MAX(MAX(pivot, read_start + min_seed_len - 1), pre_pivot);
|
||||
int rx = 0;
|
||||
Range fr = {read_start, read_end};
|
||||
Range rr = {read_seq->len - read_end, read_seq->len - read_start};
|
||||
Range* read_range = &fr;
|
||||
uint8_t type_hits = 0;
|
||||
uint64_t offset = 0;
|
||||
int extra_tend = MAX(0, min_seed_len - HYB_KMER_LEN) + 1;
|
||||
int next_pivot = x;
|
||||
int cur_left = 0;
|
||||
int old_n = seeds->n;
|
||||
int i = 0;
|
||||
// PROF_START(seed_2);
|
||||
#if 1
|
||||
if (pre_end > pre_start && seeds->a[pre_end - 1].seed_end > pivot) {
|
||||
for (i = pre_start; i < pre_end; ++i) {
|
||||
HybSeed* seed = &kv_A(*seeds, i);
|
||||
if (seed->seed_end > pivot) {
|
||||
__check_add_seed(new_seed);
|
||||
seed = &kv_A(*seeds, i);
|
||||
__copy_seed(*seed, *new_seed);
|
||||
}
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_0][tid], seed_2);
|
||||
#endif
|
||||
while (cur_left <= pivot && x < fr.end) {
|
||||
next_pivot = x;
|
||||
rx = read_seq->len - x - 1; // 反向位置, 因为正向包含x, 所以这里需要减1
|
||||
// PROF_START(seed_2);
|
||||
get_kmer_data(hyb, read_seq->back_bits, rx, &type_hits, &offset);
|
||||
// PROF_END(tprof[T_SEED_2_0][tid], seed_2);
|
||||
if (type_hits == 0) {
|
||||
cur_left = x - HYB_KMER_LEN + 2;
|
||||
x += extra_tend;
|
||||
} else if (type_hits == 1) { // min_hits肯定大于1
|
||||
cur_left = x - HYB_KMER_LEN + 2;
|
||||
x += extra_tend;
|
||||
} else if (type_hits == 2) {
|
||||
// PROF_START(seed_2);
|
||||
if (min_hits > 2) {
|
||||
cur_left = x - HYB_KMER_LEN + 2;
|
||||
x += extra_tend;
|
||||
} else {
|
||||
uint64_t ref_pos_arr[2];
|
||||
int left_match_arr[2] = {0}, right_match_arr[2] = {0};
|
||||
Range mr_arr[2] = {0};
|
||||
int new_x = x - HYB_KMER_LEN + 1;
|
||||
get_kmer_data(hyb, read_seq->for_bits, new_x, &type_hits, &offset);
|
||||
ref_pos_arr[0] = hyb_sa_to_ref_pos(hyb->sa, offset);
|
||||
ref_pos_arr[1] = hyb_sa_to_ref_pos(hyb->sa, offset + 1);
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (first_ref + new_x - seed_start == ref_pos_arr[i]) {
|
||||
left_match_arr[i] = new_x - seed_start;
|
||||
right_match_arr[i] = seed_end - new_x;
|
||||
} else {
|
||||
both_end_match(hyb, read_seq->len, &fr, read_seq->for_bits, read_seq->back_bits, new_x, HYB_KMER_LEN,
|
||||
ref_pos_arr[i], &left_match_arr[i], &right_match_arr[i]);
|
||||
}
|
||||
_set_range(mr_arr[i], new_x - left_match_arr[i], new_x + right_match_arr[i]);
|
||||
}
|
||||
Range sr = {MAX(mr_arr[0].start, mr_arr[1].start), MIN(mr_arr[0].end, mr_arr[1].end)};
|
||||
if (sr.end - sr.start >= min_seed_len && sr.start <= pivot) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0] - new_x + sr.start, sr.start, sr.end);
|
||||
kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1] - new_x + sr.start);
|
||||
}
|
||||
cur_left = sr.start;
|
||||
x = MAX(x + 1, MAX(sr.end, cur_left + min_seed_len));
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_1][tid], seed_2);
|
||||
} else {
|
||||
// PROF_START(seed_2);
|
||||
if (type_hits <= HYB_HIT_THRESH && type_hits < min_hits) {
|
||||
cur_left = x - HYB_KMER_LEN + 2;
|
||||
x += extra_tend;
|
||||
} else {
|
||||
uint32_t seq_pos = rx + HYB_KMER_LEN;
|
||||
uint32_t hits = type_hits;
|
||||
uint64_t sa_start = 0;
|
||||
// PROF_START(seed_2);
|
||||
get_min_hits_node(hyb->index_data + offset, read_seq->back_bits, read_seq->rseq, rr.end, min_hits, &seq_pos,
|
||||
&hits, &sa_start, tid);
|
||||
// PROF_END(tprof[T_SEED_2_2_0][tid], seed_2);
|
||||
// tdat[(seq_pos - rx - HYB_KMER_LEN + 2) / 3][tid]++;
|
||||
// forward search
|
||||
int new_x = x - (seq_pos - rx) + 1;
|
||||
if (new_x <= pivot) {
|
||||
// PROF_START(seed_2);
|
||||
get_kmer_data(hyb, read_seq->for_bits, new_x, &type_hits, &offset);
|
||||
// PROF_END(tprof[T_SEED_2_2_1][tid], seed_2);
|
||||
if (type_hits == 2) {
|
||||
// PROF_START(seed_2);
|
||||
int right_match = 0;
|
||||
int match_end[2] = {0};
|
||||
uint64_t ref_pos_arr[2] = {hyb_sa_to_ref_pos(hyb->sa, offset), hyb_sa_to_ref_pos(hyb->sa, offset + 1)};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (first_ref + new_x - seed_start == ref_pos_arr[i]) {
|
||||
right_match = seed_end - new_x;
|
||||
} else {
|
||||
right_end_match(hyb, read_seq->len, read_range, read_seq->for_bits, read_seq->back_bits, new_x,
|
||||
HYB_KMER_LEN, ref_pos_arr[i], &right_match);
|
||||
}
|
||||
match_end[i] = new_x + right_match;
|
||||
}
|
||||
seq_pos = MIN(match_end[0], match_end[1]);
|
||||
if (seq_pos - new_x >= min_seed_len) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0], new_x, seq_pos);
|
||||
kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1]);
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_2_2][tid], seed_2);
|
||||
} else {
|
||||
hits = type_hits;
|
||||
seq_pos = new_x + HYB_KMER_LEN;
|
||||
// PROF_START(seed_2);
|
||||
get_min_hits_node(hyb->index_data + offset, read_seq->for_bits, read_seq->seq, fr.end, min_hits,
|
||||
&seq_pos, &hits, &sa_start, tid);
|
||||
// PROF_END(tprof[T_SEED_2_2_0][tid], seed_2);
|
||||
// tdat[(seq_pos - new_x - HYB_KMER_LEN + 2) / 3][tid]++;
|
||||
if (seq_pos - new_x >= min_seed_len) {
|
||||
__add_seed_one_pos(seed, hyb_sa_to_ref_pos(hyb->sa, sa_start), new_x, seq_pos);
|
||||
for (i = 1; i < hits; ++i) {
|
||||
kv_push(uint64_t, seed->ref_pos_arr, hyb_sa_to_ref_pos(hyb->sa, sa_start + i));
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_2_3][tid], seed_2);
|
||||
}
|
||||
}
|
||||
cur_left = new_x;
|
||||
x = MAX(seq_pos, cur_left + min_seed_len);
|
||||
// x = seq_pos;
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_2][tid], seed_2);
|
||||
}
|
||||
}
|
||||
if (old_n < seeds->n) {
|
||||
next_pivot = seeds->a[seeds->n - 1].seed_end;
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_2_ALL][tid], seed_2);
|
||||
return next_pivot;
|
||||
}
|
||||
203
hyb_seeding_3.c
203
hyb_seeding_3.c
|
|
@ -0,0 +1,203 @@
|
|||
#include "hyb_idx.h"
|
||||
#include "profiling.h"
|
||||
|
||||
#define CALC_STAT 0
|
||||
|
||||
/*
 * Walk hybrid-index nodes starting at idata until the hit count drops to one,
 * or the sequence position has reached seed_end with fewer than max_hits hits.
 *
 * The caller must initialise *seq_pos_p and *hits_p. On return *seq_pos_p,
 * *hits_p and *sa_start_p describe the chosen stopping point. If the walk
 * overshot seed_end, the previous node is re-examined with
 * parse_one_hyb_node_max_hits(); its result is kept only if it has fewer than
 * max_hits hits, otherwise the overshooting result is restored.
 */
static void get_seed_end_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int max_hits, int seed_end,
                              uint32_t* seq_pos_p, uint32_t* hits_p, uint64_t* sa_start_p, int tid) {
    uint8_t cmp_ref = 0;
    int at_head = 1; /* cleared once the walk has moved past the first node */

    /* State before the most recent parse step. */
    uint8_t* last_node = idata;
    uint32_t last_seq_pos = *seq_pos_p;
    uint32_t last_hits = *hits_p;
    uint64_t last_sa_start = *sa_start_p;

    uint8_t* next_node = parse_first_hyb_node(idata, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, &cmp_ref, tid);
#if CALC_STAT
    if (next_node != NULL) {
        /* Histogram of the byte distance between consecutive nodes. */
        uint64_t dist = next_node - last_node;
        gdat[dist < 32 ? 0 : dist < 64 ? 1 : dist < 128 ? 2 : 3]++;
    }
#endif

    while (next_node != NULL && *hits_p > 1 && (*seq_pos_p < seed_end || *hits_p >= max_hits)) {
        last_node = next_node;
        last_seq_pos = *seq_pos_p;
        last_hits = *hits_p;
        last_sa_start = *sa_start_p;

        next_node = parse_one_hyb_node(last_node, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, &cmp_ref, tid);
        at_head = 0;
#if CALC_STAT
        if (next_node != NULL) {
            uint64_t dist = next_node - last_node;
            gdat[dist < 32 ? 0 : dist < 64 ? 1 : dist < 128 ? 2 : 3]++;
        }
#endif
    }

    if (*seq_pos_p > seed_end && *hits_p < max_hits) {
        /* Remember the overshooting result in case the re-parse is not usable. */
        uint32_t over_seq_pos = *seq_pos_p;
        uint32_t over_hits = *hits_p;
        uint64_t over_sa_start = *sa_start_p;

        /* Step back and look inside the previous node. */
        *seq_pos_p = last_seq_pos;
        *hits_p = last_hits;
        *sa_start_p = last_sa_start;
        parse_one_hyb_node_max_hits(last_node, seq_bits, seq_bp, seq_end, max_hits, seed_end - last_seq_pos, at_head,
                                    seq_pos_p, sa_start_p, hits_p, tid);

        if (*hits_p >= max_hits) {
            *seq_pos_p = over_seq_pos;
            *hits_p = over_hits;
            *sa_start_p = over_sa_start;
        }
    }
}
|
||||
|
||||
// assume max_hits > 2
|
||||
void hyb_third_seeding(const HybridIndex* hyb, const ReadSeq* read_seq, const Range* read_range, const Range* seeds_range,
|
||||
const int min_seed_len, const int max_hits, HybSeedArr* seeds, int tid) {
|
||||
if (seeds_range->start == seeds_range->end) {
|
||||
return;
|
||||
}
|
||||
uint64_t seeds_m = seeds->m;
|
||||
uint64_t* seeds_cap = &seeds_m;
|
||||
int new_seed_len = min_seed_len + 1;
|
||||
|
||||
int i = 0;
|
||||
int right_match_arr[2] = {0};
|
||||
Range ff = *read_range;
|
||||
uint8_t type_hits = 0;
|
||||
uint64_t offset = 0;
|
||||
|
||||
int seeds_i = seeds_range->start;
|
||||
int x = read_range->start;
|
||||
int x_end = x + new_seed_len;
|
||||
int flag_found_x_end = 0;
|
||||
int flag_i = 0;
|
||||
// PROF_START(seed_3);
|
||||
HybSeed s = kv_A(*seeds, seeds_i);
|
||||
if (s.first_len > 0 && s.first_len < new_seed_len && s.seed_start == x && s.seed_end >= x_end && s.ref_pos_arr.n <= 2) {
|
||||
__add_seed_one_pos(seed, s.ref_pos_arr.a[0], x, x_end);
|
||||
if (s.ref_pos_arr.n == 2)
|
||||
kv_push(uint64_t, seed->ref_pos_arr, s.ref_pos_arr.a[1]);
|
||||
x = x_end;
|
||||
}
|
||||
while (x + min_seed_len < read_range->end) {
|
||||
while (seeds_i < seeds_range->end && kv_A(*seeds, seeds_i).seed_end < x) ++seeds_i;
|
||||
if (seeds_i == seeds_range->end)
|
||||
break;
|
||||
if (seeds->a[seeds_i].seed_start > x) {
|
||||
x += new_seed_len;
|
||||
continue;
|
||||
}
|
||||
x_end = x + new_seed_len;
|
||||
flag_found_x_end = 0;
|
||||
flag_i = 0;
|
||||
for (i = seeds_i; i < seeds_range->end; ++i) {
|
||||
HybSeed* s = &kv_A(*seeds, i);
|
||||
if (s->seed_start >= x_end)
|
||||
break;
|
||||
if (s->seed_start <= x && s->seed_end >= x_end) {
|
||||
flag_found_x_end = 1; // x_end点存在seed
|
||||
flag_i = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!flag_found_x_end) {
|
||||
x = x_end;
|
||||
continue;
|
||||
}
|
||||
|
||||
// PROF_START(seed_3);
|
||||
get_kmer_data(hyb, read_seq->for_bits, x, &type_hits, &offset);
|
||||
// PROF_END(tprof[T_SEED_3_0][tid], seed_3);
|
||||
if (type_hits == 0) {
|
||||
x += new_seed_len;
|
||||
} else if (type_hits == 1) {
|
||||
// PROF_START(seed_3);
|
||||
__add_seed_one_pos(seed, offset, x, x_end);
|
||||
x = x_end;
|
||||
// PROF_END(tprof[T_SEED_3_1][tid], seed_3);
|
||||
} else if (type_hits == 2) {
|
||||
// PROF_START(seed_3);
|
||||
HybSeed s = kv_A(*seeds, flag_i);
|
||||
if (s.ref_pos_arr.n == 2) {
|
||||
__add_seed_one_pos(seed, s.ref_pos_arr.a[0] + x - s.seed_start, x, x_end);
|
||||
kv_push(uint64_t, seed->ref_pos_arr, s.ref_pos_arr.a[1] + x - s.seed_start);
|
||||
} else { // 只有一个ref_pos
|
||||
ff.end = x_end;
|
||||
uint64_t ref_pos_arr[2] = {hyb_sa_to_ref_pos(hyb->sa, offset), hyb_sa_to_ref_pos(hyb->sa, offset + 1)};
|
||||
for (i = 0; i < 2; ++i) {
|
||||
if (s.ref_pos_arr.a[0] + x - s.seed_start == ref_pos_arr[i]) {
|
||||
right_match_arr[i] = MIN(s.seed_end - x, new_seed_len);
|
||||
} else {
|
||||
right_end_match(hyb, read_seq->len, &ff, read_seq->for_bits, read_seq->back_bits, x, HYB_KMER_LEN,
|
||||
ref_pos_arr[i], &right_match_arr[i]);
|
||||
}
|
||||
}
|
||||
if (right_match_arr[0] == right_match_arr[1]) {
|
||||
if (right_match_arr[0] == new_seed_len) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0], x, x_end);
|
||||
kv_push(uint64_t, seed->ref_pos_arr, ref_pos_arr[1]);
|
||||
}
|
||||
} else {
|
||||
if (right_match_arr[0] == new_seed_len) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[0], x, x_end);
|
||||
} else if (right_match_arr[1] == new_seed_len) {
|
||||
__add_seed_one_pos(seed, ref_pos_arr[1], x, x_end);
|
||||
}
|
||||
}
|
||||
}
|
||||
x = x_end;
|
||||
// PROF_END(tprof[T_SEED_3_2][tid], seed_3);
|
||||
} else {
|
||||
uint32_t seq_pos = x + HYB_KMER_LEN;
|
||||
uint32_t hits = type_hits;
|
||||
uint64_t sa_start = 0;
|
||||
// PROF_START(seed_3);
|
||||
get_seed_end_node(hyb->index_data + offset, read_seq->for_bits, read_seq->seq, read_range->end, max_hits,
|
||||
x + new_seed_len, &seq_pos, &hits, &sa_start, tid);
|
||||
// PROF_END(tprof[T_SEED_3_3_0][tid], seed_3);
|
||||
// tdat[(seq_pos - x - HYB_KMER_LEN + 2) / 3][tid]++;
|
||||
if (seq_pos - x < new_seed_len) {
|
||||
// PROF_START(seed_3);
|
||||
HybSeed s = kv_A(*seeds, flag_i);
|
||||
__add_seed_one_pos(seed, s.ref_pos_arr.a[0] + x - s.seed_start, x, x_end);
|
||||
x = x_end;
|
||||
// PROF_END(tprof[T_SEED_3_3_1][tid], seed_3);
|
||||
} else {
|
||||
// PROF_START(seed_3);
|
||||
if (hits < max_hits) {
|
||||
__add_seed_one_pos(seed, hyb_sa_to_ref_pos(hyb->sa, sa_start), x, seq_pos);
|
||||
int i = 0;
|
||||
for (i = 1; i < hits; ++i) {
|
||||
kv_push(uint64_t, seed->ref_pos_arr, hyb_sa_to_ref_pos(hyb->sa, sa_start + i));
|
||||
}
|
||||
x = seq_pos;
|
||||
} else {
|
||||
x = seq_pos + 1;
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_3_3_2][tid], seed_3);
|
||||
}
|
||||
|
||||
// PROF_END(tprof[T_SEED_3_3][tid], seed_3);
|
||||
}
|
||||
}
|
||||
// PROF_END(tprof[T_SEED_3_ALL][tid], seed_3);
|
||||
}
|
||||
817
hyb_utils.c
817
hyb_utils.c
|
|
@ -0,0 +1,817 @@
|
|||
#include <assert.h>
|
||||
#include <emmintrin.h>
|
||||
#include <sys/stat.h>
|
||||
|
||||
#include "hyb_idx.h"
|
||||
#include "share_mem.h"
|
||||
#include "utils.h"
|
||||
|
||||
/////////////////////////////////////////////////////
|
||||
// 使用hybrid-index的工具函数
|
||||
|
||||
// 加载hybrid index
|
||||
HybridIndex* load_hybrid_idx(const char* prefix) {
|
||||
HybridIndex* hyb = NULL;
|
||||
hyb = (HybridIndex*)calloc(1, sizeof(HybridIndex));
|
||||
|
||||
// return hyb;
|
||||
|
||||
int prefix_len = strlen(prefix);
|
||||
char* fn = (char*)malloc(prefix_len + 30);
|
||||
FILE* fp = NULL;
|
||||
struct stat st;
|
||||
|
||||
#define __load_hybrid_idx_code(suffix, data) \
|
||||
sprintf(fn, "%s%s", prefix, suffix); \
|
||||
err_check_true(stat(fn, &st), 0); \
|
||||
fp = xopen(fn, "r"); \
|
||||
data = (uint8_t*)malloc(st.st_size); \
|
||||
err_fread_noeof(data, 1, st.st_size, fp); \
|
||||
err_fclose(fp);
|
||||
|
||||
// load ref-len
|
||||
sprintf(fn, "%s.ref-len", prefix);
|
||||
// fprintf(stderr, "fn: %s\n", fn);
|
||||
fp = xopen(fn, "r");
|
||||
err_check_false(fscanf(fp, "%ld", &hyb->ref_len), EOF);
|
||||
err_fclose(fp);
|
||||
// fprintf(stderr, "ref-len: %ld\n", hyb->ref_len);
|
||||
|
||||
char* kmer_suffix = ".hybrid.kmer";
|
||||
char* data_suffix = ".hybrid.data";
|
||||
// char *kmer_suffix = ".hybrid.learned.kmer";
|
||||
// char *data_suffix = ".hybrid.learned.data";
|
||||
|
||||
#if 0
|
||||
// shm_clear_hyb();
|
||||
// load 2-bit ref
|
||||
__load_hybrid_idx_code(".hybrid.pac", hyb->ref_bits);
|
||||
// load hyb byte-sa
|
||||
__load_hybrid_idx_code(".hybrid.sa", hyb->sa);
|
||||
// load hyb kmer data
|
||||
__load_hybrid_idx_code(kmer_suffix, hyb->kmer_data);
|
||||
// load hyb index data
|
||||
__load_hybrid_idx_code(data_suffix, hyb->index_data);
|
||||
#else
|
||||
shm_keep_hyb(prefix);
|
||||
// load 2-bit ref
|
||||
hyb->ref_bits = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.pac"));
|
||||
// load hyb byte-sa
|
||||
hyb->sa = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), ".hybrid.sa"));
|
||||
// load hyb kmer data
|
||||
hyb->kmer_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), kmer_suffix));
|
||||
// load hyb index data
|
||||
hyb->index_data = (uint8_t*)shm_get_index(strcat(strcpy(fn, prefix), data_suffix));
|
||||
#endif
|
||||
// fprintf(stderr, "文件大小为: %ld 字节, %.2f GB\n", st.st_size, (double)st.st_size / (1024 * 1024 * 1024));
|
||||
return hyb;
|
||||
}
|
||||
|
||||
// Pack a base sequence into 2-bit form twice over.
//   bs  : input bases, one per byte; only the low two bits of each are used
//   len : number of bases in bs
//   fs  : receives the bases in their given order
//   rs  : receives the reverse complement (3 - base, walking bs from its end)
// Four bases share a byte, the earliest one in the lowest two bits. Both
// outputs always receive len / 4 + 1 bytes: when len is a multiple of 4 the
// final byte is written as 0. Nothing is written when len <= 0.
void create_seq_fb_bits(uint8_t* bs, int len, uint8_t* fs, uint8_t* rs) {
    if (len <= 0)
        return;

    const int out_bytes = len / 4 + 1;
    for (int b = 0; b < out_bytes; ++b) {
        uint8_t fwd = 0;
        uint8_t rev = 0;
        const int first = b * 4;
        for (int k = 0; k < 4 && first + k < len; ++k) {
            const int pos = first + k;
            fwd |= (uint8_t)((bs[pos] & 3) << (k * 2));
            rev |= (uint8_t)((3 - (bs[len - 1 - pos] & 3)) << (k * 2));
        }
        fs[b] = fwd;
        rs[b] = rev;
    }
}
|
||||
|
||||
// Compare seq against ref in the forward direction and return how many
// consecutive bases match; both seq and ref are 2-bit encoded.
//
//   seq, seq_pos, seq_end : packed read, first base to compare, one past the last base
//   ref, ref_pos, ref_len : packed reference, first base to compare, total base count
//
// Bases are addressed as byte (pos >> 2), in-byte slot (pos & 3), with slot 0
// in the lowest two bits. Up to 32 bases are compared per step by XOR-ing two
// 64-bit words; the index of the lowest differing bit divided by two is the
// number of bases that matched in that word. The result is never larger than
// MIN(ref_len - ref_pos, seq_end - seq_pos), and is 0 when seq_pos >= seq_end.
//
// NOTE(review): the 64-bit words are loaded through uint64_t* casts at arbitrary
// byte offsets and can extend past the last byte actually needed; presumably
// both buffers are padded — confirm.
// NOTE(review): declared plain `inline`; under C99/C11 rules that relies on an
// external definition or a non-inline declaration elsewhere — confirm the build.
|
||||
inline int forward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_end, uint8_t* ref, int64_t ref_pos, int64_t ref_len) {
|
||||
if (seq_pos >= seq_end)
|
||||
return 0;
|
||||
int64_t max_match_len = MIN(ref_len - ref_pos, seq_end - seq_pos);
|
||||
|
||||
int ref_odd = ref_pos & 3;
|
||||
int seq_odd = seq_pos & 3;
|
||||
int64_t i = seq_pos;
|
||||
int64_t j = ref_pos;
|
||||
int match_len = 0;
|
||||
/////////////
|
||||
#define __forward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \
|
||||
uint64_t bp32ref = first_ref; \
|
||||
uint64_t bp32seq = first_seq; \
|
||||
uint64_t cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(__builtin_ctzll(cmp) >> 1, max_match_len); \
|
||||
int first_cmp_len = first_len; \
|
||||
match_len = MIN(first_cmp_len, max_match_len); \
|
||||
i += first_cmp_len; \
|
||||
j += first_cmp_len; \
|
||||
seq_odd = i & 3; \
|
||||
ref_odd = j & 3; \
|
||||
for (; i + 31 < seq_end; i += 32, j += 32, match_len += 32) { \
|
||||
bp32ref = ref_bits; \
|
||||
bp32seq = seq_bits; \
|
||||
cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(match_len + (__builtin_ctzll(cmp) >> 1), max_match_len); \
|
||||
} \
|
||||
if (i < seq_end) { \
|
||||
bp32ref = ref_bits; \
|
||||
bp32seq = seq_bits; \
|
||||
cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(match_len + MIN(__builtin_ctzll(cmp) >> 1, seq_end - i), max_match_len); \
|
||||
match_len = max_match_len; /*match_len += seq_end - i;*/ \
|
||||
}
|
||||
/////////
|
||||
if (seq_odd < ref_odd) { // the first compare brings ref onto a whole-byte boundary
|
||||
__forward_match_code(32 - ref_odd, (*(uint64_t*)&ref[j >> 2]) >> (ref_odd << 1),
|
||||
(*(uint64_t*)&seq[i >> 2]) << ((ref_odd - seq_odd) << 1) >> (ref_odd << 1),
|
||||
(*(uint64_t*)&ref[j >> 2]),
|
||||
seq[i >> 2] >> (seq_odd << 1) | (*(uint64_t*)&seq[(i >> 2) + 1]) << ((4 - seq_odd) << 1));
|
||||
} else if (seq_odd > ref_odd) { // the first compare brings seq onto a whole-byte boundary
|
||||
__forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) << ((seq_odd - ref_odd) << 1) >> (seq_odd << 1),
|
||||
(*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1),
|
||||
ref[j >> 2] >> (ref_odd << 1) | (*(uint64_t*)&ref[(j >> 2) + 1]) << ((4 - ref_odd) << 1),
|
||||
(*(uint64_t*)&seq[i >> 2]));
|
||||
} else { // same in-byte offset: both sides can be compared as byte-aligned 64-bit words
|
||||
__forward_match_code(32 - seq_odd, (*(uint64_t*)&ref[j >> 2]) >> (seq_odd << 1),
|
||||
(*(uint64_t*)&seq[i >> 2]) >> (seq_odd << 1), (*(uint64_t*)&ref[j >> 2]),
|
||||
(*(uint64_t*)&seq[i >> 2]));
|
||||
}
|
||||
|
||||
return MIN(match_len, max_match_len);
|
||||
}
|
||||
|
||||
// Compare seq against ref in the backward direction and return how many
// consecutive bases match.
//
//   seq, seq_pos, seq_start : packed read, first base to compare, lowest base allowed
//   ref, ref_pos            : packed reference, first base to compare
//
// The comparison walks towards lower positions on both sides. Words are
// arranged so that the base at the current position sits in the top two bits,
// so the number of leading zero bits of the XOR, divided by two, is the number
// of bases that matched. The result is never larger than
// MIN(ref_pos + 1, seq_pos - seq_start + 1), and is 0 when seq_pos < seq_start.
//
// NOTE(review): the loads address bytes at (pos >> 2) - 7 and (pos >> 2) - 8 via
// uint64_t* casts; for small positions that is in front of the byte holding
// base 0, so the buffers presumably carry leading padding — confirm.
|
||||
inline int backward_match_len(uint8_t* seq, int64_t seq_pos, int64_t seq_start, uint8_t* ref, int64_t ref_pos) {
|
||||
if (seq_pos < seq_start)
|
||||
return 0;
|
||||
int64_t max_match_len = MIN(ref_pos + 1, seq_pos - seq_start + 1);
|
||||
int64_t i = seq_pos;
|
||||
int64_t j = ref_pos;
|
||||
int seq_odd = 3 - (i & 3);
|
||||
int ref_odd = 3 - (j & 3);
|
||||
int match_len = 0;
|
||||
/////////////
|
||||
#define __backward_tail_code(last_code) \
|
||||
int ext_bp = (7 - (i >> 2)) << 2; \
|
||||
uint64_t bp32ref = *(uint64_t*)(ref + (j >> 2) - 8) >> ((4 - ref_odd) << 1) | (uint64_t)ref[j >> 2] \
|
||||
<< ((ref_odd + 28) << 1); \
|
||||
uint64_t bp32seq = (*(uint64_t*)seq) << ((seq_odd + ext_bp) << 1); \
|
||||
uint64_t cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(match_len + MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \
|
||||
last_code
|
||||
|
||||
#define __backward_match_code(first_len, first_ref, first_seq, ref_bits, seq_bits) \
|
||||
uint64_t bp32ref = first_ref; \
|
||||
uint64_t bp32seq = first_seq; \
|
||||
uint64_t cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(MIN(__builtin_clzll(cmp) >> 1, (int)i + 1 - seq_start), max_match_len); \
|
||||
int first_cmp_len = first_len; \
|
||||
match_len = MIN(first_cmp_len, max_match_len); \
|
||||
i -= first_cmp_len; \
|
||||
j -= first_cmp_len; \
|
||||
seq_odd = 3 - (i & 3); \
|
||||
ref_odd = 3 - (j & 3); \
|
||||
for (; i - 31 >= 0; i -= 32, j -= 32, match_len += 32) { \
|
||||
bp32ref = ref_bits; \
|
||||
bp32seq = seq_bits; \
|
||||
cmp = bp32ref ^ bp32seq; \
|
||||
if (cmp > 0) \
|
||||
return MIN(match_len + (__builtin_clzll(cmp) >> 1), max_match_len); \
|
||||
} \
|
||||
if (i >= seq_start) { \
|
||||
__backward_tail_code(match_len = max_match_len); \
|
||||
}
|
||||
////////////
|
||||
if (i < 32) { // a single comparison is enough
|
||||
__backward_tail_code(return max_match_len);
|
||||
}
|
||||
if (seq_odd < ref_odd) { // the first compare brings ref onto a whole-byte boundary
|
||||
__backward_match_code(
|
||||
32 - ref_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (ref_odd << 1),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 7]) >> ((ref_odd - seq_odd) << 1) << (ref_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 8] >> ((4 - seq_odd) << 1)) | ((uint64_t)seq[(i >> 2)] << ((seq_odd + 28) << 1)));
|
||||
} else if (seq_odd > ref_odd) { // the first compare brings seq onto a whole-byte boundary
|
||||
__backward_match_code(
|
||||
32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) >> ((seq_odd - ref_odd) << 1) << (seq_odd << 1),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1),
|
||||
(*(uint64_t*)&ref[(j >> 2) - 8] >> ((4 - ref_odd) << 1)) | ((uint64_t)ref[(j >> 2)] << ((ref_odd + 28) << 1)),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 7]));
|
||||
} else { // same in-byte offset: both sides can be compared as byte-aligned 64-bit words
|
||||
__backward_match_code(32 - seq_odd, (*(uint64_t*)&ref[(j >> 2) - 7]) << (seq_odd << 1),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 7]) << (seq_odd << 1), (*(uint64_t*)&ref[(j >> 2) - 7]),
|
||||
(*(uint64_t*)&seq[(i >> 2) - 7]));
|
||||
}
|
||||
|
||||
return MIN(match_len, max_match_len);
|
||||
}
|
||||
|
||||
// Return the reference position stored in row `row` of the packed suffix
// array (little-endian layout).
//
// Entries are 33 bits wide and stored back to back, so entry `row` starts at
// bit 33 * row: byte (33 * row) >> 3, bit offset (33 * row) & 7, which equals
// row & 7 because 33 mod 8 == 1.
//
//   sa_arr : packed suffix array; 8 bytes are read starting at the entry's
//            first byte, so they must all be readable
//   row    : suffix-array row to fetch
// Returns the 33-bit value held in that row.
uint64_t hyb_sa_to_ref_pos(uint8_t* sa_arr, uint64_t row) {
    const uint64_t entry_mask = (1ULL << 33) - 1;        // 33-bit mask
    const uint64_t start_byte = ((row << 5) + row) >> 3; // first byte holding this entry

    // The entry starts at an arbitrary byte offset, so copy the 8-byte window
    // out instead of dereferencing a (possibly misaligned) uint64_t pointer.
    uint64_t window;
    __builtin_memcpy(&window, sa_arr + start_byte, sizeof window);

    return (window >> (row & 7)) & entry_mask;
}
|
||||
|
||||
// Decode the one-byte node header at *idata and step idata past it.
// Expects seq_pos_p and cmp_ref to be visible in the expanding scope.
// Sets *cmp_ref = 1 and declares these locals for the other node macros:
//   seq_pos         copy of *seq_pos_p at entry
//   header          the raw header byte
//   node_type       header bits 7..6
//   hits_neq        header bit 5
//   hits_bytes      header bits 4..3, plus one
//   off_bytes       header bits 2..0
//   child_ptr_bytes hits_bytes + off_bytes
#define __parse_node_start_no_addr(idata) \
|
||||
*cmp_ref = 1; \
|
||||
uint32_t seq_pos = *seq_pos_p; \
|
||||
uint8_t header = *idata; \
|
||||
idata += 1; \
|
||||
uint8_t node_type = (header >> 6) & 3; \
|
||||
uint8_t hits_neq = header >> 5 & 1; \
|
||||
uint32_t hits_bytes = ((header >> 3) & 3) + 1; \
|
||||
uint32_t off_bytes = header & 7; \
|
||||
uint32_t child_ptr_bytes = hits_bytes + off_bytes;
|
||||
|
||||
// Set up the local variables used while parsing a hyb node: declares
// `addr` (the next-node address, initially NULL) and then decodes the node
// header exactly as __parse_node_start_no_addr does.
|
||||
#define __parse_node_start_code(idata) \
|
||||
uint8_t* addr = NULL; \
|
||||
__parse_node_start_no_addr(idata)
|
||||
|
||||
// Parse a path node, i.e. a node holding a single run of bases.
// The run length is 9 bits: header bit 0 is the high bit and the byte at
// idata supplies the low 8 bits; the 2-bit packed run follows that byte.
// The read is compared against the run with forward_match_len and *seq_pos_p
// is advanced by the number of bases that matched.
//   - whole run matched: addr is set just past the packed run
//     (2 bits per base, rounded up to whole bytes); if hits_neq is set,
//     *sa_start_p is incremented and *hits_p decremented by one.
//   - otherwise: *cmp_ref is cleared and addr is left unchanged.
// Requires the locals declared by __parse_node_start_code.
|
||||
#define __parse_path_node_code(path_len) \
|
||||
uint32_t path_len = (header & 1); \
|
||||
path_len = path_len << 8 | *idata; \
|
||||
idata += 1; \
|
||||
int match_len = forward_match_len(seq_bits, seq_pos, seq_end, idata, 0, path_len); \
|
||||
*seq_pos_p = seq_pos + match_len; \
|
||||
if (match_len == (int)path_len) { \
|
||||
addr = idata + (((path_len << 1) + 7) >> 3); \
|
||||
if (hits_neq) { \
|
||||
*sa_start_p += 1; \
|
||||
*hits_p -= 1; \
|
||||
} \
|
||||
} else \
|
||||
*cmp_ref = 0;
|
||||
|
||||
// Parse a regular (branching) hyb node for a full k-mer.
//   kmer_len   number of bases consumed when the k-mer is present
//   mark_bytes size in bytes of the child bitmap stored at idata
//   int_type   integer type used to read that bitmap
//   kmer_code  expression yielding the k-mer value (the bitmap bit index)
//   bits_count popcount function matching int_type
//   one        the constant 1 in a width suitable for shifting by kmer
// The bitmap `mark` has one bit per possible k-mer. If the bit for `kmer` is
// clear, *cmp_ref is cleared and nothing else changes. If it is set:
//   - *seq_pos_p advances by kmer_len; *cmp_ref is cleared once it reaches
//     HYB_MAX_SEQ_LEN;
//   - nth_child is the number of set bits below `kmer`, has_next_child the
//     number of set bits above it;
//   - leaf node (off_bytes == HYB_LEAF_NODE): *sa_start_p and *hits_p are
//     adjusted from those counts and hits_neq; addr is not set;
//   - otherwise the per-child entries after the bitmap (hits_bytes of hit
//     offset followed by off_bytes of address offset, masked with
//     ga_hybHitsMask / ga_hybOffMask) are used to set *hits_p, *sa_start_p
//     and addr, the address of the selected child.
// Requires the locals declared by __parse_node_start_code.
|
||||
#define __parse_child_node_code(kmer_len, mark_bytes, int_type, kmer_code, bits_count, one) \
|
||||
uint8_t kmer = kmer_code; \
|
||||
int_type mark = *(int_type*)idata; \
|
||||
int_type child_num = mark & (one << kmer); \
|
||||
if (child_num) { \
|
||||
*seq_pos_p += kmer_len; \
|
||||
uint32_t nth_child = bits_count(mark & ((one << kmer) - 1)); \
|
||||
uint8_t has_next_child = bits_count(mark >> kmer >> 1); \
|
||||
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) { \
|
||||
*cmp_ref = 0; \
|
||||
} \
|
||||
if (off_bytes == HYB_LEAF_NODE) { \
|
||||
*hits_p -= nth_child + hits_neq + has_next_child; \
|
||||
*sa_start_p += nth_child + hits_neq; \
|
||||
} else { \
|
||||
if (nth_child == 0) { \
|
||||
idata += mark_bytes; \
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
|
||||
addr = idata + has_next_child * child_ptr_bytes; \
|
||||
*hits_p = hits_start - hits_neq; \
|
||||
*sa_start_p += hits_neq; \
|
||||
} else { \
|
||||
idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
|
||||
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes]; \
|
||||
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes; \
|
||||
if (has_next_child) { \
|
||||
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
|
||||
} else { \
|
||||
*hits_p -= hits_start + hits_neq; \
|
||||
} \
|
||||
*sa_start_p += hits_start; \
|
||||
} \
|
||||
} \
|
||||
} else { \
|
||||
*cmp_ref = 0; \
|
||||
}
|
||||
|
||||
// When a node cannot be matched with its full k-mer, check whether a shorter
// prefix of that k-mer still matches some of the node's children.
//   kmer_len       number of bases consumed if the prefix matches
//   mark_bytes     size in bytes of the child bitmap stored at idata
//   int_type       integer type used to read that bitmap
//   kmer_base_code expression yielding the first bitmap bit of the prefix group
//   bits_range     number of consecutive bitmap bits belonging to the prefix
//   bits_count     popcount function matching int_type
//   one            the constant 1 in a width suitable for the shifts
// If any bit in [kmer_base, kmer_base + bits_range) is set, *seq_pos_p
// advances by kmer_len and *sa_start_p / *hits_p are narrowed to the range
// covered by those children, using the per-child hit offsets that follow the
// bitmap (or plain counts for a leaf node). Inside that branch child_num is
// reused to hold the number of matching children.
// *cmp_ref is always cleared and addr is never set, so a partial match ends
// the descent.
// Requires the locals declared by __parse_node_start_no_addr.
|
||||
#define __parse_part_node_code(kmer_len, mark_bytes, int_type, kmer_base_code, bits_range, bits_count, one) \
|
||||
uint8_t kmer_base = kmer_base_code; \
|
||||
int_type mark = *(int_type*)idata; \
|
||||
int_type kmer_mask = ((one << bits_range) - 1) << kmer_base; \
|
||||
int_type child_num = mark & kmer_mask; \
|
||||
if (child_num) { \
|
||||
*seq_pos_p += kmer_len; \
|
||||
int_type kmer_pre_mask = (one << kmer_base) - 1; \
|
||||
uint32_t nth_child = bits_count(mark & kmer_pre_mask); \
|
||||
uint8_t has_next_child = bits_count(mark >> kmer_base >> bits_range); \
|
||||
if (off_bytes == HYB_LEAF_NODE) { \
|
||||
*hits_p -= nth_child + hits_neq + has_next_child; \
|
||||
*sa_start_p += nth_child + hits_neq; \
|
||||
} else { \
|
||||
if (nth_child == 0) { \
|
||||
idata += mark_bytes; \
|
||||
uint32_t hits_start = hits_neq; \
|
||||
if (has_next_child) { \
|
||||
child_num = bits_count(child_num); \
|
||||
*hits_p = \
|
||||
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
|
||||
} else { \
|
||||
*hits_p -= hits_start; \
|
||||
} \
|
||||
*sa_start_p += hits_start; \
|
||||
} else { \
|
||||
idata += mark_bytes + (nth_child - 1) * child_ptr_bytes; \
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes]; \
|
||||
*sa_start_p += hits_start; \
|
||||
if (has_next_child) { \
|
||||
child_num = bits_count(child_num); \
|
||||
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start; \
|
||||
} else { \
|
||||
*hits_p -= hits_start + hits_neq; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
*cmp_ref = 0;
|
||||
|
||||
// Main node-parsing code: dispatch on node_type and then run return_code.
//   HYB_BP_PATH : path node, handled by __parse_path_node_code
//   HYB_BP_1    : 1-base node with an 8-bit child bitmap
//   HYB_BP_2    : 2-base node with a 16-bit bitmap; falls back to a 1-base
//                 partial match when the 2-mer is absent or only one base is left
//   otherwise   : 3-base node with a 64-bit bitmap; falls back to a 2-base and
//                 then a 1-base partial match, limited by the bases left
//                 before seq_end
// Each fallback is attempted only if the previous attempt found no child
// (child_num == 0). Requires the locals declared by __parse_node_start_code.
|
||||
#define __parse_hyb_node_code(return_code) \
|
||||
if (node_type == HYB_BP_PATH) { \
|
||||
__parse_path_node_code(path_len); \
|
||||
} else if (node_type == HYB_BP_1) { \
|
||||
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1); \
|
||||
} else if (node_type == HYB_BP_2) { \
|
||||
if (seq_pos + 1 < seq_end) { \
|
||||
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1); \
|
||||
if (!child_num) { \
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \
|
||||
} \
|
||||
} else { \
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1); \
|
||||
} \
|
||||
} else { \
|
||||
if (seq_pos + 2 < seq_end) { \
|
||||
__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2], \
|
||||
__builtin_popcountll, 1ULL); \
|
||||
if (!child_num) { \
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, \
|
||||
__builtin_popcountll, 1ULL); \
|
||||
if (!child_num) { \
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
|
||||
} \
|
||||
} \
|
||||
} else if (seq_pos + 1 < seq_end) { \
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll, \
|
||||
1ULL); \
|
||||
if (!child_num) { \
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
|
||||
} \
|
||||
} else { \
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL); \
|
||||
} \
|
||||
} \
|
||||
/*__builtin_prefetch(addr, 0, 3); */ \
|
||||
return_code
|
||||
/////////
|
||||
|
||||
// 解析第一个节点, 返回后续对应的节点地址
|
||||
uint8_t* parse_first_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p,
|
||||
uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) {
|
||||
if (*seq_pos_p == seq_end)
|
||||
return NULL;
|
||||
__parse_node_start_code(idata);
|
||||
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
|
||||
idata += 5;
|
||||
if (*hits_p > HYB_HIT_THRESH) { // 更新hits
|
||||
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // hits数量
|
||||
idata += hits_bytes;
|
||||
}
|
||||
// __parse_hyb_node_code(return addr);
|
||||
if (node_type == HYB_BP_PATH) {
|
||||
__parse_path_node_code(path_len);
|
||||
} else if (node_type == HYB_BP_1) {
|
||||
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1);
|
||||
} else if (node_type == HYB_BP_2) {
|
||||
if (seq_pos + 1 < seq_end) {
|
||||
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1);
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
}
|
||||
} else {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
}
|
||||
} else {
|
||||
if (seq_pos + 2 < seq_end) {
|
||||
//__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2],
|
||||
// __builtin_popcountll, 1ULL);
|
||||
uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2];
|
||||
uint64_t mark = *(uint64_t*)idata;
|
||||
uint64_t child_num = mark & (1ULL << kmer);
|
||||
if (child_num) {
|
||||
*seq_pos_p += 3;
|
||||
uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1));
|
||||
uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1);
|
||||
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) {
|
||||
*cmp_ref = 0;
|
||||
}
|
||||
if (off_bytes == HYB_LEAF_NODE) {
|
||||
*hits_p -= nth_child + hits_neq + has_next_child;
|
||||
*sa_start_p += nth_child + hits_neq;
|
||||
} else {
|
||||
if (nth_child == 0) {
|
||||
idata += 8;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
addr = idata + has_next_child * child_ptr_bytes;
|
||||
*hits_p = hits_start - hits_neq;
|
||||
*sa_start_p += hits_neq;
|
||||
} else {
|
||||
idata += 8 + (nth_child - 1) * child_ptr_bytes;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes];
|
||||
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes;
|
||||
if (has_next_child) {
|
||||
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start + hits_neq;
|
||||
}
|
||||
*sa_start_p += hits_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*cmp_ref = 0;
|
||||
}
|
||||
|
||||
if (!child_num) {
|
||||
//__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4,
|
||||
//__builtin_popcountll,
|
||||
// 1ULL);
|
||||
uint8_t kmer_base = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2;
|
||||
uint64_t mark = *(uint64_t*)idata;
|
||||
uint64_t kmer_mask = ((1ULL << 4) - 1) << kmer_base;
|
||||
uint64_t child_num = mark & kmer_mask;
|
||||
if (child_num) {
|
||||
*seq_pos_p += 2;
|
||||
uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1;
|
||||
uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask);
|
||||
uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 4);
|
||||
if (off_bytes == HYB_LEAF_NODE) {
|
||||
*hits_p -= nth_child + hits_neq + has_next_child;
|
||||
*sa_start_p += nth_child + hits_neq;
|
||||
} else {
|
||||
if (nth_child == 0) {
|
||||
idata += 8;
|
||||
uint32_t hits_start = hits_neq;
|
||||
if (has_next_child) {
|
||||
child_num = __builtin_popcountll(child_num);
|
||||
*hits_p =
|
||||
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
|
||||
hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start;
|
||||
}
|
||||
*sa_start_p += hits_start;
|
||||
} else {
|
||||
idata += 8 + (nth_child - 1) * child_ptr_bytes;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
*sa_start_p += hits_start;
|
||||
if (has_next_child) {
|
||||
child_num = __builtin_popcountll(child_num);
|
||||
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
|
||||
hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start + hits_neq;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*cmp_ref = 0;
|
||||
if (!child_num) {
|
||||
// __parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
uint8_t kmer_base = seq_bp[seq_pos] << 4;
|
||||
uint64_t mark = *(uint64_t*)idata;
|
||||
uint64_t kmer_mask = ((1ULL << 16) - 1) << kmer_base;
|
||||
uint64_t child_num = mark & kmer_mask;
|
||||
if (child_num) {
|
||||
*seq_pos_p += 1;
|
||||
uint64_t kmer_pre_mask = (1ULL << kmer_base) - 1;
|
||||
uint32_t nth_child = __builtin_popcountll(mark & kmer_pre_mask);
|
||||
uint8_t has_next_child = __builtin_popcountll(mark >> kmer_base >> 16);
|
||||
if (off_bytes == HYB_LEAF_NODE) {
|
||||
*hits_p -= nth_child + hits_neq + has_next_child;
|
||||
*sa_start_p += nth_child + hits_neq;
|
||||
} else {
|
||||
if (nth_child == 0) {
|
||||
idata += 8;
|
||||
uint32_t hits_start = hits_neq;
|
||||
if (has_next_child) {
|
||||
child_num = __builtin_popcountll(child_num);
|
||||
*hits_p =
|
||||
(*(uint32_t*)(idata + (child_num - 1) * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
|
||||
hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start;
|
||||
}
|
||||
*sa_start_p += hits_start;
|
||||
} else {
|
||||
idata += 8 + (nth_child - 1) * child_ptr_bytes;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
*sa_start_p += hits_start;
|
||||
if (has_next_child) {
|
||||
child_num = __builtin_popcountll(child_num);
|
||||
*hits_p = (*(uint32_t*)(idata + child_num * child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) -
|
||||
hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start + hits_neq;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
*cmp_ref = 0;
|
||||
}
|
||||
}
|
||||
} else if (seq_pos + 1 < seq_end) {
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
}
|
||||
} else {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
}
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
// 解析后续的正常节点
|
||||
uint8_t* parse_one_hyb_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p,
|
||||
uint64_t* sa_start_p, uint32_t* hits_p, uint8_t* cmp_ref, int tid) {
|
||||
if (*seq_pos_p == seq_end)
|
||||
return NULL;
|
||||
__parse_node_start_code(idata);
|
||||
// __parse_hyb_node_code(return addr);
|
||||
|
||||
if (node_type == HYB_BP_PATH) {
|
||||
__parse_path_node_code(path_len);
|
||||
} else if (node_type == HYB_BP_1) {
|
||||
__parse_child_node_code(1, 1, uint8_t, seq_bp[seq_pos], __builtin_popcount, 1);
|
||||
} else if (node_type == HYB_BP_2) {
|
||||
if (seq_pos + 1 < seq_end) {
|
||||
__parse_child_node_code(2, 2, uint16_t, seq_bp[seq_pos] << 2 | seq_bp[seq_pos + 1], __builtin_popcount, 1);
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
}
|
||||
} else {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
}
|
||||
} else {
|
||||
if (seq_pos + 2 < seq_end) {
|
||||
//__parse_child_node_code(3, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2],
|
||||
// __builtin_popcountll, 1ULL);
|
||||
uint8_t kmer = seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2 | seq_bp[seq_pos + 2];
|
||||
uint64_t mark = *(uint64_t*)idata;
|
||||
uint64_t child_num = mark & (1ULL << kmer);
|
||||
if (child_num) {
|
||||
*seq_pos_p += 3;
|
||||
uint32_t nth_child = __builtin_popcountll(mark & ((1ULL << kmer) - 1));
|
||||
uint8_t has_next_child = __builtin_popcountll(mark >> kmer >> 1);
|
||||
if (*seq_pos_p >= HYB_MAX_SEQ_LEN) {
|
||||
*cmp_ref = 0;
|
||||
}
|
||||
if (off_bytes == HYB_LEAF_NODE) {
|
||||
*hits_p -= nth_child + hits_neq + has_next_child;
|
||||
*sa_start_p += nth_child + hits_neq;
|
||||
} else {
|
||||
if (nth_child == 0) {
|
||||
idata += 8;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
addr = idata + has_next_child * child_ptr_bytes;
|
||||
*hits_p = hits_start - hits_neq;
|
||||
*sa_start_p += hits_neq;
|
||||
} else {
|
||||
idata += 8 + (nth_child - 1) * child_ptr_bytes;
|
||||
uint32_t hits_start = *(uint32_t*)idata & ga_hybHitsMask[hits_bytes];
|
||||
uint32_t child_offset = *(uint32_t*)(idata + hits_bytes) & ga_hybOffMask[off_bytes];
|
||||
addr = idata + child_offset + (has_next_child + 1) * child_ptr_bytes;
|
||||
if (has_next_child) {
|
||||
*hits_p = (*(uint32_t*)(idata + child_ptr_bytes) & ga_hybHitsMask[hits_bytes]) - hits_start;
|
||||
} else {
|
||||
*hits_p -= hits_start + hits_neq;
|
||||
}
|
||||
*sa_start_p += hits_start;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
*cmp_ref = 0;
|
||||
}
|
||||
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
}
|
||||
}
|
||||
} else if (seq_pos + 1 < seq_end) {
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
if (!child_num) {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
}
|
||||
} else {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
}
|
||||
}
|
||||
return addr;
|
||||
}
|
||||
|
||||
// Try to extend the current match by a partial k-mer of one node while
// keeping at least min_hits hits.
//
//   idata      : start of the node (its header byte)
//   seq_bits   : not used in this function
//   seq_bp     : read with one base per byte
//   seq_end    : one past the last usable read position
//   min_hits   : smallest hit count an extension may leave
//   is_head    : non-zero if idata is a head node (SA start / hits are then
//                loaded from the node first)
//   seq_pos_p, sa_start_p, hits_p : in/out state of the current match
//   tid        : unused here
//
// 2-base node: try one base; undo it if fewer than min_hits hits remain.
// 3-base node: try one base; undo it if fewer than min_hits hits remain,
// otherwise (if another base is available) retry from the saved state with
// two bases and keep the one-base result if two bases drop below min_hits.
// Other node types leave the match state as it is.
// Returns immediately when *seq_pos_p == seq_end.
void parse_one_hyb_node_min_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int min_hits,
|
||||
int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) {
|
||||
if (*seq_pos_p == seq_end)
|
||||
return;
|
||||
// the node macros write *cmp_ref; this function discards it
uint8_t cmp_ref_val = 0;
|
||||
uint8_t* cmp_ref = &cmp_ref_val;
|
||||
__parse_node_start_no_addr(idata);
|
||||
|
||||
if (is_head) {
|
||||
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
|
||||
idata += 5;
|
||||
if (*hits_p > HYB_HIT_THRESH) { // refresh hits from the node
|
||||
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // number of hits
|
||||
idata += hits_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
// snapshot of the state before any extension, used for rollback
uint8_t* prev_idata = idata;
|
||||
uint32_t prev_seq_pos = *seq_pos_p;
|
||||
uint32_t prev_hits = *hits_p;
|
||||
uint64_t prev_sa_start = *sa_start_p;
|
||||
if (node_type == HYB_BP_2) {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
if (*hits_p < min_hits) {
|
||||
*seq_pos_p = prev_seq_pos;
|
||||
*hits_p = prev_hits;
|
||||
*sa_start_p = prev_sa_start;
|
||||
}
|
||||
} else if (node_type == HYB_BP_3) {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
if (*hits_p < min_hits) {
|
||||
*seq_pos_p = prev_seq_pos;
|
||||
*hits_p = prev_hits;
|
||||
*sa_start_p = prev_sa_start;
|
||||
} else if (seq_pos + 1 < seq_end) {
|
||||
// keep the one-base result in prev_* and restart from the original state
uint32_t pp_seq_pos = prev_seq_pos;
|
||||
uint32_t pp_hits = prev_hits;
|
||||
uint64_t pp_sa_start = prev_sa_start;
|
||||
prev_seq_pos = *seq_pos_p;
|
||||
prev_hits = *hits_p;
|
||||
prev_sa_start = *sa_start_p;
|
||||
*seq_pos_p = pp_seq_pos;
|
||||
*hits_p = pp_hits;
|
||||
*sa_start_p = pp_sa_start;
|
||||
|
||||
idata = prev_idata; // rewind to the saved node position
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
if (*hits_p < min_hits) {
|
||||
*seq_pos_p = prev_seq_pos;
|
||||
*hits_p = prev_hits;
|
||||
*sa_start_p = prev_sa_start;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Extend the current match by a partial k-mer of one node, preferring the
// shortest extension that brings the hit count below max_hits.
//
//   idata      : start of the node (its header byte)
//   seq_bits   : not used in this function
//   seq_bp     : read with one base per byte
//   seq_end    : one past the last usable read position
//   max_hits   : hit count a one-base extension of a 3-base node must stay below
//   min_bp     : 2 forces a two-base extension on a 3-base node; for node
//                types other than 2-/3-base it is added to *seq_pos_p when > 0
//   is_head    : non-zero if idata is a head node (SA start / hits are then
//                loaded from the node first)
//   seq_pos_p, sa_start_p, hits_p : in/out state of the current match
//   tid        : unused here
//
// 2-base node: one-base partial match. 3-base node: two bases if min_bp == 2,
// otherwise one base and, if that still leaves >= max_hits hits, a retry from
// the saved state with two bases.
// Returns immediately when *seq_pos_p == seq_end.
// NOTE(review): the two-base attempts read seq_bp[seq_pos + 1] without
// checking seq_pos + 1 < seq_end — presumably guaranteed by the caller; confirm.
void parse_one_hyb_node_max_hits(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, int max_hits, int min_bp,
|
||||
int is_head, uint32_t* seq_pos_p, uint64_t* sa_start_p, uint32_t* hits_p, int tid) {
|
||||
if (*seq_pos_p == seq_end)
|
||||
return;
|
||||
// the node macros write *cmp_ref; this function discards it
uint8_t cmp_ref_val = 0;
|
||||
uint8_t* cmp_ref = &cmp_ref_val;
|
||||
__parse_node_start_no_addr(idata);
|
||||
|
||||
if (is_head) {
|
||||
*sa_start_p = (*(uint64_t*)idata) & HYB_NODE_SA_MASK;
|
||||
idata += 5;
|
||||
if (*hits_p > HYB_HIT_THRESH) { // refresh hits from the node
|
||||
*hits_p = *((uint32_t*)idata) & ga_hybHitsMask[hits_bytes]; // number of hits
|
||||
idata += hits_bytes;
|
||||
}
|
||||
}
|
||||
|
||||
// snapshot of the state before any extension, used for rollback
uint8_t* prev_idata = idata;
|
||||
uint32_t prev_seq_pos = *seq_pos_p;
|
||||
uint32_t prev_hits = *hits_p;
|
||||
uint64_t prev_sa_start = *sa_start_p;
|
||||
if (node_type == HYB_BP_2) {
|
||||
__parse_part_node_code(1, 2, uint16_t, seq_bp[seq_pos] << 2, 4, __builtin_popcount, 1);
|
||||
} else if (node_type == HYB_BP_3) {
|
||||
if (min_bp == 2) {
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
} else {
|
||||
__parse_part_node_code(1, 8, uint64_t, seq_bp[seq_pos] << 4, 16, __builtin_popcountll, 1ULL);
|
||||
if (*hits_p >= max_hits) {
|
||||
*seq_pos_p = prev_seq_pos;
|
||||
*hits_p = prev_hits;
|
||||
*sa_start_p = prev_sa_start;
|
||||
idata = prev_idata; // rewind to the saved node position
|
||||
__parse_part_node_code(2, 8, uint64_t, seq_bp[seq_pos] << 4 | seq_bp[seq_pos + 1] << 2, 4, __builtin_popcountll,
|
||||
1ULL);
|
||||
}
|
||||
}
|
||||
} else { // path node
|
||||
if (min_bp > 0)
|
||||
*seq_pos_p += min_bp;
|
||||
}
|
||||
}
|
||||
|
||||
// The caller must initialise *hits_p and *seq_pos_p before calling.
#define CALC_STAT 0
#if CALC_STAT
// Tally the byte distance between two consecutively visited nodes into
// gdat[0..3] (buckets: < 32, < 64, < 128, everything else).
static inline void hyb_count_node_dist(const uint8_t* from, const uint8_t* to) {
    const uint64_t dist = to - from;
    if (dist < 32)
        gdat[0]++;
    else if (dist < 64)
        gdat[1]++;
    else if (dist < 128)
        gdat[2]++;
    else
        gdat[3]++;
}
#endif
// Descend the node chain starting at the head node idata: the head is parsed
// with parse_first_hyb_node, every following node with parse_one_hyb_node.
// The walk stops as soon as a parse yields no next node or the hit count in
// *hits_p has dropped to 1 or 0. Results are left in *seq_pos_p, *hits_p,
// *sa_start_p and *cmp_ref.
void get_leaf_node(uint8_t* idata, uint8_t* seq_bits, uint8_t* seq_bp, uint32_t seq_end, uint32_t* seq_pos_p, uint32_t* hits_p,
                   uint64_t* sa_start_p, uint8_t* cmp_ref, int tid) {
    uint8_t* cur = idata;
    uint8_t* nxt = parse_first_hyb_node(cur, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid);
    for (;;) {
#if CALC_STAT
        if (nxt != NULL)
            hyb_count_node_dist(cur, nxt);
#endif
        if (nxt == NULL || *hits_p <= 1)
            break;
        cur = nxt;
        nxt = parse_one_hyb_node(cur, seq_bits, seq_bp, seq_end, seq_pos_p, sa_start_p, hits_p, cmp_ref, tid);
    }
}
|
||||
|
||||
void get_kmer_data(const HybridIndex* hyb, uint8_t* seq_bits, int kmer_pos, uint8_t* type_hits, uint64_t* offset) {
|
||||
uint64_t kmer = _kmer_from_pos(seq_bits, kmer_pos);
|
||||
uint8_t* kmer_data_addr = hyb->kmer_data + kmer * HYB_KMER_DATA_BYTES;
|
||||
*type_hits = *kmer_data_addr & HYB_KMER_DATA_TYPE_MASK;
|
||||
*offset = (*(uint64_t*)kmer_data_addr & HYB_KMER_DATA_MASK) >> HYB_KMER_DATA_TYPE_BITS;
|
||||
}
|
||||
|
||||
void right_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
|
||||
int kmer_start, int init_match_len, uint64_t ref_pos, int* right_match) {
|
||||
if (ref_pos < hyb->ref_len) {
|
||||
*right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits,
|
||||
ref_pos + init_match_len, hyb->ref_len);
|
||||
} else {
|
||||
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
|
||||
*right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end,
|
||||
hyb->ref_bits, ref_pos - init_match_len);
|
||||
}
|
||||
*right_match += init_match_len; // 包括kmer的长度
|
||||
}
|
||||
|
||||
void left_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
|
||||
int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match) {
|
||||
if (ref_pos < hyb->ref_len) {
|
||||
*left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1);
|
||||
} else {
|
||||
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
|
||||
*left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits,
|
||||
ref_pos + 1, hyb->ref_len);
|
||||
}
|
||||
}
|
||||
|
||||
void both_end_match(const HybridIndex* hyb, const int seq_len, const Range* read_range, uint8_t* for_bits, uint8_t* back_bits,
|
||||
int kmer_start, int init_match_len, uint64_t ref_pos, int* left_match, int* right_match) {
|
||||
if (ref_pos < hyb->ref_len) {
|
||||
*right_match = forward_match_len(for_bits, kmer_start + init_match_len, read_range->end, hyb->ref_bits,
|
||||
ref_pos + init_match_len, hyb->ref_len);
|
||||
*left_match = backward_match_len(for_bits, kmer_start - 1, read_range->start, hyb->ref_bits, ref_pos - 1);
|
||||
} else {
|
||||
ref_pos = (hyb->ref_len << 1) - 1 - ref_pos;
|
||||
*right_match = backward_match_len(back_bits, seq_len - kmer_start - init_match_len - 1, seq_len - read_range->end,
|
||||
hyb->ref_bits, ref_pos - init_match_len);
|
||||
*left_match = forward_match_len(back_bits, seq_len - kmer_start, seq_len - read_range->start, hyb->ref_bits,
|
||||
ref_pos + 1, hyb->ref_len);
|
||||
}
|
||||
*right_match += init_match_len; // 包括kmer的长度
|
||||
}
|
||||
10
kseq.h
10
kseq.h
|
|
@ -221,11 +221,11 @@ typedef struct __kstring_t {
|
|||
kstream_t *f; \
|
||||
} kseq_t;
|
||||
|
||||
#define KSEQ_INIT2(SCOPE, type_t, __read) \
|
||||
KSTREAM_INIT(type_t, __read, 16384) \
|
||||
__KSEQ_TYPE(type_t) \
|
||||
__KSEQ_BASIC(SCOPE, type_t) \
|
||||
__KSEQ_READ(SCOPE)
|
||||
// Instantiate the kstream/kseq types and functions for file type `type_t`
// read through `__read`, with the generated functions given storage class
// SCOPE. The third KSTREAM_INIT argument is 16777216 (2^24); the trailing
// comment keeps the former value 16384.
// NOTE(review): that argument is presumably the stream buffer size in bytes
// (16 MiB instead of 16 KiB) — confirm against KSTREAM_INIT.
#define KSEQ_INIT2(SCOPE, type_t, __read) \
|
||||
KSTREAM_INIT(type_t, __read, 16777216) /* 16384 */ \
|
||||
__KSEQ_TYPE(type_t) \
|
||||
__KSEQ_BASIC(SCOPE, type_t) \
|
||||
__KSEQ_READ(SCOPE)
|
||||
|
||||
#define KSEQ_INIT(type_t, __read) KSEQ_INIT2(static, type_t, __read)
|
||||
|
||||
|
|
|
|||
7
ksw.h
7
ksw.h
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#define KSW_XBYTE 0x10000
|
||||
#define KSW_XSTOP 0x20000
|
||||
#define KSW_XSUBO 0x40000
|
||||
|
|
@ -106,9 +108,12 @@ extern "C" {
|
|||
*/
|
||||
int ksw_extend(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int gapo, int gape, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
|
||||
int ksw_extend2(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del, int o_ins, int e_ins, int w, int end_bonus, int zdrop, int h0, int *qle, int *tle, int *gtle, int *gscore, int *max_off);
|
||||
int ksw_extend2_avx2(int qlen, const uint8_t* query, int tlen, const uint8_t* target, int is_left, int m, const int8_t* mat, int o_del, int e_del,
|
||||
int o_ins, int e_ins, int a, int b, int w, int end_bonus, int zdrop, int h0, int* _qle, int* _tle, int* _gtle, int* _gscore,
|
||||
int* _max_off, buf_t* buf);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -0,0 +1,816 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include "utils.h"
|
||||
#include "debug.h"
|
||||
|
||||
/* Enables the extra left-boundary check inside the DP loop of ksw_extend2_avx2
 * (the "#ifdef ELIMINATE_DIFF_1" sections), which the original author added to
 * remove one class of result differences against the banded scalar kernel. */
#define ELIMINATE_DIFF_1
|
||||
// #define ELIMINATE_DIFF_3
|
||||
|
||||
/* Fill value for cells of the DEBUG_SW_EXTEND dump matrices that were never computed. */
#define NO_VAL -1
|
||||
|
||||
/* Number of 16-bit lanes handled per 256-bit vector in this file. It is also the
 * dimension of h_vec_int_mask and the stride of the vectorised DP loops. */
#define SIMD_WIDTH 16
|
||||
|
||||
/* 8-bit variant of the AVX2 extension kernel. Not defined in this file;
 * ksw_extend2_avx2 dispatches to it when qlen * a + h0 < 255. */
extern int ksw_extend2_avx2_u8(int qlen, const uint8_t *query,
                               int tlen, const uint8_t *target,
                               int is_left,
                               int m, const int8_t *mat,
                               int o_del, int e_del, int o_ins, int e_ins,
                               int a, int b,
                               int w, int end_bonus, int zdrop, int h0,
                               int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off,
                               buf_t *buf);

/* Scalar banded extension, defined further down in this file. */
int ksw_extend2_origin(int qlen, const uint8_t *query,
                       int tlen, const uint8_t *target,
                       int is_left,
                       int m, const int8_t *mat,
                       int o_del, int e_del, int o_ins, int e_ins,
                       int w, int end_bonus, int zdrop, int h0,
                       int *_qle, int *_tle, int *_gtle, int *_gscore, int *_max_off);
|
||||
|
||||
/*
 * Lane-keep masks for a partially filled vector: row r has its first r + 1
 * 16-bit lanes set to all ones and the remaining lanes cleared.
 * SIMD_INIT loads each row into h_vec_mask[r]; SIMD_REMOVE_EXTRA ANDs the
 * results with h_vec_mask[end - j] to discard lanes beyond `end`.
 */
static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
    /*  1 lane  */ {0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  2 lanes */ {0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  3 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  4 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  5 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  6 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  7 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  8 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /*  9 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /* 10 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
    /* 11 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0, 0x0},
    /* 12 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0, 0x0},
    /* 13 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0, 0x0},
    /* 14 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0, 0x0},
    /* 15 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0},
    /* 16 lanes */ {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}
};
|
||||
|
||||
/* Shuffle immediate built from _MM_SHUFFLE(0, 1, 2, 3).
 * NOTE(review): not referenced by any code visible in this part of the file. */
#define permute_mask _MM_SHUFFLE(0, 1, 2, 3)
|
||||
|
||||
/*
 * Declare and initialise the vector constants shared by the other SIMD_* macros.
 * Expects in scope: int i, o_del, e_del, o_ins, e_ins, a, b.
 * Gap and mismatch penalties are broadcast negated so they can be applied with
 * an add. max_vec is only declared here; the caller resets it per anti-diagonal.
 */
#define SIMD_INIT                                                               \
    int oe_del = o_del + e_del, oe_ins = o_ins + e_ins;                         \
    __m256i zero_vec     = _mm256_setzero_si256();                              \
    __m256i max_vec;                                                            \
    __m256i oe_del_vec   = _mm256_set1_epi16(-oe_del);                          \
    __m256i oe_ins_vec   = _mm256_set1_epi16(-oe_ins);                          \
    __m256i e_del_vec    = _mm256_set1_epi16(-e_del);                           \
    __m256i e_ins_vec    = _mm256_set1_epi16(-e_ins);                           \
    __m256i match_sc_vec = _mm256_set1_epi16(a);                                \
    __m256i mis_sc_vec   = _mm256_set1_epi16(-b);                               \
    __m256i amb_sc_vec   = _mm256_set1_epi16(-1);  /* score when a base is 4 */ \
    __m256i amb_vec      = _mm256_set1_epi16(4);   /* ambiguous base code */    \
    __m256i h_vec_mask[SIMD_WIDTH];                                             \
    for (i = 0; i < SIMD_WIDTH; ++i)                                            \
        h_vec_mask[i] = _mm256_loadu_si256((const __m256i *)h_vec_int_mask[i]);
|
||||
|
||||
/*
 * DP state naming used by the SIMD_* macros:
 *   e - score when the current reference base is deleted
 *   f - score when the current read base is inserted
 *   m - score when the two bases are aligned to each other (equal or not)
 *   h - the best of the above
 */
/* Load SIMD_WIDTH cells of the previous anti-diagonals starting at column j,
 * together with the read bases from seq[j-1] and the reference bases from
 * ref[tlen - i]. */
#define SIMD_LOAD                                                               \
    __m256i m1     = _mm256_loadu_si256((const __m256i *)(mA1 + j));            \
    __m256i e1     = _mm256_loadu_si256((const __m256i *)(eA1 + j));            \
    __m256i m1j1   = _mm256_loadu_si256((const __m256i *)(mA1 + (j - 1)));      \
    __m256i f1j1   = _mm256_loadu_si256((const __m256i *)(fA1 + (j - 1)));      \
    __m256i h0j1   = _mm256_loadu_si256((const __m256i *)(hA0 + (j - 1)));      \
    __m256i qs_vec = _mm256_loadu_si256((const __m256i *)(seq + (j - 1)));      \
    __m256i ts_vec = _mm256_loadu_si256((const __m256i *)(ref + (tlen - i)));
|
||||
|
||||
/*
 * Compare read and reference bases lane by lane and build score_vec:
 * match_sc_vec where they are equal, mis_sc_vec where they differ, and
 * amb_sc_vec in every lane where either base equals amb_vec.
 */
#define SIMD_CMP_SEQ                                                            \
    __m256i eq_lanes  = _mm256_cmpeq_epi16(qs_vec, ts_vec);                     \
    __m256i score_vec = _mm256_or_si256(                                        \
        _mm256_and_si256(match_sc_vec, eq_lanes),                               \
        _mm256_andnot_si256(eq_lanes, mis_sc_vec));                             \
    __m256i amb_lanes = _mm256_or_si256(                                        \
        _mm256_cmpeq_epi16(qs_vec, amb_vec),                                    \
        _mm256_cmpeq_epi16(ts_vec, amb_vec));                                   \
    score_vec = _mm256_or_si256(                                                \
        _mm256_andnot_si256(amb_lanes, score_vec),                              \
        _mm256_and_si256(amb_lanes, amb_sc_vec));
|
||||
|
||||
/*
 * Vectorised recurrence for one group of cells:
 *   en = max(m1 - oe_del, e1 - e_del)
 *   fn = max(m1j1 - oe_ins, f1j1 - e_ins)
 *   mn = h0j1 + score, forced to 0 in lanes where h0j1 <= 0
 *   hn = max(en, fn, mn)
 * hn is taken from the unclamped values first; all four results are then
 * clamped at zero.
 */
#define SIMD_COMPUTE                                                            \
    __m256i en_vec = _mm256_max_epi16(_mm256_add_epi16(m1, oe_del_vec),         \
                                      _mm256_add_epi16(e1, e_del_vec));         \
    __m256i fn_vec = _mm256_max_epi16(_mm256_add_epi16(m1j1, oe_ins_vec),       \
                                      _mm256_add_epi16(f1j1, e_ins_vec));       \
    __m256i diag_live = _mm256_cmpgt_epi16(h0j1, zero_vec);                     \
    __m256i mn_vec = _mm256_and_si256(_mm256_add_epi16(h0j1, score_vec),        \
                                      diag_live);                               \
    __m256i hn_vec = _mm256_max_epi16(_mm256_max_epi16(en_vec, fn_vec),         \
                                      mn_vec);                                  \
    en_vec = _mm256_max_epi16(en_vec, zero_vec);                                \
    fn_vec = _mm256_max_epi16(fn_vec, zero_vec);                                \
    mn_vec = _mm256_max_epi16(mn_vec, zero_vec);                                \
    hn_vec = _mm256_max_epi16(hn_vec, zero_vec);
|
||||
|
||||
/* Fold hn_vec into the running per-lane maximum, then write the new
 * h / m / e / f cells of the current anti-diagonal back at column j. */
#define SIMD_STORE                                                              \
    max_vec = _mm256_max_epi16(max_vec, hn_vec);                                \
    _mm256_storeu_si256((__m256i *)(hA2 + j), hn_vec);                          \
    _mm256_storeu_si256((__m256i *)(mA2 + j), mn_vec);                          \
    _mm256_storeu_si256((__m256i *)(eA2 + j), en_vec);                          \
    _mm256_storeu_si256((__m256i *)(fA2 + j), fn_vec);
|
||||
|
||||
/* Zero the lanes of a partial vector that lie past column `end`, keeping
 * only the first (end - j + 1) lanes. */
#define SIMD_REMOVE_EXTRA                                                       \
    __m256i tail_keep_mask = h_vec_mask[end - j];                               \
    hn_vec = _mm256_and_si256(hn_vec, tail_keep_mask);                          \
    mn_vec = _mm256_and_si256(mn_vec, tail_keep_mask);                          \
    fn_vec = _mm256_and_si256(fn_vec, tail_keep_mask);                          \
    en_vec = _mm256_and_si256(en_vec, tail_keep_mask);
|
||||
|
||||
/*
 * Find the best score of the current anti-diagonal and where it occurs.
 * The rotate-and-max sequence leaves the overall maximum in every lane of
 * max_vec. Lane 0 is folded into `m`, which may already hold a boundary value.
 * Only when that maximum is positive and m >= max is hA2 rescanned; the last
 * cell equal to the maximum sets mj and mi.
 * Clobbers i and j.
 */
#define SIMD_FIND_MAX                                                                        \
    max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 2));            \
    max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 4));            \
    max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 6));            \
    max_vec = _mm256_max_epu16(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8));            \
    max_vec = _mm256_max_epu16(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01));  \
    int16_t *diag_best = (int16_t *)&max_vec;                                                \
    m = MAX(m, diag_best[0]); /* handles the second kind of mismatch with BSW (upper boundary) */ \
    if (diag_best[0] > 0 && m >= max) {                                                      \
        for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) {                \
            __m256i stored_h = _mm256_loadu_si256((const __m256i *)(hA2 + j));               \
            __m256i hit_lanes = _mm256_cmpeq_epi16(stored_h, max_vec);                       \
            uint32_t hit_bits = _mm256_movemask_epi8(hit_lanes);                             \
            if (hit_bits != 0) {                                                             \
                /* two mask bits per 16-bit lane: pick the highest matching lane */          \
                int lane = SIMD_WIDTH - 1 - ((__builtin_clz(hit_bits)) >> 1);                \
                mj = j - 1 + lane;                                                           \
                mi = i - 1 - lane;                                                           \
            }                                                                                \
        }                                                                                    \
    }
|
||||
|
||||
/*
 * Advance the column buffers after each anti-diagonal: the three H buffers
 * rotate (hA0 <- hA1 <- hA2 <- old hA0) and the E, F and M buffer pairs swap.
 */
#define SWAP_DATA_POINTER                                   \
    int16_t *rot_tmp = hA0;                                 \
    hA0 = hA1;                                              \
    hA1 = hA2;                                              \
    hA2 = rot_tmp;                                          \
    rot_tmp = eA1;                                          \
    eA1 = eA2;                                              \
    eA2 = rot_tmp;                                          \
    rot_tmp = fA1;                                          \
    fA1 = fA2;                                              \
    fA2 = rot_tmp;                                          \
    rot_tmp = mA1;                                          \
    mA1 = mA2;                                              \
    mA2 = rot_tmp;
|
||||
|
||||
/*
 * Debug helper: dump one extension task to the debug files selected by fnum.
 * The query goes to gfq[fnum] and the target to gft[fnum], one sequence per
 * line, and "qlen tlen h0" goes to gfi[fnum].
 * Base codes 0..3 are written as A, C, G, T; code 4 and any larger value are
 * written as N so that an unexpected code cannot index past the lookup table.
 * Compiles to a no-op unless DEBUG_FILE_OUTPUT is defined.
 */
static void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
{
#ifdef DEBUG_FILE_OUTPUT
    static const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
    FILE *query_f = gfq[fnum];
    FILE *target_f = gft[fnum];
    FILE *info_f = gfi[fnum];
    int i;

    /* query, one base per character */
    for (i = 0; i < qlen; ++i)
        fputc(seq_map[query[i] < 5 ? query[i] : 4], query_f);
    fputc('\n', query_f);

    /* target */
    for (i = 0; i < tlen; ++i)
        fputc(seq_map[target[i] < 5 ? target[i] : 4], target_f);
    fputc('\n', target_f);

    /* lengths and seed score */
    fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
#else
    (void)qlen;
    (void)query;
    (void)tlen;
    (void)target;
    (void)h0;
    (void)fnum;
#endif
}
|
||||
|
||||
int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
|
||||
const uint8_t *query, // read碱基序列
|
||||
int tlen, // target length reference的长度
|
||||
const uint8_t *target, // reference序列
|
||||
int is_left, // 是不是向左扩展
|
||||
int m, // 碱基种类 (5)
|
||||
const int8_t *mat, // 每个位置的query和target的匹配得分 m*m
|
||||
int o_del, // deletion 错配开始的惩罚系数
|
||||
int e_del, // deletion extension的惩罚系数
|
||||
int o_ins, // insertion 错配开始的惩罚系数
|
||||
int e_ins, // insertion extension的惩罚系数SIMD_BTYES
|
||||
int a, // 碱基match时的分数
|
||||
int b, // 碱基mismatch时的惩罚分数(正数)
|
||||
int w, // 提前剪枝系数,w =100 匹配位置和beg的最大距离
|
||||
int end_bonus,
|
||||
int zdrop,
|
||||
int h0, // 该seed的初始得分(完全匹配query的碱基数)
|
||||
int *_qle, // 匹配得到全局最大得分的碱基在query的位置
|
||||
int *_tle, // 匹配得到全局最大得分的碱基在reference的位置
|
||||
int *_gtle, // query全部匹配上的target的长度
|
||||
int *_gscore, // query的端到端匹配得分
|
||||
int *_max_off, // 取得最大得分时在query和reference上位置差的 最大值
|
||||
buf_t *buf) // 之前已经开辟过的缓存
|
||||
{
|
||||
// return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
|
||||
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
//fprintf(gf[0], "%d\n", qlen);
|
||||
#ifdef GET_DIFFERENT_EXTENSION_LENGTH
|
||||
if (qlen <= 30) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 0);
|
||||
} else if (qlen < 60) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 1);
|
||||
} else if (qlen < 90) {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 2);
|
||||
} else {
|
||||
write_query_target_sequence(qlen, query, tlen, target, h0, 3);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if (qlen * a + h0 < 255) return ksw_extend2_avx2_u8(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, a, b, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off, buf);
|
||||
|
||||
int16_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M
|
||||
int16_t *seq, *ref;
|
||||
uint8_t *mem;
|
||||
int16_t *qtmem, *vmem;
|
||||
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
|
||||
int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int Dloop = tlen + qlen; // 循环跳出条件
|
||||
int span, beg1, end1; // 边界条件计算
|
||||
int col_size = qlen + 2 + SIMD_WIDTH;
|
||||
int val_mem_size = (col_size * 9 * 2 + 31) >> 5 << 5; // 32字节的整数倍
|
||||
int mem_size = (seq_size + ref_size) * 2 + val_mem_size;
|
||||
|
||||
SIMD_INIT; // 初始化simd用的数据
|
||||
|
||||
assert(h0 > 0);
|
||||
|
||||
// allocate memory
|
||||
//mem = malloc(mem_size);
|
||||
|
||||
if (buf->m < mem_size) {
|
||||
buf->m = mem_size;
|
||||
buf->addr = (uint8_t *)realloc(buf->addr, mem_size);
|
||||
}
|
||||
mem = buf->addr;
|
||||
|
||||
qtmem = (int16_t *)&mem[0];
|
||||
seq=&qtmem[0]; ref=&qtmem[seq_size];
|
||||
if (is_left) {
|
||||
for (i=0; i<qlen; ++i) seq[i] = query[qlen - 1 - i];
|
||||
for (i=0; i<tlen; ++i) ref[i] = target[i];
|
||||
} else {
|
||||
for (i=0; i<qlen; ++i) seq[i] = query[i];
|
||||
for (i=0; i<tlen; ++i) ref[i] = target[tlen - 1 - i];
|
||||
}
|
||||
|
||||
vmem = &ref[ref_size];
|
||||
for (i=0; i<(val_mem_size>>1); i+=SIMD_WIDTH) {
|
||||
_mm256_storeu_si256((__m256i*)&vmem[i], zero_vec);
|
||||
}
|
||||
hA = &vmem[0];
|
||||
mA = &vmem[col_size * 3];
|
||||
eA = &vmem[col_size * 5];
|
||||
fA = &vmem[col_size * 7];
|
||||
|
||||
hA0 = &hA[0]; hA1 = &hA[col_size]; hA2 = &hA1[col_size];
|
||||
mA1 = &mA[0]; mA2 = &mA[col_size];
|
||||
eA1 = &eA[0]; eA2 = &eA[col_size];
|
||||
fA1 = &fA[0]; fA2 = &fA[col_size];
|
||||
|
||||
// adjust $w if it is too large
|
||||
k = m * m;
|
||||
// get the max score
|
||||
for (i = 0, max = 0; i < k; ++i) max = max > mat[i]? max : mat[i];
|
||||
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
|
||||
max_ins = max_ins > 1? max_ins : 1;
|
||||
w = w < max_ins? w : max_ins;
|
||||
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
|
||||
max_del = max_del > 1? max_del : 1;
|
||||
w = w < max_del? w : max_del; // TODO: is this necessary?
|
||||
if (tlen < qlen) w = MIN(tlen - 1, w);
|
||||
|
||||
// DP loop
|
||||
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;;
|
||||
max_off = 0;
|
||||
beg = 1; end = qlen;
|
||||
// init h0
|
||||
hA0[0] = h0; // 左上角
|
||||
|
||||
if (qlen == 0 || tlen == 0) Dloop = 0; // 防止意外情况
|
||||
if (w >= qlen) { max_ie = 0; gscore = 0; }
|
||||
|
||||
int m_last=0;
|
||||
int iend;
|
||||
|
||||
#ifdef ELIMINATE_DIFF_1
|
||||
int midx = 1, icheck = 0, checkspecial = 1;
|
||||
int m3 = 0, m2 = 0, m1 = 0;
|
||||
//int marr[10] = {0};
|
||||
//int marr[b]; memset(marr, 0, 4 * b);
|
||||
#endif
|
||||
|
||||
//int print_flag = 0; //(qlen == 64 && tlen == 123);
|
||||
#ifdef DEBUG_SW_EXTEND
|
||||
int dii, djj;
|
||||
int16_t ins[tlen + 1][qlen + 2];
|
||||
int16_t del[tlen + 1][qlen + 2];
|
||||
int16_t score[tlen + 1][qlen + 2];
|
||||
for (dii = 0; dii <= tlen; ++dii)
|
||||
{
|
||||
for (djj = 0; djj <= qlen; ++djj)
|
||||
{
|
||||
ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL;
|
||||
}
|
||||
}
|
||||
for (dii = 1; dii <= tlen; ++dii)
|
||||
{
|
||||
del[dii][0] = MAX(0, h0 - o_del - e_del * dii);
|
||||
score[dii][0] = del[dii][0];
|
||||
}
|
||||
for (djj = 1; djj <= qlen; ++djj)
|
||||
{
|
||||
ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj);
|
||||
score[0][djj] = ins[0][djj];
|
||||
}
|
||||
ins[0][0] = del[0][0] = score[0][0] = h0;
|
||||
#endif
|
||||
|
||||
for (D = 1; LIKELY(D < Dloop); ++D) {
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen) {
|
||||
span = MIN(Dloop-D, w);
|
||||
beg1 = MAX(D-tlen+1, ((D-w) / 2) + 1);
|
||||
} else {
|
||||
span = MIN(D-1, w);
|
||||
beg1 = MAX(1, ((D-w) / 2) + 1);
|
||||
}
|
||||
end1 = MIN(qlen, beg1+span);
|
||||
|
||||
if (beg < beg1) beg = beg1;
|
||||
if (end > end1) end = end1;
|
||||
if (beg > end) break; // 不用计算了,直接跳出,否则hA2没有被赋值,里边是上一轮hA0的值,会出bug
|
||||
|
||||
iend = D - (beg - 1); // ref开始计算的位置,倒序
|
||||
span = end - beg;
|
||||
ibeg = iend - span - 1; // 0开始的ref索引位置
|
||||
|
||||
// 每一轮需要记录的数据
|
||||
int m = 0, mj = -1, mi = -1;
|
||||
max_vec = zero_vec;
|
||||
//if (print_flag)
|
||||
//{
|
||||
//fprintf(stderr, "D: %d, iend: %d, jbeg: %d\n", D, iend, beg);
|
||||
//}
|
||||
// 要处理边界
|
||||
// 左边界 处理f (insert)
|
||||
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end];}
|
||||
// 上边界
|
||||
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
||||
else if (D & 1) {
|
||||
hA1[beg - 1] = 0;
|
||||
hA2[beg - 1] = 0;
|
||||
}
|
||||
|
||||
for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) {
|
||||
// 取数据
|
||||
SIMD_LOAD;
|
||||
// 比对seq,计算罚分
|
||||
SIMD_CMP_SEQ;
|
||||
// 计算
|
||||
SIMD_COMPUTE;
|
||||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
// 剩下的计算单元
|
||||
if (j <= end) {
|
||||
// 取数据
|
||||
SIMD_LOAD;
|
||||
// 比对seq,计算罚分
|
||||
SIMD_CMP_SEQ;
|
||||
// 计算
|
||||
SIMD_COMPUTE;
|
||||
// 去除多余计算的部分
|
||||
SIMD_REMOVE_EXTRA;
|
||||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
|
||||
SIMD_FIND_MAX;
|
||||
|
||||
#ifdef ELIMINATE_DIFF_1
|
||||
// 用来解决与BSW结果不一样的第一种情况(左边界)
|
||||
#if 0
|
||||
if (hA1[0] < b && checkspecial) {
|
||||
int mi;
|
||||
if (hA1[0] == b - 1) {
|
||||
icheck = iend + 1;
|
||||
}
|
||||
for (mi = 0; mi < b - 1; ++mi) {
|
||||
if (midx - mi > 0)
|
||||
marr[mi] = MAX(marr[mi], hA2[midx - mi]);
|
||||
}
|
||||
midx += 1;
|
||||
if (ibeg > icheck)
|
||||
{
|
||||
int stopCalc = 0;
|
||||
for (mi = 0; mi < b - 1; ++mi)
|
||||
{
|
||||
stopCalc |= !marr[mi];
|
||||
}
|
||||
if (stopCalc)
|
||||
break;
|
||||
else
|
||||
checkspecial = 0;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (hA1[0] < 4 && checkspecial) { // b == 4
|
||||
if (hA1[0] == 3) {
|
||||
icheck = iend + 1;
|
||||
} else if (midx == 2) {
|
||||
m2 = MAX(m2, hA2[midx - 1]);
|
||||
} else {
|
||||
m2 = MAX(m2, hA2[midx - 1]);
|
||||
m1 = MAX(m1, hA2[midx - 2]);
|
||||
}
|
||||
m3 = MAX(m3, hA2[midx]);
|
||||
midx += 1;
|
||||
if (ibeg > icheck)
|
||||
{
|
||||
if (!m1 || !m2 || !m3)
|
||||
break;
|
||||
else
|
||||
checkspecial = 0;
|
||||
}
|
||||
|
||||
//if (print_flag) {
|
||||
//fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA2[midx + 1], hA2[midx + 2], hA2[midx + 3], midx);
|
||||
//if (midx > 2) fprintf(stderr, "%d, %d, %d\n", hA2[midx-1], hA2[midx-2], hA2[midx-3]);
|
||||
//fprintf(stderr, "jbeg: %d, ibeg: %d, iend: %d, icheck: %d, hA1: %d, score: %d %d %d, j: %d\n", beg, ibeg, iend, icheck, hA1[0], m1, m2, m3, midx);
|
||||
//}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifdef DEBUG_SW_EXTEND
|
||||
for (djj = beg; djj <= end; ++djj)
|
||||
{
|
||||
dii = D - djj + 1;
|
||||
ins[dii][djj] = fA2[djj];
|
||||
del[dii][djj] = eA2[djj];
|
||||
score[dii][djj] = hA2[djj];
|
||||
}
|
||||
//if (print_flag)
|
||||
//{
|
||||
//fprintf(stderr, "score: %d %d %d\n", hA2[beg], hA2[beg+1], hA2[beg+2]);
|
||||
//}
|
||||
#endif
|
||||
|
||||
// 注意最后跳出循环j的值
|
||||
j = end + 1;
|
||||
|
||||
if (j == qlen + 1) {
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
|
||||
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
|
||||
}
|
||||
if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点
|
||||
//if (m == 0 && m_last < 2) break;
|
||||
if (m > max) {
|
||||
max = m, max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
|
||||
} else if (m == max && max_i >= mi && mj > max_j) {
|
||||
max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
|
||||
}
|
||||
else if (zdrop > 0 && mi > -1) {
|
||||
if (mi - max_i > mj - max_j) {
|
||||
if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop) break;
|
||||
} else {
|
||||
if (max - m - ((mj - max_j) - (mi - max_i)) * e_ins > zdrop) break;
|
||||
}
|
||||
}
|
||||
|
||||
// 调整计算的边界
|
||||
for (j = beg; LIKELY(j <= end); ++j) { int has_val = hA1[j-1] | hA2[j]; if (has_val) break; }
|
||||
beg = j;
|
||||
for (j = end+1; LIKELY(j >= beg); --j) { int has_val = hA1[j-1] | hA2[j]; if (has_val) break; else hA0[j-1]=0; }
|
||||
end = j + 1 <= qlen? j + 1 : qlen;
|
||||
|
||||
m_last = m;
|
||||
// swap m, h, e, f
|
||||
SWAP_DATA_POINTER;
|
||||
}
|
||||
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
#ifdef DEBUG_SW_EXTEND
|
||||
fprintf(gf[0], "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
|
||||
fprintf(gf[1], "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
|
||||
fprintf(gf[2], "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);
|
||||
|
||||
fprintf(gf[0], "%-4d", -1);
|
||||
fprintf(gf[1], "%-4d", -1);
|
||||
fprintf(gf[2], "%-4d", -1);
|
||||
fprintf(gf[0], "%-4d", -1);
|
||||
fprintf(gf[1], "%-4d", -1);
|
||||
fprintf(gf[2], "%-4d", -1);
|
||||
for (djj = 0; djj < qlen; ++djj) {
|
||||
fprintf(gf[0], "%-4c", "ACGTN"[query[djj]]);
|
||||
fprintf(gf[1], "%-4c", "ACGTN"[query[djj]]);
|
||||
fprintf(gf[2], "%-4c", "ACGTN"[query[djj]]);
|
||||
}
|
||||
fprintf(gf[0], "\n");
|
||||
fprintf(gf[1], "\n");
|
||||
fprintf(gf[2], "\n");
|
||||
for (dii = 0; dii <= tlen; ++dii)
|
||||
{
|
||||
if (dii > 0) {
|
||||
fprintf(gf[0], "%-4c", "ACGTN"[target[dii - 1]]);
|
||||
fprintf(gf[1], "%-4c", "ACGTN"[target[dii - 1]]);
|
||||
fprintf(gf[2], "%-4c", "ACGTN"[target[dii - 1]]);
|
||||
} else {
|
||||
fprintf(gf[0], "%-4d", -1);
|
||||
fprintf(gf[1], "%-4d", -1);
|
||||
fprintf(gf[2], "%-4d", -1);
|
||||
}
|
||||
for (djj = 0; djj <= qlen; ++djj)
|
||||
{
|
||||
fprintf(gf[0], "%-4d", score[dii][djj]);
|
||||
fprintf(gf[1], "%-4d", ins[dii][djj]);
|
||||
fprintf(gf[2], "%-4d", del[dii][djj]);
|
||||
}
|
||||
fprintf(gf[0], "\n");
|
||||
fprintf(gf[1], "\n");
|
||||
fprintf(gf[2], "\n");
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
// free(mem);
|
||||
if (_qle) *_qle = max_j + 1;
|
||||
if (_tle) *_tle = max_i + 1;
|
||||
if (_gtle) *_gtle = max_ie + 1;
|
||||
if (_gscore) *_gscore = gscore;
|
||||
if (_max_off) *_max_off = max_off;
|
||||
return max;
|
||||
}
|
||||
|
||||
/* One DP column entry: h = H(i-1, j-1) carried over from the previous row,
 * e = E(i, j). */
typedef struct {
    int32_t h, e;
} eh_t;

/*
 * Scalar, row-by-row banded seed extension.
 *
 * When is_left is non-zero both query and target are reversed before the DP,
 * so the extension always proceeds from index 0 of the working copies.
 *
 * Returns the best extension score. Each output pointer may be NULL.
 * Prints a message and aborts if working memory cannot be allocated.
 */
int ksw_extend2_origin(int qlen,             // query length
                       const uint8_t *query, // read bases
                       int tlen,             // target (reference) length
                       const uint8_t *target, // reference bases
                       int is_left,          // non-zero when extending to the left
                       int m,                // alphabet size (5)
                       const int8_t *mat,    // m*m scoring matrix
                       int o_del,            // deletion open penalty
                       int e_del,            // deletion extension penalty
                       int o_ins,            // insertion open penalty
                       int e_ins,            // insertion extension penalty
                       int w,                // band width
                       int end_bonus,
                       int zdrop,
                       int h0,               // initial score of the seed
                       int *_qle,            // out: query length at the best score
                       int *_tle,            // out: target length at the best score
                       int *_gtle,           // out: target length when the whole query is aligned
                       int *_gscore,         // out: end-to-end score of the query
                       int *_max_off)        // out: largest query/target offset seen at a best score
{
    eh_t *eh;   // score array
    int8_t *qp; // query profile
    int i, j, k, oe_del = o_del + e_del, oe_ins = o_ins + e_ins, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
    uint8_t *qmem, *ref, *seq;
    assert(h0 > 0);

    // Allocate working memory. The "+ 1" keeps each request non-zero, so a NULL
    // result always means failure, even for empty inputs.
    qp = (int8_t *)malloc((size_t)qlen * (size_t)m + 1);
    eh = (eh_t *)calloc((size_t)qlen + 1, sizeof *eh);
    qmem = (uint8_t *)malloc((size_t)qlen + (size_t)tlen + 1);
    if (qp == NULL || eh == NULL || qmem == NULL) {
        fprintf(stderr, "[ksw_extend2_origin] out of memory (qlen=%d, tlen=%d)\n", qlen, tlen);
        abort();
    }

    seq = (uint8_t *)&qmem[0];
    ref = (uint8_t *)&qmem[qlen];
    if (is_left) {
        for (i = 0; i < qlen; ++i) seq[i] = query[qlen - 1 - i];
        for (i = 0; i < tlen; ++i) ref[i] = target[tlen - 1 - i];
    } else {
        for (i = 0; i < qlen; ++i) seq[i] = query[i];
        for (i = 0; i < tlen; ++i) ref[i] = target[i];
    }
    // generate the query profile
    for (k = i = 0; k < m; ++k) {
        const int8_t *p = &mat[k * m];
        for (j = 0; j < qlen; ++j) qp[i++] = p[seq[j]];
    }
    // fill the first row
    eh[0].h = h0; eh[1].h = h0 > oe_ins ? h0 - oe_ins : 0;
    for (j = 2; j <= qlen && eh[j - 1].h > e_ins; ++j)
        eh[j].h = eh[j - 1].h - e_ins;
    // adjust $w if it is too large
    k = m * m;
    for (i = 0, max = 0; i < k; ++i) // get the max score
        max = max > mat[i] ? max : mat[i];
    max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
    max_ins = max_ins > 1 ? max_ins : 1;
    w = w < max_ins ? w : max_ins;
    max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
    max_del = max_del > 1 ? max_del : 1;
    w = w < max_del ? w : max_del; // TODO: is this necessary?
    // DP loop
    max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;
    max_off = 0;
    beg = 0, end = qlen;

#ifdef DEBUG_SW_EXTEND
    int dii, djj;
    int16_t ins[tlen + 1][qlen + 2];
    int16_t del[tlen + 1][qlen + 2];
    int16_t score[tlen + 1][qlen + 2];
    for (dii = 0; dii <= tlen; ++dii)
    {
        for (djj = 0; djj <= qlen; ++djj)
        {
            ins[dii][djj] = del[dii][djj] = score[dii][djj] = NO_VAL;
        }
    }
    for (dii = 1; dii <= tlen; ++dii)
    {
        del[dii][0] = MAX(0, h0 - o_del - e_del * dii);
        score[dii][0] = del[dii][0];
    }
    for (djj = 1; djj <= qlen; ++djj)
    {
        ins[0][djj] = MAX(0, h0 - o_ins - e_ins * djj);
        score[0][djj] = ins[0][djj];
    }
    ins[0][0] = del[0][0] = score[0][0] = h0;
#endif

#ifdef DEBUG_FILE_OUTPUT
#ifdef COUNT_CALC_NUM
    // Number of cells a plain fixed band would visit, for comparison with the real count.
    int bsw_cal_num = 0;
    int real_cal_num = 0;
    for (i = 0; i < tlen; ++i)
    {
        int band_beg = MAX(0, i - w);
        int band_end = MIN(qlen, i + w + 1);
        if (band_beg >= band_end) break;
        bsw_cal_num += band_end - band_beg;
    }
    fprintf(gf[0], "start\n%d\n", bsw_cal_num);
#endif
#endif

#ifdef ELIMINATE_DIFF_3
    int prun_end = qlen; // for test diff_3
#endif

    for (i = 0; LIKELY(i < tlen); ++i) {
        // row_max / mj: best score of this row and the column where it occurs
        int t, f = 0, h1, row_max = 0, mj = -1;
        int8_t *q = &qp[ref[i] * qlen];
        // apply the band and the constraint (if provided)
        if (beg < i - w) beg = i - w;
        if (end > i + w + 1) end = i + w + 1;
        if (end > qlen) end = qlen; // marked as having no effect by the original author
        // compute the first column
        if (beg == 0) {
            h1 = h0 - (o_del + e_del * (i + 1));
            if (h1 < 0) h1 = 0;
        } else h1 = 0;
        for (j = beg; LIKELY(j < end); ++j) {

#ifdef DEBUG_FILE_OUTPUT
#ifdef COUNT_CALC_NUM
            real_cal_num++;
#endif
#endif

#ifdef DEBUG_SW_EXTEND
            ins[i + 1][j + 1] = f;
#endif
            // At the beginning of the loop: eh[j] = { H(i-1,j-1), E(i,j) }, f = F(i,j) and h1 = H(i,j-1)
            // Similar to SSE2-SW, cells are computed in the following order:
            //   H(i,j)   = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
            //   E(i+1,j) = max{H(i,j)-gapo, E(i,j)} - gape
            //   F(i,j+1) = max{H(i,j)-gapo, F(i,j)} - gape
            eh_t *p = &eh[j];
            int h, M = p->h, e = p->e; // get H(i-1,j-1) and E(i-1,j)
            p->h = h1;                 // set H(i,j-1) for the next row
            M = M ? M + q[j] : 0;      // separating H and M to disallow a cigar like "100M3I3D20M"; a zero diagonal stays zero
            h = M > e ? M : e;         // e and f are guaranteed to be non-negative, so h>=0 even if M<0
            h = h > f ? h : f;
#ifdef ELIMINATE_DIFF_3
            if (j >= prun_end && h == 0) break; // for test diff_3
#endif
            h1 = h; // save H(i,j) to h1 for the next column

#ifdef DEBUG_SW_EXTEND
            score[i + 1][j + 1] = h;
#endif
            mj = row_max > h ? mj : j;           // record the position where max score is achieved
            row_max = row_max > h ? row_max : h; // row_max is stored at eh[mj+1]
            t = M - oe_del;
            t = t > 0 ? t : 0;
            e -= e_del;
#ifdef DEBUG_SW_EXTEND
            del[i + 1][j + 1] = e;
#endif
            e = e > t ? e : t; // computed E(i+1,j)
            p->e = e;          // save E(i+1,j) for the next row
            t = M - oe_ins;
            t = t > 0 ? t : 0;
            f -= e_ins;
            f = f > t ? f : t; // computed F(i,j+1)
        }
        eh[end].h = h1; eh[end].e = 0;
        if (j == qlen) {
            max_ie = gscore > h1 ? max_ie : i;
            gscore = gscore > h1 ? gscore : h1;
        }
        if (row_max == 0) break;
        if (row_max > max) {
            max = row_max, max_i = i, max_j = mj;
            max_off = max_off > abs(mj - i) ? max_off : abs(mj - i);
        } else if (zdrop > 0) {
            if (i - max_i > mj - max_j) {
                if (max - row_max - ((i - max_i) - (mj - max_j)) * e_del > zdrop) break;
            } else {
                if (max - row_max - ((mj - max_j) - (i - max_i)) * e_ins > zdrop) break;
            }
        }
        // update beg and end for the next round
        for (j = beg; LIKELY(j < end) && eh[j].h == 0 && eh[j].e == 0; ++j); // original author's open question: why is f (insert score) not considered here?
        beg = j;
        for (j = end; LIKELY(j >= beg) && eh[j].h == 0 && eh[j].e == 0; --j);
#ifdef ELIMINATE_DIFF_3
        prun_end = j + 2 < qlen ? j + 2 : qlen; end = qlen; // for test diff_3
#else
        end = j + 2 < qlen ? j + 2 : qlen;
#endif
        // beg = 0; end = qlen; // uncomment this line for debugging
    }

#ifdef DEBUG_FILE_OUTPUT
#ifdef DEBUG_SW_EXTEND
    for (int dk = 0; dk < 3; ++dk)
        fprintf(gf[dk], "qlen: %d, tlen: %d, h0: %d, w: %d, mi: %d, mj: %d, mie: %d, max_off: %d, score: %d, max: %d\n", qlen, tlen, h0, w, max_i + 1, max_j + 1, max_ie + 1, max_off, gscore, max);

    for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "%-4d", -1);
    for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "%-4d", -1);
    for (djj = 0; djj < qlen; ++djj)
    {
        for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "%-4c", "ACGTN"[query[djj]]);
    }
    for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "\n");
    for (dii = 0; dii <= tlen; ++dii)
    {
        if (dii > 0)
        {
            for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "%-4c", "ACGTN"[target[dii - 1]]);
        }
        else
        {
            for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "%-4d", -1);
        }
        for (djj = 0; djj <= qlen; ++djj)
        {
            fprintf(gf[0], "%-4d", score[dii][djj]);
            fprintf(gf[1], "%-4d", ins[dii][djj]);
            fprintf(gf[2], "%-4d", del[dii][djj]);
        }
        for (int dk = 0; dk < 3; ++dk) fprintf(gf[dk], "\n");
    }
#endif
#endif

#ifdef DEBUG_FILE_OUTPUT
#ifdef COUNT_CALC_NUM
    fprintf(gf[0], "%d\nend\n", real_cal_num);
#endif
#endif

    free(eh); free(qp); free(qmem);
    if (_qle) *_qle = max_j + 1;
    if (_tle) *_tle = max_i + 1;
    if (_gtle) *_gtle = max_ie + 1;
    if (_gscore) *_gscore = gscore;
    if (_max_off) *_max_off = max_off;
    return max;
}
|
||||
|
|
@ -0,0 +1,454 @@
|
|||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <immintrin.h>
|
||||
#include <emmintrin.h>
|
||||
#include "utils.h"
|
||||
|
||||
#define ELIMINATE_DIFF_1
|
||||
|
||||
#define SIMD_WIDTH 32

/*
 * Tail masks for partially filled SIMD chunks.
 * Row r keeps the first r + 1 byte lanes: bytes 0..r are 0xff, bytes r+1..31
 * are 0. Rows are spelled with GNU range designators instead of 32 literal
 * columns; unlisted elements are zero-initialized.
 */
#define H_MASK_ROW(last) { [0 ... (last)] = 0xff }
static const uint8_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
    H_MASK_ROW(0),  H_MASK_ROW(1),  H_MASK_ROW(2),  H_MASK_ROW(3),
    H_MASK_ROW(4),  H_MASK_ROW(5),  H_MASK_ROW(6),  H_MASK_ROW(7),
    H_MASK_ROW(8),  H_MASK_ROW(9),  H_MASK_ROW(10), H_MASK_ROW(11),
    H_MASK_ROW(12), H_MASK_ROW(13), H_MASK_ROW(14), H_MASK_ROW(15),
    H_MASK_ROW(16), H_MASK_ROW(17), H_MASK_ROW(18), H_MASK_ROW(19),
    H_MASK_ROW(20), H_MASK_ROW(21), H_MASK_ROW(22), H_MASK_ROW(23),
    H_MASK_ROW(24), H_MASK_ROW(25), H_MASK_ROW(26), H_MASK_ROW(27),
    H_MASK_ROW(28), H_MASK_ROW(29), H_MASK_ROW(30), H_MASK_ROW(31)
};
#undef H_MASK_ROW
|
||||
|
||||
//static const uint8_t reverse_mask[SIMD_WIDTH] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
|
||||
#define permute_mask _MM_SHUFFLE(0, 1, 2, 3)
|
||||
//const int permute_mask = _MM_SHUFFLE(0, 1, 2, 3);
|
||||
// Declare and initialize the per-call SIMD state used by the other SIMD_* macros:
//   - oe_del / oe_ins: combined gap open + extend penalties;
//   - zero_vec and the penalty / score vectors, each broadcast to 32 8-bit lanes;
//   - amb_vec (base code 4) and amb_sc_vec (score 1) for ambiguous bases;
//   - h_vec_mask[]: the 32 tail masks loaded from h_vec_int_mask.
// max_vec is declared here but not initialized.
// Expects o_del, e_del, o_ins, e_ins, a, b and an int `i` in the expanding scope.
|
||||
#define SIMD_INIT \
|
||||
int oe_del = o_del + e_del, oe_ins = o_ins + e_ins; \
|
||||
__m256i zero_vec; \
|
||||
__m256i max_vec; \
|
||||
__m256i oe_del_vec; \
|
||||
__m256i oe_ins_vec; \
|
||||
__m256i e_del_vec; \
|
||||
__m256i e_ins_vec; \
|
||||
__m256i h_vec_mask[SIMD_WIDTH]; \
|
||||
zero_vec = _mm256_setzero_si256(); \
|
||||
oe_del_vec = _mm256_set1_epi8(oe_del); \
|
||||
oe_ins_vec = _mm256_set1_epi8(oe_ins); \
|
||||
e_del_vec = _mm256_set1_epi8(e_del); \
|
||||
e_ins_vec = _mm256_set1_epi8(e_ins); \
|
||||
__m256i match_sc_vec = _mm256_set1_epi8(a); \
|
||||
__m256i mis_sc_vec = _mm256_set1_epi8(b); \
|
||||
__m256i amb_sc_vec = _mm256_set1_epi8(1); \
|
||||
__m256i amb_vec = _mm256_set1_epi8(4); \
|
||||
for (i = 0; i < SIMD_WIDTH; ++i) h_vec_mask[i] = _mm256_loadu_si256((__m256i *)(&h_vec_int_mask[i]));
|
||||
|
||||
/*
 * e: the current reference base is deleted
 * f: the current query base is inserted
 * m: the current bases are aligned to each other (they may or may not be equal)
 * h: the maximum of the above
 */
|
||||
// Load the inputs for one 32-cell chunk (unaligned 32-byte loads):
//   m1, e1     : M and E arrays of the previous iteration, starting at j
//   m1j1, f1j1 : M and F arrays of the previous iteration, starting at j - 1
//   h0j1       : H array from two iterations back, starting at j - 1
//   qs_vec     : query bases starting at seq[j - 1]
//   ts_vec     : reference bases starting at ref[tlen - i]
// Expects mA1, eA1, fA1, hA0, seq, ref, tlen, i and j in the expanding scope.
|
||||
#define SIMD_LOAD \
|
||||
__m256i m1 = _mm256_loadu_si256((__m256i*) (&mA1[j])); \
|
||||
__m256i e1 = _mm256_loadu_si256((__m256i*) (&eA1[j])); \
|
||||
__m256i m1j1 = _mm256_loadu_si256((__m256i*) (&mA1[j-1])); \
|
||||
__m256i f1j1 = _mm256_loadu_si256((__m256i*) (&fA1[j-1])); \
|
||||
__m256i h0j1 = _mm256_loadu_si256((__m256i*) (&hA0[j-1])); \
|
||||
__m256i qs_vec = _mm256_loadu_si256((__m256i*) (&seq[j-1])); \
|
||||
__m256i ts_vec = _mm256_loadu_si256((__m256i*) (&ref[tlen - i]));
|
||||
|
||||
// Compare query and reference bases lane by lane and derive per-lane scores:
//   match_score_vec = a where the bases are equal, else 0
//   mis_score_vec   = b where the bases differ, else 0
// Lanes where either base equals amb_vec (code 4) are overridden:
//   match_score_vec becomes 0 and mis_score_vec becomes amb_sc_vec (1).
// Requires qs_vec / ts_vec from SIMD_LOAD and the constants from SIMD_INIT.
|
||||
#define SIMD_CMP_SEQ \
|
||||
__m256i match_mask_vec = _mm256_cmpeq_epi8(qs_vec, ts_vec); \
|
||||
__m256i mis_score_vec = _mm256_andnot_si256(match_mask_vec, mis_sc_vec); \
|
||||
__m256i match_score_vec = _mm256_and_si256(match_sc_vec, match_mask_vec); \
|
||||
__m256i q_amb_mask_vec = _mm256_cmpeq_epi8(qs_vec, amb_vec); \
|
||||
__m256i t_amb_mask_vec = _mm256_cmpeq_epi8(ts_vec, amb_vec); \
|
||||
__m256i amb_mask_vec = _mm256_or_si256(q_amb_mask_vec, t_amb_mask_vec); \
|
||||
__m256i amb_score_vec = _mm256_and_si256(amb_mask_vec, amb_sc_vec); \
|
||||
mis_score_vec = _mm256_andnot_si256(amb_mask_vec, mis_score_vec); \
|
||||
mis_score_vec = _mm256_or_si256(amb_score_vec, mis_score_vec); \
|
||||
match_score_vec = _mm256_andnot_si256(amb_mask_vec, match_score_vec);
|
||||
|
||||
// Compute E, F, M and H for 32 cells using unsigned saturating 8-bit arithmetic:
//   en_vec = max(m1 - oe_del, e1 - e_del)
//   fn_vec = max(m1j1 - oe_ins, f1j1 - e_ins)
//   mn_vec = h0j1 + match_score_vec - mis_score_vec, forced to 0 where h0j1 == 0
//   hn_vec = max(en_vec, fn_vec, mn_vec)
// Every subtraction is written as max(x, p) followed by subs(x, p), so it
// bottoms out at 0 instead of wrapping.
// Requires the vectors produced by SIMD_LOAD and SIMD_CMP_SEQ.
|
||||
#define SIMD_COMPUTE \
|
||||
__m256i en_vec0 = _mm256_max_epu8(m1, oe_del_vec); \
|
||||
en_vec0 = _mm256_subs_epu8(en_vec0, oe_del_vec); \
|
||||
__m256i en_vec1 = _mm256_max_epu8(e1, e_del_vec); \
|
||||
en_vec1 = _mm256_subs_epu8(en_vec1, e_del_vec); \
|
||||
__m256i en_vec = _mm256_max_epu8(en_vec0, en_vec1); \
|
||||
__m256i fn_vec0 = _mm256_max_epu8(m1j1, oe_ins_vec); \
|
||||
fn_vec0 = _mm256_subs_epu8(fn_vec0, oe_ins_vec); \
|
||||
__m256i fn_vec1 = _mm256_max_epu8(f1j1, e_ins_vec); \
|
||||
fn_vec1 = _mm256_subs_epu8(fn_vec1, e_ins_vec); \
|
||||
__m256i fn_vec = _mm256_max_epu8(fn_vec0, fn_vec1); \
|
||||
__m256i mn_vec0 = _mm256_adds_epu8(h0j1, match_score_vec); \
|
||||
mn_vec0 = _mm256_max_epu8(mn_vec0, mis_score_vec); \
|
||||
mn_vec0 = _mm256_subs_epu8(mn_vec0, mis_score_vec); \
|
||||
__m256i mn_mask = _mm256_cmpeq_epi8(h0j1, zero_vec); \
|
||||
__m256i mn_vec = _mm256_andnot_si256(mn_mask, mn_vec0); \
|
||||
__m256i hn_vec0 = _mm256_max_epu8(en_vec, fn_vec); \
|
||||
__m256i hn_vec = _mm256_max_epu8(hn_vec0, mn_vec);
|
||||
|
||||
// Fold hn_vec into the running per-lane maximum (max_vec) and store the chunk's
// E, F, M and H results into the current-iteration arrays (eA2, fA2, mA2, hA2)
// at index j, using unaligned 32-byte stores.
|
||||
#define SIMD_STORE \
|
||||
max_vec = _mm256_max_epu8(max_vec, hn_vec); \
|
||||
_mm256_storeu_si256((__m256i*)&eA2[j], en_vec); \
|
||||
_mm256_storeu_si256((__m256i*)&fA2[j], fn_vec); \
|
||||
_mm256_storeu_si256((__m256i*)&mA2[j], mn_vec); \
|
||||
_mm256_storeu_si256((__m256i*)&hA2[j], hn_vec);
|
||||
|
||||
// Zero the lanes of the last, partially filled chunk that lie past `end`:
// h_vec_mask[end - j] keeps the first end - j + 1 lanes of en/fn/mn/hn_vec and
// clears the rest. Requires 0 <= end - j < SIMD_WIDTH.
|
||||
#define SIMD_REMOVE_EXTRA \
|
||||
en_vec = _mm256_and_si256(en_vec, h_vec_mask[end-j]); \
|
||||
fn_vec = _mm256_and_si256(fn_vec, h_vec_mask[end-j]); \
|
||||
mn_vec = _mm256_and_si256(mn_vec, h_vec_mask[end-j]); \
|
||||
hn_vec = _mm256_and_si256(hn_vec, h_vec_mask[end-j]);
|
||||
|
||||
/*
 * Reduce the 32 unsigned bytes of (xx) to their maximum and leave that maximum
 * in EVERY byte of (xx).
 *
 * _mm256_alignr_epi8(x, x, n) rotates each 128-bit half by n bytes, so the
 * 8/4/2/1 sequence yields the per-half maximum in all 16 bytes; the final
 * permute swaps the halves to combine them.
 *
 * The previous shift-based version left the maximum only in byte 0 and patched
 * it through a caller-scope `maxVal` pointer, which made a later byte-wise
 * compare against (xx) match the wrong lanes.
 *
 * (xx) is evaluated several times: pass a plain __m256i lvalue.
 */
#define __max_32(xx)                                                                  \
    do {                                                                              \
        (xx) = _mm256_max_epu8((xx), _mm256_alignr_epi8((xx), (xx), 8));              \
        (xx) = _mm256_max_epu8((xx), _mm256_alignr_epi8((xx), (xx), 4));              \
        (xx) = _mm256_max_epu8((xx), _mm256_alignr_epi8((xx), (xx), 2));              \
        (xx) = _mm256_max_epu8((xx), _mm256_alignr_epi8((xx), (xx), 1));              \
        (xx) = _mm256_max_epu8((xx), _mm256_permute2x128_si256((xx), (xx), 0x01));    \
    } while (0)

/*
 * Find the maximum H of the current iteration and its position.
 *
 * After __max_32, every byte of max_vec holds the maximum, so maxVal[0] is the
 * scalar maximum and max_vec can be compared byte-wise against hA2.
 * If the maximum is non-zero and m >= max, hA2[beg..end] is scanned in 32-byte
 * chunks; for each chunk containing the maximum, `pos` is the highest matching
 * lane. Later chunks overwrite earlier ones, so the last match wins.
 *
 * Expects max_vec, m, max, beg, end, iend, hA2, i, j, mi, mj in scope and
 * declares `maxVal`.
 */
#define SIMD_FIND_MAX_NEW                                                             \
    uint8_t *maxVal = (uint8_t *)&(max_vec);                                          \
    __max_32(max_vec);                                                                \
    m = MAX(m, maxVal[0]);                                                            \
    if (maxVal[0] > 0 && m >= max) {                                                  \
        for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) {         \
            __m256i h2_vec = _mm256_loadu_si256((__m256i *)(&hA2[j]));                \
            __m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec);                        \
            uint32_t mask = _mm256_movemask_epi8(vcmp);                               \
            if (mask > 0) {                                                           \
                int pos = SIMD_WIDTH - 1 - __builtin_clz(mask);                       \
                mj = j - 1 + pos;                                                     \
                mi = i - 1 - pos;                                                     \
            }                                                                         \
        }                                                                             \
    }
|
||||
|
||||
/*
 * Find the maximum H of the current iteration and its position.
 *
 * max_vec is reduced so that every byte holds the maximum:
 * _mm256_alignr_epi8(x, x, n) rotates each 128-bit half by n bytes, so rotating
 * by 1, 2, 4 and 8 doubles the covered window each time (2, 4, 8, 16 bytes) and
 * the permute then merges the two halves. This gives the same result as the
 * earlier 1..8 rotation chain with half the work.
 *
 * If the maximum is non-zero and m >= max, hA2[beg..end] is scanned in 32-byte
 * chunks; for each chunk containing the maximum, `pos` is the highest matching
 * lane. Later chunks overwrite earlier ones, so the last match wins.
 *
 * Expects max_vec, m, max, beg, end, iend, hA2, i, j, mi, mj in scope and
 * declares `maxVal`.
 */
#define SIMD_FIND_MAX                                                                        \
    uint8_t *maxVal = (uint8_t *)&max_vec;                                                   \
    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 1));             \
    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 2));             \
    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 4));             \
    max_vec = _mm256_max_epu8(max_vec, _mm256_alignr_epi8(max_vec, max_vec, 8));             \
    max_vec = _mm256_max_epu8(max_vec, _mm256_permute2x128_si256(max_vec, max_vec, 0x01));   \
    m = MAX(m, maxVal[0]);                                                                   \
    if (maxVal[0] > 0 && m >= max) {                                                         \
        for (j = beg, i = iend; j <= end; j += SIMD_WIDTH, i -= SIMD_WIDTH) {                \
            __m256i h2_vec = _mm256_loadu_si256((__m256i *)(&hA2[j]));                       \
            __m256i vcmp = _mm256_cmpeq_epi8(h2_vec, max_vec);                               \
            uint32_t mask = _mm256_movemask_epi8(vcmp);                                      \
            if (mask > 0) {                                                                  \
                int pos = SIMD_WIDTH - 1 - __builtin_clz(mask);                              \
                mj = j - 1 + pos;                                                            \
                mi = i - 1 - pos;                                                            \
            }                                                                                \
        }                                                                                    \
    }
|
||||
|
||||
// Rotate the working arrays at the end of each iteration:
//   H is a 3-way rotation (hA0 <- hA1 <- hA2 <- old hA0);
//   E, F and M each swap their *1 (previous) and *2 (current) arrays.
// Declares a local `tmp`, so it can be expanded only once per scope.
|
||||
#define SWAP_DATA_POINTER \
|
||||
uint8_t * tmp=hA0; \
|
||||
hA0 = hA1; hA1 = hA2; hA2 = tmp; \
|
||||
tmp = eA1; eA1 = eA2; eA2 = tmp; \
|
||||
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
||||
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
||||
|
||||
|
||||
int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长度
|
||||
const uint8_t *query, // read碱基序列
|
||||
int tlen, // target length reference的长度
|
||||
const uint8_t *target, // reference序列
|
||||
int is_left, // 是不是向左扩展
|
||||
int m, // 碱基种类 (5)
|
||||
const int8_t *mat, // 每个位置的query和target的匹配得分 m*m
|
||||
int o_del, // deletion 错配开始的惩罚系数
|
||||
int e_del, // deletion extension的惩罚系数
|
||||
int o_ins, // insertion 错配开始的惩罚系数
|
||||
int e_ins, // insertion extension的惩罚系数
|
||||
int a, // 碱基match时的分数
|
||||
int b, // 碱基mismatch时的惩罚分数(正数)
|
||||
int w, // 提前剪枝系数,w =100 匹配位置和beg的最大距离
|
||||
int end_bonus,
|
||||
int zdrop,
|
||||
int h0, // 该seed的初始得分(完全匹配query的碱基数)
|
||||
int *_qle, // 匹配得到全局最大得分的碱基在query的位置
|
||||
int *_tle, // 匹配得到全局最大得分的碱基在reference的位置
|
||||
int *_gtle, // query全部匹配上的target的长度
|
||||
int *_gscore, // query的端到端匹配得分
|
||||
int *_max_off, // 取得最大得分时在query和reference上位置差的 最大值
|
||||
buf_t *buf) // 之前已经开辟过的缓存
|
||||
{
|
||||
uint8_t *mA,*hA, *eA, *fA, *mA1, *mA2, *hA0, *hA1, *eA1, *fA1, *hA2, *eA2, *fA2; // hA0保存上上个col的H,其他的保存上个H E F M
|
||||
uint8_t *seq, *ref;
|
||||
uint8_t *mem, *qtmem, *vmem;
|
||||
int seq_size = qlen + SIMD_WIDTH, ref_size = tlen + SIMD_WIDTH;
|
||||
int i, ibeg, D, j, k, beg, end, max, max_i, max_j, max_ins, max_del, max_ie, gscore, max_off;
|
||||
int Dloop = tlen + qlen; // 循环跳出条件
|
||||
int span, beg1, end1; // 边界条件计算
|
||||
int col_size = qlen + 2 + SIMD_WIDTH;
|
||||
int val_mem_size = (col_size * 9 + 31) >> 5 << 5; // 32字节的整数倍
|
||||
int mem_size = seq_size + ref_size + val_mem_size;
|
||||
|
||||
SIMD_INIT; // 初始化simd用的数据
|
||||
|
||||
assert(h0 > 0);
|
||||
|
||||
// allocate memory
|
||||
//mem = malloc(mem_size);
|
||||
if (buf->m < mem_size) {
|
||||
buf->m = mem_size;
|
||||
buf->addr = (uint8_t *)realloc(buf->addr, mem_size);
|
||||
}
|
||||
mem = buf->addr;
|
||||
|
||||
qtmem = &mem[0];
|
||||
seq=(uint8_t*)&qtmem[0]; ref=(uint8_t*)&qtmem[seq_size];
|
||||
if (is_left) {
|
||||
for (i=0; i<qlen; ++i) seq[i] = query[qlen - 1 - i];
|
||||
for (i=0; i<tlen; ++i) ref[i] = target[i];
|
||||
} else {
|
||||
for (i=0; i<qlen; ++i) seq[i] = query[i];
|
||||
for (i=0; i<tlen; ++i) ref[i] = target[tlen - 1 - i];
|
||||
}
|
||||
|
||||
vmem = &ref[ref_size];
|
||||
for (i=0; i<val_mem_size; i+=SIMD_WIDTH) {
|
||||
_mm256_storeu_si256((__m256i*)&vmem[i], zero_vec);
|
||||
}
|
||||
|
||||
hA = &vmem[0];
|
||||
mA = &vmem[col_size * 3];
|
||||
eA = &vmem[col_size * 5];
|
||||
fA = &vmem[col_size * 7];
|
||||
|
||||
hA0 = &hA[0]; hA1 = &hA[col_size]; hA2 = &hA1[col_size];
|
||||
mA1 = &mA[0]; mA2 = &mA[col_size];
|
||||
eA1 = &eA[0]; eA2 = &eA[col_size];
|
||||
fA1 = &fA[0]; fA2 = &fA[col_size];
|
||||
|
||||
// adjust $w if it is too large
|
||||
k = m * m;
|
||||
// get the max score
|
||||
for (i = 0, max = 0; i < k; ++i) max = max > mat[i]? max : mat[i];
|
||||
max_ins = (int)((double)(qlen * max + end_bonus - o_ins) / e_ins + 1.);
|
||||
max_ins = max_ins > 1? max_ins : 1;
|
||||
w = w < max_ins? w : max_ins;
|
||||
max_del = (int)((double)(qlen * max + end_bonus - o_del) / e_del + 1.);
|
||||
max_del = max_del > 1? max_del : 1;
|
||||
w = w < max_del? w : max_del; // TODO: is this necessary?
|
||||
if (tlen < qlen) w = MIN(tlen - 1, w);
|
||||
|
||||
// DP loop
|
||||
max = h0, max_i = max_j = -1; max_ie = -1, gscore = -1;;
|
||||
max_off = 0;
|
||||
beg = 1; end = qlen;
|
||||
// init h0
|
||||
hA0[0] = h0; // 左上角
|
||||
|
||||
if (qlen == 0 || tlen == 0) Dloop = 0; // 防止意外情况
|
||||
if (w >= qlen) { max_ie = 0; gscore = 0; }
|
||||
|
||||
int m_last=0;
|
||||
int iend;
|
||||
#ifdef ELIMINATE_DIFF_1
|
||||
int midx = 1, icheck = 0, checkspecial = 1;
|
||||
int m3 = 0, m2 = 0, m1 = 0;
|
||||
// int marr[10] = {0};
|
||||
// int marr[b]; memset(marr, 0, 4 * b);
|
||||
#endif
|
||||
for (D = 1; LIKELY(D < Dloop); ++D) {
|
||||
// 边界条件一定要注意! tlen 大于,等于,小于 qlen时的情况
|
||||
if (D > tlen) {
|
||||
span = MIN(Dloop-D, w);
|
||||
beg1 = MAX(D-tlen+1, ((D-w) / 2) + 1);
|
||||
} else {
|
||||
span = MIN(D-1, w);
|
||||
beg1 = MAX(1, ((D-w) / 2) + 1);
|
||||
}
|
||||
end1 = MIN(qlen, beg1+span);
|
||||
|
||||
if (beg < beg1) beg = beg1;
|
||||
if (end > end1) end = end1;
|
||||
if (beg > end) break; // 不用计算了,直接跳出,否则hA2没有被赋值,里边是上一轮hA0的值,会出bug
|
||||
|
||||
iend = D - (beg - 1); // ref开始计算的位置,倒序
|
||||
span = end - beg;
|
||||
ibeg = iend - span - 1; // 0开始的ref索引位置
|
||||
|
||||
// 每一轮需要记录的数据
|
||||
int m = 0, mj = -1, mi = -1;
|
||||
max_vec = zero_vec;
|
||||
|
||||
// 要处理边界
|
||||
// 左边界 处理f (insert)
|
||||
if (ibeg == 0) { hA1[end] = MAX(0, h0 - (o_ins + e_ins * end)); m = hA1[end]; }
|
||||
// 上边界
|
||||
if (beg == 1) { hA1[0] = MAX(0, h0 - (o_del + e_del * iend)); }
|
||||
else if (D & 1) {
|
||||
hA1[beg - 1] = 0;
|
||||
hA2[beg - 1] = 0;
|
||||
}
|
||||
|
||||
for (j=beg, i=iend; j<=end+1-SIMD_WIDTH; j+=SIMD_WIDTH, i-=SIMD_WIDTH) {
|
||||
// 取数据
|
||||
SIMD_LOAD;
|
||||
// 比对seq,计算罚分
|
||||
SIMD_CMP_SEQ;
|
||||
// 计算
|
||||
SIMD_COMPUTE;
|
||||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
// 剩下的计算单元
|
||||
if (j <= end) {
|
||||
// 取数据
|
||||
SIMD_LOAD;
|
||||
// 比对seq,计算罚分
|
||||
SIMD_CMP_SEQ;
|
||||
// 计算
|
||||
SIMD_COMPUTE;
|
||||
// 去除多余计算的部分
|
||||
SIMD_REMOVE_EXTRA;
|
||||
// 存储结果
|
||||
SIMD_STORE;
|
||||
}
|
||||
|
||||
SIMD_FIND_MAX;
|
||||
|
||||
#ifdef ELIMINATE_DIFF_1
|
||||
#if 0
|
||||
if (hA1[0] < b && checkspecial) {
|
||||
int mi;
|
||||
if (hA1[0] == b - 1) {
|
||||
icheck = iend + 1;
|
||||
}
|
||||
for (mi = 0; mi < b - 1; ++mi) {
|
||||
if (midx - mi > 0)
|
||||
marr[mi] = MAX(marr[mi], hA2[midx - mi]);
|
||||
}
|
||||
midx += 1;
|
||||
if (ibeg > icheck)
|
||||
{
|
||||
int stopCalc = 0;
|
||||
for (mi = 0; mi < b - 1; ++mi)
|
||||
{
|
||||
stopCalc |= !marr[mi];
|
||||
}
|
||||
if (stopCalc)
|
||||
break;
|
||||
else
|
||||
checkspecial = 0;
|
||||
}
|
||||
}
|
||||
#else
|
||||
if (hA1[0] < 4 && checkspecial)
|
||||
{ // b == 4
|
||||
if (hA1[0] == 3)
|
||||
{
|
||||
icheck = iend + 1;
|
||||
}
|
||||
else if (midx == 2)
|
||||
{
|
||||
m2 = MAX(m2, hA2[midx - 1]);
|
||||
}
|
||||
else
|
||||
{
|
||||
m2 = MAX(m2, hA2[midx - 1]);
|
||||
m1 = MAX(m1, hA2[midx - 2]);
|
||||
}
|
||||
m3 = MAX(m3, hA2[midx]);
|
||||
midx += 1;
|
||||
if (ibeg > icheck)
|
||||
{
|
||||
if (!m1 || !m2 || !m3)
|
||||
break;
|
||||
else
|
||||
checkspecial = 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// 注意最后跳出循环j的值
|
||||
j = end + 1;
|
||||
|
||||
if (j == qlen + 1) {
|
||||
max_ie = gscore > hA2[qlen] ? max_ie : ibeg;
|
||||
gscore = gscore > hA2[qlen] ? gscore : hA2[qlen];
|
||||
}
|
||||
if (m == 0 && m_last==0) break; // 一定要注意,斜对角遍历和按列遍历的不同点
|
||||
if (m > max) {
|
||||
max = m, max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi)? max_off : abs(mj - mi);
|
||||
}
|
||||
else if (m == max && max_i >= mi && mj > max_j) {
|
||||
max_i = mi, max_j = mj;
|
||||
max_off = max_off > abs(mj - mi) ? max_off : abs(mj - mi);
|
||||
}
|
||||
else if (zdrop > 0 && mi > -1) {
|
||||
if (mi - max_i > mj - max_j) {
|
||||
if (max - m - ((mi - max_i) - (mj - max_j)) * e_del > zdrop) break;
|
||||
} else {
|
||||
if (max - m - ((mj - max_j) - (mi - max_i)) * e_ins > zdrop) break;
|
||||
}
|
||||
}
|
||||
|
||||
// 调整计算的边界
|
||||
for (j = beg; LIKELY(j <= end); ++j) { int has_val = hA1[j-1] | hA2[j]; if (has_val) break; }
|
||||
beg = j;
|
||||
for (j = end+1; LIKELY(j >= beg); --j) { int has_val = hA1[j-1] | hA2[j]; if (has_val) break; else hA0[j-1]=0; }
|
||||
end = j + 1 <= qlen? j + 1 : qlen;
|
||||
|
||||
m_last = m;
|
||||
// swap m, h, e, f
|
||||
SWAP_DATA_POINTER;
|
||||
}
|
||||
|
||||
//free(mem);
|
||||
if (_qle) *_qle = max_j + 1;
|
||||
if (_tle) *_tle = max_i + 1;
|
||||
if (_gtle) *_gtle = max_ie + 1;
|
||||
if (_gscore) *_gscore = gscore;
|
||||
if (_max_off) *_max_off = max_off;
|
||||
return max;
|
||||
}
|
||||
8
main.c
8
main.c
|
|
@ -56,8 +56,10 @@ int main_maxk(int argc, char *argv[]);
|
|||
int bwa_bwt2kmer(int argc, char* argv[]); // create kmer-index from bwt
|
||||
int bwa_bwt2fullbytesa(int argc, char* argv[]); // create full byte-based Suffix-Array
|
||||
int bwa_bwt2hyb(int argc, char* argv[]); // create hybrid-index
|
||||
int bwa_pac2hybpac(int argc, char* argv[]); // convert pac to hyb.pac
|
||||
int bwa_extract_sa(int argc, char* argv[]); // extract suffix array from non-sampled suffix array
|
||||
int bwa_extract_byte_sa(int argc, char* argv[]); // extract suffix array from non-sampled suffix array
|
||||
int main_shm_hyb(int argc, char* argv[]); // manage hybrid index in shared memory
|
||||
|
||||
int hyb_test(int argc, char* argv[]); // for test
|
||||
|
||||
|
|
@ -86,8 +88,10 @@ static int usage()
|
|||
fprintf(stderr, " bwt2fullbytesa generate SA(using byte array) from BWT and Occ\n");
|
||||
fprintf(stderr, " bwt2kmer generate kmer hash index from bwt to accelarate the first 14 bases in seeding process.\n");
|
||||
fprintf(stderr, " bwt2hyb generate hybrid index from BWT\n");
|
||||
fprintf(stderr, " pac2hybpac convert pac to hyb.pac\n");
|
||||
fprintf(stderr, " extractsa generate sa from full byte suffix array\n");
|
||||
fprintf(stderr, " extractbytesa generate byte sa from full byte suffix array\n");
|
||||
fprintf(stderr, " hybshm manage hybrid index in shared memory\n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr,
|
||||
"Note: To use BWA, you need to first index the genome with `bwa index'.\n"
|
||||
|
|
@ -128,9 +132,11 @@ int main(int argc, char *argv[])
|
|||
else if (strcmp(argv[1], "bwt2fullbytesa") == 0) ret = bwa_bwt2fullbytesa(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "bwt2kmer") == 0) ret = bwa_bwt2kmer(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "bwt2hyb") == 0) ret = bwa_bwt2hyb(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "pac2hybpac") == 0) ret = bwa_pac2hybpac(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "extractsa") == 0) ret = bwa_extract_sa(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "extractbytesa") == 0) ret = bwa_extract_byte_sa(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "hybtest") == 0) ret = hyb_test(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "hybshm") == 0) ret = main_shm_hyb(argc - 1, argv + 1);
|
||||
else if (strcmp(argv[1], "hybtest") == 0) ret = hyb_test(argc - 1, argv + 1);
|
||||
else {
|
||||
fprintf(stderr, "[main] unrecognized command '%s'\n", argv[1]);
|
||||
return 1;
|
||||
|
|
|
|||
13
pemerge.c
13
pemerge.c
|
|
@ -222,8 +222,10 @@ int main_pemerge(int argc, char *argv[])
|
|||
gzFile fp, fp2 = 0;
|
||||
kseq_t *ks, *ks2 = 0;
|
||||
pem_opt_t *opt;
|
||||
int64_t seq_size = 0;
|
||||
int m = 0;
|
||||
|
||||
opt = pem_opt_init();
|
||||
opt = pem_opt_init();
|
||||
while ((c = getopt(argc, argv, "muQ:t:T:")) >= 0) {
|
||||
if (c == 'm') flag |= 1;
|
||||
else if (c == 'u') flag |= 2;
|
||||
|
|
@ -269,10 +271,11 @@ int main_pemerge(int argc, char *argv[])
|
|||
}
|
||||
|
||||
memset(cnt, 0, 8 * (MAX_ERR+1));
|
||||
while ((bseq = bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||
process_seqs(opt, n, bseq, cnt);
|
||||
free(bseq);
|
||||
}
|
||||
bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2, 1, &seq_size, &m, &bseq);
|
||||
while (n > 0) {
|
||||
process_seqs(opt, n, bseq, cnt);
|
||||
bseq_read(opt->n_threads * opt->chunk_size, &n, ks, ks2, 1, &seq_size, &m, &bseq);
|
||||
}
|
||||
|
||||
fprintf(stderr, "%12ld %s\n", (long)cnt[0], err_msg[0]);
|
||||
for (i = 1; i <= MAX_ERR; ++i)
|
||||
|
|
|
|||
|
|
@ -133,9 +133,11 @@ int display_stats(int nthreads)
|
|||
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
|
||||
fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);
|
||||
|
||||
#if SHOW_DATA_PERF
|
||||
fprintf(stderr, "seq num: %ld\n", gdat[0]);
|
||||
fprintf(stderr, "full num: %ld\n", gdat[1]);
|
||||
fprintf(stderr, "percent: %0.2lf%c\n", (double)gdat[1] / gdat[0] * 100, '%');
|
||||
#endif
|
||||
|
||||
fprintf(stderr, "all_match_len: %ld\n", all_match_len);
|
||||
fprintf(stderr, "all_seq_num: %ld\n", all_seq_num);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ Date : 2024/04/06
|
|||
#ifndef PROFILING_H_
|
||||
#define PROFILING_H_
|
||||
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define USE_RDTSC 1
|
||||
|
|
|
|||
|
|
@ -0,0 +1,167 @@
|
|||
#include "share_mem.h"

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

#include "utils.h"
|
||||
|
||||
#define SHM_NAME_LIST "/shm_hybbwa_name_list"
|
||||
#define SHM_HYB_PREFIX "/shm_hybbwa_"
|
||||
|
||||
#define SHM_NAME_LIST_SIZE 65535
|
||||
|
||||
// Convert a byte count to gigabytes using 1024-based units.
static inline double get_GB(double bytes) {
    double kib = bytes / 1024;
    double mib = kib / 1024;
    return mib / 1024;
}
|
||||
|
||||
// Return the file-name part of file_path: everything after the last '/',
// or the whole string when it contains no '/'. The result points into file_path.
const char* get_fn_from_path(const char* file_path) {
    const char* last_slash = strrchr(file_path, '/');
    return last_slash == NULL ? file_path : last_slash + 1;
}
|
||||
|
||||
// 将hybrid-index保存到share memrory里
|
||||
int shm_keep_hyb(const char* idx_prefix) {
|
||||
char full_path[MAX_PATH];
|
||||
const char* file_name = NULL;
|
||||
char share_name[MAX_PATH];
|
||||
FILE* fp = NULL;
|
||||
struct stat st;
|
||||
int shmid, init_shm = 0, idx_name_len;
|
||||
uint8_t *shm_idx_list, *mem;
|
||||
uint16_t* shm_idx_cnt; // share memory中index数量
|
||||
uint16_t* shm_byte_cnt; // 和占用的总内存数
|
||||
double sec_time;
|
||||
|
||||
/////////////////
|
||||
#define __shm_keep_hyb_code(suffix) \
|
||||
sec_time = realtime(); \
|
||||
strcat(strcpy(full_path, idx_prefix), suffix); \
|
||||
file_name = get_fn_from_path(full_path); \
|
||||
strcat(strcpy(share_name, SHM_HYB_PREFIX), get_fn_from_path(full_path)); \
|
||||
if ((shmid = shm_open(share_name, O_CREAT | O_RDWR | O_EXCL, 0644)) < 0) { \
|
||||
perror("shm_open()"); \
|
||||
return -1; \
|
||||
} \
|
||||
err_check_true(stat(full_path, &st), 0); \
|
||||
if (ftruncate(shmid, st.st_size) < 0) \
|
||||
return -1; \
|
||||
idx_name_len = 8 + strlen(file_name) + 1; \
|
||||
if (idx_name_len + *shm_byte_cnt > SHM_NAME_LIST_SIZE) \
|
||||
return -1; \
|
||||
memcpy(shm_idx_list + *shm_byte_cnt, &st.st_size, 8); \
|
||||
memcpy(shm_idx_list + *shm_byte_cnt + 8, file_name, idx_name_len - 8); \
|
||||
*shm_byte_cnt += idx_name_len; \
|
||||
*shm_idx_cnt += 1; \
|
||||
mem = (uint8_t*)mmap(0, st.st_size, PROT_READ | PROT_WRITE, MAP_SHARED, shmid, 0); \
|
||||
fp = xopen(full_path, "r"); \
|
||||
err_fread_noeof(mem, 1, st.st_size, fp); \
|
||||
err_fclose(fp); \
|
||||
munmap(mem, st.st_size); \
|
||||
fprintf(stderr, "%s, %0.2f GB, %0.2f s\n", file_name, get_GB(st.st_size), realtime() - sec_time);
|
||||
//////////////////////
|
||||
|
||||
// 打开保存索引名称的共享内存
|
||||
if ((shmid = shm_open(SHM_NAME_LIST, O_RDWR, 0)) < 0) {
|
||||
// 之前没有创建过,那就创建并初始化
|
||||
shmid = shm_open(SHM_NAME_LIST, O_CREAT | O_RDWR | O_EXCL, 0644);
|
||||
init_shm = 1;
|
||||
}
|
||||
if (shmid < 0 || ftruncate(shmid, SHM_NAME_LIST_SIZE) < 0)
|
||||
return -1;
|
||||
shm_idx_list = (uint8_t*)mmap(0, SHM_NAME_LIST_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shmid, 0);
|
||||
shm_idx_cnt = (uint16_t*)shm_idx_list;
|
||||
shm_byte_cnt = (uint16_t*)(shm_idx_list + 2);
|
||||
|
||||
if (init_shm) { // 需要初始化share mem中的索引列表
|
||||
memset(shm_idx_list, 0, SHM_NAME_LIST_SIZE);
|
||||
*shm_byte_cnt = 4;
|
||||
}
|
||||
|
||||
__shm_keep_hyb_code(HYB_PAC_SUFFIX);
|
||||
__shm_keep_hyb_code(HYB_SA_SUFFIX);
|
||||
__shm_keep_hyb_code(HYB_KMER_SUFFIX);
|
||||
__shm_keep_hyb_code(HYB_DATA_SUFFIX);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 清理所有保存在share memory中的索引数据
|
||||
int shm_clear_hyb() {
|
||||
char share_name[MAX_PATH];
|
||||
int shmid;
|
||||
uint16_t *shm_idx_cnt, i;
|
||||
char* shm_idx_list;
|
||||
char* ptr;
|
||||
|
||||
if ((shmid = shm_open(SHM_NAME_LIST, O_RDONLY, 0)) < 0)
|
||||
return -1;
|
||||
shm_idx_list = (char*)mmap(0, SHM_NAME_LIST_SIZE, PROT_READ, MAP_SHARED, shmid, 0);
|
||||
shm_idx_cnt = (uint16_t*)shm_idx_list;
|
||||
for (i = 0, ptr = shm_idx_list + 4; i < *shm_idx_cnt; ++i) {
|
||||
ptr += 8;
|
||||
strcat(strcpy(share_name, SHM_HYB_PREFIX), ptr);
|
||||
fprintf(stderr, "clear: %s\n", ptr);
|
||||
shm_unlink(share_name);
|
||||
ptr += strlen(ptr) + 1;
|
||||
}
|
||||
munmap(shm_idx_list, SHM_NAME_LIST_SIZE);
|
||||
shm_unlink(SHM_NAME_LIST);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// 从share mem中获取对应的索引数据
|
||||
void* shm_get_index(const char* full_path) {
|
||||
char share_name[MAX_PATH];
|
||||
int shmid;
|
||||
uint16_t *shm_idx_cnt, i;
|
||||
char* shm_idx_list;
|
||||
uint64_t idx_bytes;
|
||||
char* ptr;
|
||||
const char* file_name = get_fn_from_path(full_path);
|
||||
|
||||
if ((shmid = shm_open(SHM_NAME_LIST, O_RDONLY, 0)) < 0)
|
||||
return NULL;
|
||||
shm_idx_list = (char*)mmap(0, SHM_NAME_LIST_SIZE, PROT_READ, MAP_SHARED, shmid, 0);
|
||||
shm_idx_cnt = (uint16_t*)shm_idx_list;
|
||||
for (i = 0, ptr = shm_idx_list + 4; i < *shm_idx_cnt; ++i) {
|
||||
memcpy(&idx_bytes, ptr, 8);
|
||||
ptr += 8;
|
||||
if (strcmp(ptr, file_name) == 0)
|
||||
break;
|
||||
ptr += strlen(ptr) + 1;
|
||||
}
|
||||
if (i == *shm_idx_cnt)
|
||||
return NULL;
|
||||
munmap(shm_idx_list, SHM_NAME_LIST_SIZE);
|
||||
strcat(strcpy(share_name, SHM_HYB_PREFIX), file_name);
|
||||
if ((shmid = shm_open(share_name, O_RDONLY, 0)) < 0)
|
||||
return NULL;
|
||||
return mmap(0, idx_bytes, PROT_READ, MAP_SHARED, shmid, 0);
|
||||
}
|
||||
|
||||
// 列出共享内存中的hybrid-index
|
||||
int list_shm_hyb_indices() {
|
||||
int shmid;
|
||||
uint16_t *shm_idx_cnt, i;
|
||||
char* shm_idx_list;
|
||||
char* ptr;
|
||||
|
||||
if ((shmid = shm_open(SHM_NAME_LIST, O_RDONLY, 0)) < 0) {
|
||||
fprintf(stderr, "No shared hybrid index found.\n");
|
||||
return -1;
|
||||
}
|
||||
shm_idx_list = (char*)mmap(0, SHM_NAME_LIST_SIZE, PROT_READ, MAP_SHARED, shmid, 0);
|
||||
shm_idx_cnt = (uint16_t*)shm_idx_list;
|
||||
fprintf(stderr, "Shared hybrid indices (%d):\n", *shm_idx_cnt);
|
||||
for (i = 0, ptr = shm_idx_list + 4; i < *shm_idx_cnt; ++i) {
|
||||
uint64_t idx_bytes;
|
||||
memcpy(&idx_bytes, ptr, 8);
|
||||
ptr += 8;
|
||||
fprintf(stderr, "%s, %0.2f GB\n", ptr, get_GB(idx_bytes));
|
||||
ptr += strlen(ptr) + 1;
|
||||
}
|
||||
munmap(shm_idx_list, SHM_NAME_LIST_SIZE);
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -0,0 +1,20 @@
|
|||
#pragma once
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#if 0
|
||||
#define HYB_PAC_SUFFIX ".hyb.pac"
|
||||
#define HYB_SA_SUFFIX ".hyb.bytesa"
|
||||
#define HYB_KMER_SUFFIX ".hyb.kmer"
|
||||
#define HYB_DATA_SUFFIX ".hyb.data"
|
||||
#else
|
||||
#define HYB_PAC_SUFFIX ".hybrid.pac"
|
||||
#define HYB_SA_SUFFIX ".hybrid.sa"
|
||||
#define HYB_KMER_SUFFIX ".hybrid.kmer"
|
||||
#define HYB_DATA_SUFFIX ".hybrid.data"
|
||||
#endif
|
||||
|
||||
// Copy the hybrid index files of idx_prefix (pac, sa, kmer, data) into POSIX
// shared memory and register them in the shared name list.
// Returns 0 on success, -1 on failure.
int shm_keep_hyb(const char* idx_prefix);

// Unlink every registered shared index and the name list itself.
// Returns 0 on success, -1 if the name list cannot be opened.
int shm_clear_hyb(void);

// Map the shared copy of the index file named by full_path (matched by file
// name only) read-only. Returns NULL if the name list or the named object
// cannot be opened, or if the file is not registered.
void* shm_get_index(const char* full_path);

// Print the registered shared indices and their sizes to stderr.
// Returns 0 on success, -1 if the name list cannot be opened.
int list_shm_hyb_indices(void);
|
||||
115
utils.c
115
utils.c
|
|
@ -39,9 +39,28 @@
|
|||
#endif
|
||||
#include <sys/resource.h>
|
||||
#include <sys/time.h>
|
||||
#include "utils.h"
|
||||
|
||||
#include "khash.h"
|
||||
#include "ksort.h"
|
||||
#include "kvec.h"
|
||||
#include "utils.h"
|
||||
#include "yarn.h"
|
||||
|
||||
#define USE_ASYNC_READ
|
||||
|
||||
typedef struct {
|
||||
pthread_t tid;
|
||||
void* buf[2];
|
||||
volatile int readSize[2];
|
||||
uint64_t getIdx;
|
||||
uint64_t putIdx;
|
||||
volatile int finish;
|
||||
lock_t* mtx;
|
||||
} FileKV;
|
||||
|
||||
KHASH_MAP_INIT_INT64(fkv, FileKV);
|
||||
static khash_t(fkv) * fHash = 0;
|
||||
|
||||
#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
|
||||
KSORT_INIT(128, pair64_t, pair64_lt)
|
||||
KSORT_INIT(64, uint64_t, ks_lt_generic)
|
||||
|
|
@ -141,9 +160,38 @@ size_t err_fread_noeof(void *ptr, size_t size, size_t nmemb, FILE *stream)
|
|||
|
||||
int err_gzread(gzFile file, void *ptr, unsigned int len)
|
||||
{
|
||||
int ret = gzread(file, ptr, len);
|
||||
int ret = 0;
|
||||
PROF_START(read);
|
||||
#ifdef USE_ASYNC_READ
|
||||
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
|
||||
FileKV* val = &kh_value(fHash, k);
|
||||
POSSESS(val->mtx);
|
||||
WAIT_FOR(val->mtx, NOT_TO_BE, 0); // 等待有数据
|
||||
RELEASE(val->mtx);
|
||||
|
||||
if (ret < 0)
|
||||
int curIdx = val->getIdx % 2;
|
||||
if (val->finish) {
|
||||
if (val->getIdx < val->putIdx) {
|
||||
ret = val->readSize[curIdx];
|
||||
if (ret > 0)
|
||||
memcpy(ptr, val->buf[curIdx], ret);
|
||||
++val->getIdx;
|
||||
return ret;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
ret = val->readSize[curIdx];
|
||||
memcpy(ptr, val->buf[curIdx], ret);
|
||||
|
||||
POSSESS(val->mtx);
|
||||
++val->getIdx;
|
||||
TWIST(val->mtx, BY, -1);
|
||||
#else
|
||||
ret = gzread(file, ptr, len);
|
||||
#endif
|
||||
PROF_END(gprof[G_read_seq], read);
|
||||
|
||||
if (ret < 0)
|
||||
{
|
||||
int errnum = 0;
|
||||
const char *msg = gzerror(file, &errnum);
|
||||
|
|
@ -304,3 +352,64 @@ long peakrss(void)
|
|||
return r.ru_maxrss;
|
||||
#endif
|
||||
}
|
||||
|
||||
static int64_t kBufSize = 16777216;
|
||||
|
||||
static void* async_gzread(void* data) {
|
||||
gzFile file = (gzFile)data;
|
||||
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
|
||||
FileKV* val = &kh_value(fHash, k);
|
||||
|
||||
int ret = 0;
|
||||
while (1) {
|
||||
POSSESS(val->mtx);
|
||||
WAIT_FOR(val->mtx, NOT_TO_BE, 2); // 等待有数据
|
||||
RELEASE(val->mtx);
|
||||
|
||||
int curIdx = val->putIdx % 2;
|
||||
ret = gzread(file, val->buf[curIdx], kBufSize);
|
||||
val->readSize[curIdx] = ret;
|
||||
|
||||
if (ret <= 0) {
|
||||
POSSESS(val->mtx);
|
||||
val->finish = 1;
|
||||
TWIST(val->mtx, BY, 1);
|
||||
break;
|
||||
}
|
||||
|
||||
POSSESS(val->mtx);
|
||||
val->putIdx += 1;
|
||||
TWIST(val->mtx, BY, 1);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Register `file` for background reading and start its reader thread.
//
// Creates the global handle table on first use, allocates the two kBufSize
// buffers and the lock for this handle, then launches async_gzread().
// Without USE_ASYNC_READ this is a no-op.
//
// Returns 0 on success, -1 when the table or the buffers cannot be
// allocated, or the pthread_create() error code when the thread cannot be
// started. On failure nothing stays registered for `file`.
int start_async_read(gzFile file) {
    int ret = 0;
#ifdef USE_ASYNC_READ
    if (fHash == 0) {
        fHash = kh_init(fkv);
        if (fHash == 0)
            return -1;
    }
    int absent = 0;
    khiter_t k = kh_put(fkv, fHash, (int64_t)file, &absent);
    if (absent < 0)  // khash could not grow the table
        return -1;
    // kh_put() has already stored the key; only the value needs setting up.
    FileKV* fv = &kh_value(fHash, k);

    fv->getIdx = fv->putIdx = fv->finish = 0;
    fv->readSize[0] = fv->readSize[1] = 0;
    fv->buf[0] = malloc(kBufSize);
    fv->buf[1] = malloc(kBufSize);
    if (fv->buf[0] == NULL || fv->buf[1] == NULL) {
        // the reader thread would write through a NULL buffer otherwise
        free(fv->buf[0]);
        free(fv->buf[1]);
        kh_del(fkv, fHash, k);
        return -1;
    }
    fv->mtx = NEW_LOCK(0);  // value counts buffers filled but not yet consumed
    ret = pthread_create(&fv->tid, 0, async_gzread, file);
    if (ret != 0) {
        // no thread owns this entry: undo the registration
        FREE_LOCK(fv->mtx);
        free(fv->buf[0]);
        free(fv->buf[1]);
        kh_del(fkv, fHash, k);
    }
#endif
    return ret;
}
|
||||
|
||||
// Wait for the background reader thread of `file` to exit.
//
// Returns 0 on success (and always when USE_ASYNC_READ is not defined), -1
// when `file` was never registered with start_async_read() or when
// pthread_join() fails.
//
// The buffers and the table entry are left in place on purpose: err_gzread()
// still copies the remaining buffered data out of them after the reader has
// finished.
int stop_async_read(gzFile file) {
#ifdef USE_ASYNC_READ
    if (fHash == 0)
        return -1;
    khiter_t k = kh_get(fkv, fHash, (int64_t)file);
    if (k == kh_end(fHash))  // unknown handle: there is no thread to join
        return -1;
    FileKV* val = &kh_value(fHash, k);
    if (pthread_join(val->tid, 0) != 0)
        return -1;
#endif
    return 0;
}
|
||||
14
utils.h
14
utils.h
|
|
@ -28,11 +28,15 @@
|
|||
#define LH3_UTILS_H
|
||||
|
||||
#include <getopt.h>
|
||||
#include <pthread.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <sys/stat.h>
|
||||
#include <zlib.h>
|
||||
|
||||
#include "debug.h"
|
||||
#include "profiling.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
// Tell GCC to validate printf format string and args
|
||||
#define ATTRIBUTE(list) __attribute__ (list)
|
||||
|
|
@ -121,6 +125,11 @@ typedef struct {
|
|||
typedef struct { size_t n, m; uint64_t *a; } uint64_v;
|
||||
typedef struct { size_t n, m; pair64_t *a; } pair64_v;
|
||||
|
||||
// Byte buffer descriptor.
// NOTE(review): field meanings are not shown in this header; `m` presumably
// is the allocated size of `addr`, as in the other {n, m, a} vectors -- confirm.
typedef struct {
    size_t m;
    uint8_t *addr;
} buf_t;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
|
@ -158,8 +167,11 @@ extern "C" {
|
|||
void ks_introsort_64 (size_t n, uint64_t *a);
|
||||
void ks_introsort_128(size_t n, pair64_t *a);
|
||||
|
||||
int start_async_read(gzFile file);
|
||||
int stop_async_read(gzFile file);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
static inline uint64_t hash_64(uint64_t key)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,398 @@
|
|||
/* yarn.c -- generic thread operations implemented using pthread functions
|
||||
* Copyright (C) 2008, 2011, 2012, 2015, 2018, 2019, 2020 Mark Adler
|
||||
* Version 1.7 12 Apr 2020 Mark Adler
|
||||
* For conditions of distribution and use, see copyright notice in yarn.h
|
||||
*/
|
||||
|
||||
/* Basic thread operations implemented using the POSIX pthread library. All
|
||||
pthread references are isolated within this module to allow alternate
|
||||
implementations with other thread libraries. See yarn.h for the description
|
||||
of these operations. */
|
||||
|
||||
/* Version history:
|
||||
1.0 19 Oct 2008 First version
|
||||
1.1 26 Oct 2008 No need to set the stack size -- remove
|
||||
Add yarn_abort() function for clean-up on error exit
|
||||
1.2 19 Dec 2011 (changes reversed in 1.3)
|
||||
1.3 13 Jan 2012 Add large file #define for consistency with pigz.c
|
||||
Update thread portability #defines per IEEE 1003.1-2008
|
||||
Fix documentation in yarn.h for yarn_prefix
|
||||
1.4 19 Jan 2015 Allow yarn_abort() to avoid error message to stderr
|
||||
Accept and do nothing for NULL argument to free_lock()
|
||||
1.5 8 May 2018 Remove destruct() to avoid use of pthread_cancel()
|
||||
Normalize the code style
|
||||
1.6 3 Apr 2019 Add debugging information to fail() error messages
|
||||
1.7 12 Apr 2020 Fix use after free bug in ignition()
|
||||
*/
|
||||
|
||||
// For thread portability.
|
||||
#define _XOPEN_SOURCE 700
|
||||
#define _POSIX_C_SOURCE 200809L
|
||||
#define _THREAD_SAFE
|
||||
|
||||
// Use large file functions if available.
|
||||
#define _FILE_OFFSET_BITS 64
|
||||
|
||||
// External libraries and entities referenced.
|
||||
#include <stdio.h> // fprintf(), stderr
|
||||
#include <stdlib.h> // exit(), malloc(), free(), NULL
|
||||
#include <pthread.h> // pthread_t, pthread_create(), pthread_join(),
|
||||
// pthread_attr_t, pthread_attr_init(), pthread_attr_destroy(),
|
||||
// PTHREAD_CREATE_JOINABLE, pthread_attr_setdetachstate(),
|
||||
// pthread_self(), pthread_equal(),
|
||||
// pthread_mutex_t, PTHREAD_MUTEX_INITIALIZER, pthread_mutex_init(),
|
||||
// pthread_mutex_lock(), pthread_mutex_unlock(), pthread_mutex_destroy(),
|
||||
// pthread_cond_t, PTHREAD_COND_INITIALIZER, pthread_cond_init(),
|
||||
// pthread_cond_broadcast(), pthread_cond_wait(), pthread_cond_destroy()
|
||||
#include <errno.h> // EPERM, ESRCH, EDEADLK, ENOMEM, EBUSY, EINVAL, EAGAIN
|
||||
|
||||
// Interface definition.
|
||||
#include "yarn.h"
|
||||
|
||||
// Constants.
|
||||
#define local static // for non-exported functions and globals
|
||||
|
||||
// Error handling external globals, resettable by application.
|
||||
char *yarn_prefix = (char*)"yarn";
|
||||
void (*yarn_abort)(int) = NULL;
|
||||
|
||||
|
||||
// Immediately exit -- use for errors that shouldn't ever happen.
|
||||
local void fail(int err, char const *file, long line, char const *func) {
|
||||
fprintf(stderr, "%s: ", yarn_prefix);
|
||||
switch (err) {
|
||||
case EPERM:
|
||||
fputs("already unlocked", stderr);
|
||||
break;
|
||||
case ESRCH:
|
||||
fputs("no such thread", stderr);
|
||||
break;
|
||||
case EDEADLK:
|
||||
fputs("resource deadlock", stderr);
|
||||
break;
|
||||
case ENOMEM:
|
||||
fputs("out of memory", stderr);
|
||||
break;
|
||||
case EBUSY:
|
||||
fputs("can't destroy locked resource", stderr);
|
||||
break;
|
||||
case EINVAL:
|
||||
fputs("invalid request", stderr);
|
||||
break;
|
||||
case EAGAIN:
|
||||
fputs("resource unavailable", stderr);
|
||||
break;
|
||||
default:
|
||||
fprintf(stderr, "internal error %d", err);
|
||||
}
|
||||
fprintf(stderr, " (%s:%ld:%s)\n", file, line, func);
|
||||
if (yarn_abort != NULL)
|
||||
yarn_abort(err);
|
||||
exit(err);
|
||||
}
|
||||
|
||||
// Memory handling routines provided by user. If none are provided, malloc()
|
||||
// and free() are used, which are therefore assumed to be thread-safe.
|
||||
typedef void *(*malloc_t)(size_t);
|
||||
typedef void (*free_t)(void *);
|
||||
local malloc_t my_malloc_f = malloc;
|
||||
local free_t my_free = free;
|
||||
|
||||
// Use user-supplied allocation routines instead of malloc() and free().
|
||||
void yarn_mem(malloc_t lease, free_t vacate) {
|
||||
my_malloc_f = lease;
|
||||
my_free = vacate;
|
||||
}
|
||||
|
||||
// Memory allocation that cannot fail (from the point of view of the caller).
|
||||
local void *my_malloc(size_t size, char const *file, long line) {
|
||||
void *block;
|
||||
|
||||
if ((block = my_malloc_f(size)) == NULL)
|
||||
fail(ENOMEM, file, line, "malloc");
|
||||
return block;
|
||||
}
|
||||
|
||||
// -- Lock functions --
|
||||
|
||||
// A lock is a mutex, a condition variable signalled on every twist, and the
// integer value that twist_() changes and wait_for_() tests.
struct lock_s {
    pthread_mutex_t mutex;  // held between possess_() and release_()/twist_()
    pthread_cond_t cond;    // broadcast by twist_(), waited on by wait_for_()
    long value;             // value set or incremented by twist_()
};
|
||||
|
||||
lock_t *new_lock_(long initial, char const *file, long line) {
|
||||
lock_t *bolt = (lock_t *)my_malloc(sizeof(struct lock_s), file, line);
|
||||
int ret = pthread_mutex_init(&(bolt->mutex), NULL);
|
||||
if (ret)
|
||||
fail(ret, file, line, "mutex_init");
|
||||
ret = pthread_cond_init(&(bolt->cond), NULL);
|
||||
if (ret)
|
||||
fail(ret, file, line, "cond_init");
|
||||
bolt->value = initial;
|
||||
return bolt;
|
||||
}
|
||||
|
||||
// Lock the mutex of `bolt`; a pthread error is fatal (see fail()).
void possess_(lock_t *bolt, char const *file, long line) {
    int rc = pthread_mutex_lock(&bolt->mutex);

    if (rc != 0)
        fail(rc, file, line, "mutex_lock");
}
|
||||
|
||||
// Unlock the mutex of `bolt`; a pthread error is fatal (see fail()).
void release_(lock_t *bolt, char const *file, long line) {
    int rc = pthread_mutex_unlock(&bolt->mutex);

    if (rc != 0)
        fail(rc, file, line, "mutex_unlock");
}
|
||||
|
||||
// Change the value of `bolt` (TO: assign val, BY: add val), wake every
// thread waiting on it, then unlock its mutex. Any other `op` leaves the
// value untouched but still broadcasts and unlocks.
void twist_(lock_t *bolt, enum twist_op op, long val,
            char const *file, long line) {
    int rc;

    switch (op) {
        case TO:
            bolt->value = val;
            break;
        case BY:
            bolt->value += val;
            break;
        default:
            break;
    }

    if ((rc = pthread_cond_broadcast(&bolt->cond)) != 0)
        fail(rc, file, line, "cond_broadcast");
    if ((rc = pthread_mutex_unlock(&bolt->mutex)) != 0)
        fail(rc, file, line, "mutex_unlock");
}
|
||||
|
||||
#define until(a) while(!(a))
|
||||
|
||||
void wait_for_(lock_t *bolt, enum wait_op op, long val,
|
||||
char const *file, long line) {
|
||||
switch (op) {
|
||||
case TO_BE:
|
||||
until (bolt->value == val) {
|
||||
int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
|
||||
if (ret)
|
||||
fail(ret, file, line, "cond_wait");
|
||||
}
|
||||
break;
|
||||
case NOT_TO_BE:
|
||||
until (bolt->value != val) {
|
||||
int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
|
||||
if (ret)
|
||||
fail(ret, file, line, "cond_wait");
|
||||
}
|
||||
break;
|
||||
case TO_BE_MORE_THAN:
|
||||
until (bolt->value > val) {
|
||||
int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
|
||||
if (ret)
|
||||
fail(ret, file, line, "cond_wait");
|
||||
}
|
||||
break;
|
||||
case TO_BE_LESS_THAN:
|
||||
until (bolt->value < val) {
|
||||
int ret = pthread_cond_wait(&(bolt->cond), &(bolt->mutex));
|
||||
if (ret)
|
||||
fail(ret, file, line, "cond_wait");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Return the current value of `bolt`; performs no locking itself.
long peek_lock(lock_t *bolt) {
    long current = bolt->value;
    return current;
}
|
||||
|
||||
// Destroy the condition variable and mutex of `bolt` and free it. A NULL
// `bolt` is accepted and ignored; a pthread error is fatal (see fail()).
void free_lock_(lock_t *bolt, char const *file, long line) {
    int rc;

    if (bolt != NULL) {
        if ((rc = pthread_cond_destroy(&bolt->cond)) != 0)
            fail(rc, file, line, "cond_destroy");
        if ((rc = pthread_mutex_destroy(&bolt->mutex)) != 0)
            fail(rc, file, line, "mutex_destroy");
        my_free(bolt);
    }
}
|
||||
|
||||
// -- Thread functions (uses the lock_t functions above) --
|
||||
|
||||
struct thread_s {
|
||||
pthread_t id;
|
||||
int done; // true if this thread has exited
|
||||
thread *next; // for list of all launched threads
|
||||
};
|
||||
|
||||
// List of threads launched but not joined, count of threads exited but not
|
||||
// joined (incremented by ignition() just before exiting).
|
||||
local lock_t threads_lock = {
|
||||
PTHREAD_MUTEX_INITIALIZER,
|
||||
PTHREAD_COND_INITIALIZER,
|
||||
0 // number of threads exited but not joined
|
||||
};
|
||||
local thread *threads = NULL; // list of extant threads
|
||||
|
||||
// Argument bundle handed from launch_() to ignition() in the new thread.
struct capsule {
    void (*probe)(void *);  // function the new thread runs
    void *payload;          // argument passed to probe
    char const *file;       // launch_() caller's file, for error messages
    long line;              // launch_() caller's line, for error messages
};
|
||||
|
||||
// Mark the calling thread as done and alert join_all().
|
||||
local void reenter(void *arg) {
|
||||
struct capsule *capsule = (struct capsule *)arg;
|
||||
|
||||
// find this thread in the threads list by matching the thread id
|
||||
pthread_t me = pthread_self();
|
||||
possess_(&(threads_lock), capsule->file, capsule->line);
|
||||
thread **prior = &(threads);
|
||||
thread *match;
|
||||
while ((match = *prior) != NULL) {
|
||||
if (pthread_equal(match->id, me))
|
||||
break;
|
||||
prior = &(match->next);
|
||||
}
|
||||
if (match == NULL)
|
||||
fail(ESRCH, capsule->file, capsule->line, "reenter lost");
|
||||
|
||||
// mark this thread as done and move it to the head of the list
|
||||
match->done = 1;
|
||||
if (threads != match) {
|
||||
*prior = match->next;
|
||||
match->next = threads;
|
||||
threads = match;
|
||||
}
|
||||
|
||||
// update the count of threads to be joined and alert join_all()
|
||||
twist_(&(threads_lock), BY, +1, capsule->file, capsule->line);
|
||||
|
||||
// free the capsule resource, even if the thread is cancelled (though yarn
|
||||
// doesn't use pthread_cancel() -- you never know)
|
||||
my_free(capsule);
|
||||
}
|
||||
|
||||
// All threads go through this routine. Just before a thread exits, it marks
|
||||
// itself as done in the threads list and alerts join_all() so that the thread
|
||||
// resources can be released. Use a cleanup stack so that the marking occurs
|
||||
// even if the thread is cancelled.
|
||||
local void *ignition(void *arg) {
|
||||
struct capsule *capsule = (struct capsule *)arg;
|
||||
|
||||
// run reenter() before leaving
|
||||
pthread_cleanup_push(reenter, arg);
|
||||
|
||||
// execute the requested function with argument
|
||||
capsule->probe(capsule->payload);
|
||||
|
||||
// mark this thread as done, letting join_all() know, and free capsule
|
||||
pthread_cleanup_pop(1);
|
||||
|
||||
// exit thread
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Not all POSIX implementations create threads as joinable by default, so that
|
||||
// is made explicit here.
|
||||
thread *launch_(void (*probe)(void *), void *payload,
|
||||
char const *file, long line) {
|
||||
// construct the requested call and argument for the ignition() routine
|
||||
// (allocated instead of automatic so that we're sure this will still be
|
||||
// there when ignition() actually starts up -- ignition() will free this
|
||||
// allocation)
|
||||
struct capsule *capsule = (struct capsule *)my_malloc(sizeof(struct capsule), file, line);
|
||||
capsule->probe = probe;
|
||||
capsule->payload = payload;
|
||||
capsule->file = file;
|
||||
capsule->line = line;
|
||||
|
||||
// assure this thread is in the list before join_all() or ignition() looks
|
||||
// for it
|
||||
possess_(&(threads_lock), file, line);
|
||||
|
||||
// create the thread and call ignition() from that thread
|
||||
thread *th = (thread *)my_malloc(sizeof(struct thread_s), file, line);
|
||||
pthread_attr_t attr;
|
||||
int ret = pthread_attr_init(&attr);
|
||||
if (ret)
|
||||
fail(ret, file, line, "attr_init");
|
||||
ret = pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
|
||||
if (ret)
|
||||
fail(ret, file, line, "attr_setdetachstate");
|
||||
ret = pthread_create(&(th->id), &attr, ignition, capsule);
|
||||
if (ret)
|
||||
fail(ret, file, line, "create");
|
||||
ret = pthread_attr_destroy(&attr);
|
||||
if (ret)
|
||||
fail(ret, file, line, "attr_destroy");
|
||||
|
||||
// put the thread in the threads list for join_all()
|
||||
th->done = 0;
|
||||
th->next = threads;
|
||||
threads = th;
|
||||
release_(&(threads_lock), file, line);
|
||||
return th;
|
||||
}
|
||||
|
||||
// Join the thread `ally`, remove its record from the threads list, adjust the
// exited-thread count if it had already reported done, and free the record.
void join_(thread *ally, char const *file, long line) {
    // block until the thread has exited
    int rc = pthread_join(ally->id, NULL);
    if (rc != 0)
        fail(rc, file, line, "join");

    // look the record up in the list, tracking the link that points at it
    possess_(&threads_lock, file, line);
    thread **link = &threads;
    thread *hit = *link;
    while (hit != NULL && hit != ally) {
        link = &hit->next;
        hit = *link;
    }
    if (hit == NULL)
        fail(ESRCH, file, line, "join lost");

    // unlink, fix up the count of exited-but-unjoined threads, and free
    if (hit->done)
        threads_lock.value--;
    *link = hit->next;
    release_(&threads_lock, file, line);
    my_free(ally);
}
|
||||
|
||||
// This implementation of join_all() only attempts to join threads that have
|
||||
// announced that they have exited (see ignition()). When there are many
|
||||
// threads, this is faster than waiting for some random thread to exit while a
|
||||
// bunch of other threads have already exited.
|
||||
int join_all_(char const *file, long line) {
|
||||
// grab the threads list and initialize the joined count
|
||||
int count = 0;
|
||||
possess_(&(threads_lock), file, line);
|
||||
|
||||
// do until threads list is empty
|
||||
while (threads != NULL) {
|
||||
// wait until at least one thread has reentered
|
||||
wait_for_(&(threads_lock), NOT_TO_BE, 0, file, line);
|
||||
|
||||
// find the first thread marked done (should be at or near the top)
|
||||
thread **prior = &(threads);
|
||||
thread *match;
|
||||
while ((match = *prior) != NULL) {
|
||||
if (match->done)
|
||||
break;
|
||||
prior = &(match->next);
|
||||
}
|
||||
if (match == NULL)
|
||||
fail(ESRCH, file, line, "join_all lost");
|
||||
|
||||
// join the thread (will be almost immediate), remove from the threads
|
||||
// list, update the reenter count, and free the thread
|
||||
int ret = pthread_join(match->id, NULL);
|
||||
if (ret)
|
||||
fail(ret, file, line, "join");
|
||||
threads_lock.value--;
|
||||
*prior = match->next;
|
||||
my_free(match);
|
||||
count++;
|
||||
}
|
||||
|
||||
// let go of the threads list and return the number of threads joined
|
||||
release_(&(threads_lock), file, line);
|
||||
return count;
|
||||
}
|
||||
|
|
@ -0,0 +1,139 @@
|
|||
/* yarn.h -- generic interface for thread operations
|
||||
* Copyright (C) 2008, 2011, 2012, 2015, 2018, 2019, 2020 Mark Adler
|
||||
* Version 1.7 12 Apr 2020 Mark Adler
|
||||
*/
|
||||
|
||||
/*
|
||||
This software is provided 'as-is', without any express or implied
|
||||
warranty. In no event will the author be held liable for any damages
|
||||
arising from the use of this software.
|
||||
|
||||
Permission is granted to anyone to use this software for any purpose,
|
||||
including commercial applications, and to alter it and redistribute it
|
||||
freely, subject to the following restrictions:
|
||||
|
||||
1. The origin of this software must not be misrepresented; you must not
|
||||
claim that you wrote the original software. If you use this software
|
||||
in a product, an acknowledgment in the product documentation would be
|
||||
appreciated but is not required.
|
||||
2. Altered source versions must be plainly marked as such, and must not be
|
||||
misrepresented as being the original software.
|
||||
3. This notice may not be removed or altered from any source distribution.
|
||||
|
||||
Mark Adler
|
||||
madler@alumni.caltech.edu
|
||||
*/
|
||||
|
||||
/* Basic thread operations
|
||||
|
||||
This interface isolates the local operating system implementation of threads
|
||||
from the application in order to facilitate platform independent use of
|
||||
threads. All of the implementation details are deliberately hidden.
|
||||
|
||||
Assuming adequate system resources and proper use, none of these functions
|
||||
can fail. As a result, any errors encountered will cause an exit() to be
|
||||
executed, or the execution of your own optionally-provided abort function.
|
||||
|
||||
These functions allow the simple launching and joining of threads, and the
|
||||
locking of objects and synchronization of changes of objects. The latter is
|
||||
implemented with a single lock_t type that contains an integer value. The
|
||||
value can be ignored for simple exclusive access to an object, or the value
|
||||
can be used to signal and wait for changes to an object.
|
||||
|
||||
-- Arguments --
|
||||
|
||||
thread *thread; identifier for launched thread, used by join
|
||||
void probe(void *); pointer to function "probe", run when thread starts
|
||||
void *payload; single argument passed to the probe function
|
||||
lock_t *lock; a lock with a value -- used for exclusive access to
|
||||
an object and to synchronize threads waiting for
|
||||
changes to an object
|
||||
long val; value to set lock_t, increment lock_t, or wait for
|
||||
int n; number of threads joined
|
||||
|
||||
-- Thread functions --
|
||||
|
||||
thread = LAUNCH(probe, payload) - launch a thread -- exit via probe() return
|
||||
join(thread) - join a thread and by joining end it, waiting for the thread
|
||||
to exit if it hasn't already -- will free the resources allocated by
|
||||
launch() (don't try to join the same thread more than once)
|
||||
n = join_all() - join all threads launched by launch() that are not joined
|
||||
yet and free the resources allocated by the launches, usually to clean
|
||||
up when the thread processing is done -- join_all() returns an int with
|
||||
the count of the number of threads joined (join_all() should only be
|
||||
called from the main thread, and should only be called after any calls
|
||||
of join() have completed)
|
||||
|
||||
-- Lock functions --
|
||||
|
||||
lock = NEW_LOCK(val) - create a new lock_t with initial value val (the lock is
|
||||
created in the released state)
|
||||
possess(lock_t) - acquire exclusive possession of a lock_t, waiting if necessary
|
||||
twist(lock_t, [TO | BY], val) - set lock_t to or increment lock_t by val, signal
|
||||
all threads waiting on this lock_t and then release the lock_t -- must
|
||||
possess the lock_t before calling (twist releases, so don't do a
|
||||
release() after a twist() on the same lock_t)
|
||||
wait_for(lock_t, [TO_BE | NOT_TO_BE | TO_BE_MORE_THAN | TO_BE_LESS_THAN], val)
|
||||
- wait on lock_t value to be, not to be, be greater than, or be less than
|
||||
val -- must possess the lock_t before calling, will possess the lock_t on
|
||||
return but the lock_t is released while waiting to permit other threads
|
||||
to use twist() to change the value and signal the change (so make sure
|
||||
that the object is in a usable state when waiting)
|
||||
release(lock_t) - release a possessed lock_t (do not try to release a lock_t that
|
||||
the current thread does not possess)
|
||||
val = peek_lock(lock_t) - return the value of the lock_t (assumes that lock_t is
|
||||
already possessed, no possess or release is done by peek_lock())
|
||||
free_lock(lock_t) - free the resources allocated by new_lock() (application
|
||||
must assure that the lock_t is released before calling free_lock())
|
||||
|
||||
-- Memory allocation ---
|
||||
|
||||
yarn_mem(better_malloc, better_free) - set the memory allocation and free
|
||||
routines for use by the yarn routines where the supplied routines have
|
||||
the same interface and operation as malloc() and free(), and may be
|
||||
provided in order to supply thread-safe memory allocation routines or
|
||||
for any other reason -- by default malloc() and free() will be used
|
||||
|
||||
-- Error control --
|
||||
|
||||
yarn_prefix - a char pointer to a string that will be the prefix for any
|
||||
error messages that these routines generate before exiting -- if not
|
||||
changed by the application, "yarn" will be used
|
||||
yarn_abort - an external function that will be executed when there is an
|
||||
internal yarn error, due to out of memory or misuse -- this function
|
||||
may exit to abort the application, or if it returns, the yarn error
|
||||
handler will exit (set to NULL by default for no action)
|
||||
*/
|
||||
|
||||
extern char *yarn_prefix;
|
||||
extern void (*yarn_abort)(int);
|
||||
|
||||
void yarn_mem(void *(*)(size_t), void (*)(void *));
|
||||
|
||||
typedef struct thread_s thread;
|
||||
thread *launch_(void (*)(void *), void *, char const *, long);
|
||||
#define LAUNCH(a, b) launch_(a, b, __FILE__, __LINE__)
|
||||
void join_(thread *, char const *, long);
|
||||
#define JOIN(a) join_(a, __FILE__, __LINE__)
|
||||
int join_all_(char const *, long);
|
||||
#define JOIN_ALL() join_all_(__FILE__, __LINE__)
|
||||
|
||||
typedef struct lock_s lock_t;
|
||||
lock_t *new_lock_(long, char const *, long);
|
||||
#define NEW_LOCK(a) new_lock_(a, __FILE__, __LINE__)
|
||||
void possess_(lock_t *, char const *, long);
|
||||
#define POSSESS(a) possess_(a, __FILE__, __LINE__)
|
||||
void release_(lock_t *, char const *, long);
|
||||
// #define release(a) release_(a, __FILE__, __LINE__)
|
||||
#define RELEASE(a) release_(a, __FILE__, __LINE__)
|
||||
enum twist_op { TO, BY };
|
||||
void twist_(lock_t *, enum twist_op, long, char const *, long);
|
||||
#define TWIST(a, b, c) twist_(a, b, c, __FILE__, __LINE__)
|
||||
enum wait_op {
|
||||
TO_BE, /* or */ NOT_TO_BE, /* that is the question */
|
||||
TO_BE_MORE_THAN, TO_BE_LESS_THAN };
|
||||
void wait_for_(lock_t *, enum wait_op, long, char const *, long);
|
||||
#define WAIT_FOR(a, b, c) wait_for_(a, b, c, __FILE__, __LINE__)
|
||||
long peek_lock(lock_t *);
|
||||
void free_lock_(lock_t *, char const *, long);
|
||||
#define FREE_LOCK(a) free_lock_(a, __FILE__, __LINE__)
|
||||
Loading…
Reference in New Issue