Compare commits

..

1 Commits

Author SHA1 Message Date
zzh fb5279120b hybrid index实现seeding过程的初始代码 2025-06-16 10:11:29 +08:00
22 changed files with 341 additions and 280 deletions

5
.vscode/launch.json vendored
View File

@ -18,11 +18,10 @@
"-R",
"'@RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa'",
"~/data1/fmt_ref/human_g1k_v37_decoy.fasta",
"~/data/fastq/dataset/na12878_wes_144/1w_1.fq",
"~/data/fastq/dataset/na12878_wes_144/1w_2.fq",
"~/data/dataset/real/D3/n1.fq",
"~/data/dataset/real/D3/n2.fq",
"-o",
"/dev/null",
"-Z"
],
"cwd": "${workspaceFolder}", //
},

View File

@ -43,9 +43,7 @@
"ertseeding.h": "c",
"algorithm": "c",
"filesystem": "c",
"deque": "cpp",
"string": "cpp",
"unordered_map": "cpp",
"vector": "cpp"
"chrono": "c",
"queue": "c"
}
}

View File

@ -3,7 +3,7 @@ CFLAGS= -g -Wall -Wno-unused-function -mavx2 -O2
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
SHOW_PERF= -DSHOW_PERF
SHOW_DATA_PERF= #-DSHOW_DATA_PERF
SHOW_DATA_PERF= -DSHOW_DATA_PERF
FILTER_FULL_MATCH= #-DFILTER_FULL_MATCH
USE_MT_READ= -DUSE_MT_READ

25
bwa.c
View File

@ -108,20 +108,14 @@ static void *thread_bseq_read(void *data) {
int cur_n = 0, cur_pos = d->start_pos, size = 0;
int ret_status = 1;
//pthread_t thread_id = pthread_self();
//fprintf(stderr, "Thread ID: %lu\n", thread_id);
PROF_START(parse);
while (cur_n < d->n_bound && (ret_status = kseq_read(ks)) >= 0) {
trim_readno(&ks->name);
kseq2bseq1(ks, seqs + cur_pos, copy_comment);
while (cur_n < d->n_bound && (ret_status = kseq_read(ks)) >= 0) {
trim_readno(&ks->name);
kseq2bseq1(ks, seqs + cur_pos, copy_comment);
seqs[cur_pos].id = cur_pos;
size += seqs[cur_pos].l_seq;
cur_pos += 2; cur_n += 1;
if (size >= chunk_size) break;
if (size >= chunk_size) break;
}
PROF_END(gprof[G_parse_seq], parse);
d->ret_n = cur_n; d->ret_size = size; d->ret_status = ret_status;
return 0;
}
@ -249,13 +243,12 @@ void bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment
int size = 0, m, n;
bseq1_t *seqs = *seqs_ptr;
n = 0; m = *m_;
while (kseq_read(ks) >= 0) {
while (kseq_read(ks) >= 0) {
if (ks2 && kseq_read(ks2) < 0) { // the 2nd file has fewer reads
fprintf(stderr, "[W::%s] the 2nd file has fewer sequences.\n", __func__);
break;
}
PROF_START(parse);
if (n >= m) {
if (n >= m) {
m = m? m<<1 : 256;
seqs = realloc(seqs, m * sizeof(bseq1_t));
memset(seqs + n, 0, (m-n) * sizeof(bseq1_t));
@ -264,11 +257,9 @@ void bseq_read(int chunk_size, int *n_, void *ks1_, void *ks2_, int copy_comment
if (ks2) {
READ_ONE_SEQ(ks2);
}
PROF_END(gprof[G_parse_seq], parse);
if (size >= chunk_size && (n&1) == 0) break;
if (size >= chunk_size && (n&1) == 0) break;
}
if (size == 0) { // test if the 2nd file is finished
if (size == 0) { // test if the 2nd file is finished
if (ks2 && kseq_read(ks2) >= 0)
fprintf(stderr, "[W::%s] the 1st file has fewer sequences.\n", __func__);
}

2
bwa.h
View File

@ -95,7 +95,7 @@ extern "C" {
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size);
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size, int num_thread);
char *bwa_idx_infer_prefix(const char *hint);
bwt_t *bwa_idx_load_bwt(const char *hint);

View File

@ -684,6 +684,8 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
uint8_t *rseq = 0;
uint64_t *srt;
smem_aux_t *aux = (smem_aux_t*)buf;
kvec_t(int) qlenv;
kv_init(qlenv);
if (c->n == 0) return;
// get the max possible span
@ -763,6 +765,10 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
if (bwa_verbose >= 4) err_printf("** ---> Extending from seed(%d) [%ld;%ld,%ld] @ %s <---\n", k, (long)s->len, (long)s->qbeg, (long)s->rbeg, bns->anns[c->rid].name);
if (s->qbeg) { // left extension
// s-qbeg是seed左侧需要扩展的长度
//tdat[s->qbeg][tid]++;
kv_push(int, qlenv, s->qbeg);
fprintf(stderr, "%d,\n", s->qbeg);
int qle, tle, gtle, gscore;
tmp = s->rbeg - rmax[0];
#ifndef USE_AVX2
@ -807,6 +813,9 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
int qle, tle, qe, re, gtle, gscore, sc0 = a->score;
qe = s->qbeg + s->len;
re = s->rbeg + s->len - rmax[0];
kv_push(int, qlenv, l_query - qe);
fprintf(stderr, "%d,\n", l_query - qe);
// tdat[l_query - qe][tid]++;
assert(re >= 0);
for (i = 0; i < MAX_BAND_TRY; ++i) {
int prev = a->score;
@ -848,6 +857,21 @@ void mem_chain2aln(const mem_opt_t *opt, const bntseq_t *bns, const uint8_t *pac
a->frac_rep = c->frac_rep;
}
tdat[qlenv.n][tid] ++;
// 计算标准差
double avgl = 0, stdl = 0;
for (i=0; i<qlenv.n; ++i) {
avgl += qlenv.a[i];
}
avgl = avgl / qlenv.n;
for (i=0; i<qlenv.n; ++i) {
stdl += pow((avgl - qlenv.a[i]), 2);
}
stdl = sqrt(stdl);
kv_destroy(qlenv);
free(srt);
free(rseq);
}
@ -1304,16 +1328,20 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
// 1. first pass: find all SMEMs
// goto third_seed;
PROF_START(seed_1);
while (x < len) {
__sync_fetch_and_add(&gdat[0], 1);
while (x < len) {
if (seq[x] < 4) {
start_flag = 0;
x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
//x = fmt_smem(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
x = fmt_smem_new(fmt, len, seq, x, start_width, opt->min_seed_len, 0, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32); // seed length
max_seed_len = MAX(max_seed_len, slen);
if (slen >= opt->min_seed_len) {
kv_push(bwtintv_t, smem->mem, *p);
//fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]);
//fprintf(stderr, "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]);
}
}
// break; // for test
@ -1322,30 +1350,40 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
if (start_flag) ++start_N_num;
}
}
PROF_END(tprof[T_SEED_1][tid], seed_1);
// && smem->mem.a[0].x[2] == 1
if (max_seed_len == len - start_N_num && smem->mem.n > 0 && smem->mem.a[0].x[2] == 1) {
__sync_fetch_and_add(&gdat[1], 1);
}
PROF_END(tprof[T_SEED_1][tid], seed_1);
//goto third_seed;
#ifdef FILTER_FULL_MATCH
if (max_seed_len == len - start_N_num) {
goto collect_intv_end;
}
#endif
// 2. second pass: find MEMs inside a long SMEM
PROF_START(seed_2);
//fprintf(gf[0], "second %ld\n", ++debug_num);
// 2. second pass: find MEMs inside a long SMEM
// fprintf(stderr, "%ld\n", debug_num++);
PROF_START(seed_2);
old_n = smem->mem.n;
for (k = 0; k < old_n; ++k) {
bwtintv_t *p = &smem->mem.a[k];
int start = p->info>>32, end = (int32_t)p->info;
if (end - start < split_len || p->x[2] > opt->split_width) continue;
fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
for (i = 0; i < a->mem1.n; ++i) {
//fmt_smem(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
fmt_smem_new(fmt, len, seq, (start + end) >> 1, p->x[2] + 1, opt->min_seed_len, p, &a->mem1, a->tmpv[0]);
// fprintf(stderr, "%ld\n", debug_num++);
for (i = 0; i < a->mem1.n; ++i) {
bwtintv_t *p = &a->mem1.a[i];
int slen = (uint32_t)p->info - (p->info >> 32);
if (slen >= opt->min_seed_len) {
kv_push(bwtintv_t, smem->mem, a->mem1.a[i]);
}
//fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]);
}
}
}
PROF_END(tprof[T_SEED_2][tid], seed_2);
//fprintf(gf[0], "\n");
PROF_END(tprof[T_SEED_2][tid], seed_2);
//third_seed:
// 3. third pass: LAST-like
@ -1358,7 +1396,8 @@ static void mem_collect_intv_batch(const mem_opt_t *opt, const FMTIndex *fmt, in
x = fmt_seed_strategy1(fmt, len, seq, x, opt->min_seed_len, opt->max_mem_intv, &m);
if (m.x[2] > 0) {
kv_push(bwtintv_t, smem->mem, m);
}
//fprintf(stderr, "seed_3: %ld %d %ld %ld\n", m.info >> 32, (uint32_t)m.info, (-(m.info >> 32) + (uint32_t)m.info), m.x[2]);
}
} else ++x;
}
}
@ -1369,6 +1408,11 @@ collect_intv_end:
#endif
// sort
ks_introsort(mem_intv, smem->mem.n, smem->mem.a);
// for (i = 0; i < smem->mem.n; ++i) {
// bwtintv_t *p = &smem->mem.a[i];
// fprintf(gf[0], "seed: %ld\t%d\t%ld\t%ld\n", p->info >> 32, (uint32_t)p->info, (-(p->info >> 32) + (uint32_t)p->info), p->x[2]);
// }
// fprintf(gf[0], "\n");
}
void find_smem(const mem_opt_t *opt, const FMTIndex *fmt, int len, const uint8_t *seq, smem_aux_t *aux, smem_v *smemv, int tid)

18
bwt.c
View File

@ -62,7 +62,8 @@ static inline bwtint_t bwt_invPsi(const bwt_t *bwt, bwtint_t k) // compute inver
// 设置某一行的排序值sa的索引有效值从1开始0设置为-1, 小端模式)
void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val)
{
const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组共享33个字节
//const bwtint_t block_idx = (k >> 3) * 33;
const bwtint_t block_idx = ((k >> 3) << 5) + (k>>3); // 8个数为一组共享33个字节
const int val_idx_in_block = k & 7;
const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2);
bwtint_t *sa_addr = (bwtint_t *)(sa_arr + start_byte_idx);
@ -73,13 +74,20 @@ void inline bwt_set_sa(uint8_t *sa_arr, bwtint_t k, bwtint_t val)
// 获取某一行的排序值(小端模式)
bwtint_t bwt_get_sa(uint8_t *sa_arr, bwtint_t k)
{
const bwtint_t block_idx = (k >> 3) * 33; // 8个数为一组共享33个字节
const int val_idx_in_block = k & 7;
// const bwtint_t block_idx = (k >> 3) * 33;
const bwtint_t block_idx = ((k >> 3) << 5) + (k >> 3); // 8个数为一组共享33个字节
const int val_idx_in_block = k & 7;
const bwtint_t start_byte_idx = block_idx + (val_idx_in_block << 2);
bwtint_t val = *(bwtint_t *)(sa_arr + start_byte_idx);
val = (val >> val_idx_in_block) & 8589934591;
return val;
}
bwtint_t bwt_get_sa_2(uint8_t *sa_arr, bwtint_t k) {
const bwtint_t start_byte = ((k << 5) + k) >> 3; // 存储这个sa数据的起始字节
bwtint_t val = *(bwtint_t *)(sa_arr + start_byte);
val = (val >> (k & 7)) & 8589934591;
return val;
}
// 获取kmer的fmt匹配信息
inline void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos)
@ -147,7 +155,7 @@ inline void bwt_kmer_get(const KmerHash *kmer_hash, bwtintv_t *ok, uint32_t qbit
}
// bwt->bwt and bwt->occ must be precalculated
void bwt_cal_byte_sa(bwt_t *bwt, int intv)
void bwt_cal_byte_sa(bwt_t *bwt, int intv, int num_thread)
{
bwtint_t isa, sa, i, block_size; // S(isa) = sa isa是后缀数组的索引sa表示位置
double tmp_time, elapsed_time;
@ -184,7 +192,7 @@ void bwt_cal_byte_sa(bwt_t *bwt, int intv)
}
if (isa % intv == 0) bwt_set_sa(bwt->byte_sa, isa / intv, sa);
// bwt_set_sa(bwt->byte_sa, 0, (bwtint_t)-1); // 赋值成-1也没问题set_sa那里已经修正了
bwt_set_sa(bwt->byte_sa, 0, 8589934591); // before this line, bwt->sa[0] = bwt->seq_len
bwt_set_sa(bwt->byte_sa, 0, 8589934591); // 2的33次方-1 before this line, bwt->sa[0] = bwt->seq_len
}
// bwt->bwt and bwt->occ must be precalculated

4
bwt.h
View File

@ -103,7 +103,7 @@ typedef struct {
typedef struct { size_t n, m; bwtintv_t *a; } bwtintv_v;
#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8)
#define SA_BYTES(n_sa) ((((33 * (n_sa) + 7) / 8) & (~7L)) + 8)/* 8字节对齐*/
/* For general OCC_INTERVAL, the following is correct:
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
@ -137,7 +137,7 @@ extern "C" {
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW
void bwt_cal_sa(bwt_t *bwt, int intv);
void bwt_cal_byte_sa(bwt_t *bwt, int intv);
void bwt_cal_byte_sa(bwt_t *bwt, int intv, int num_thread);
void bwt_bwtupdate_core(bwt_t *bwt);

View File

@ -261,10 +261,14 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
int bwa_bwt2bytesa(int argc, char *argv[]) // the "bwt2bytesa" command
{
bwt_t *bwt;
int c, sa_intv = 32;
int c, sa_intv = 32, num_thread = 1;
while ((c = getopt(argc, argv, "i:")) >= 0) {
switch (c) {
case 'i': sa_intv = atoi(optarg); break;
case 't':
num_thread = atoi(optarg);
if (num_thread < 1) num_thread = 1;
break;
default: return 1;
}
}
@ -273,7 +277,7 @@ int bwa_bwt2bytesa(int argc, char *argv[]) // the "bwt2bytesa" command
return 1;
}
bwt = bwt_restore_bwt(argv[optind]);
bwt_cal_byte_sa(bwt, sa_intv);
bwt_cal_byte_sa(bwt, sa_intv, num_thread);
bwt_dump_byte_sa(argv[optind+1], bwt);
bwt_destroy(bwt);
return 0;
@ -389,12 +393,12 @@ int bwa_index(int argc, char *argv[]) // the "index" command
strcpy(prefix, argv[optind]);
if (is_64) strcat(prefix, ".64");
}
bwa_idx_build(argv[optind], prefix, algo_type, block_size);
bwa_idx_build(argv[optind], prefix, algo_type, block_size, num_threads);
free(prefix);
return 0;
}
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size)
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size, int num_threads)
{
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
@ -475,7 +479,7 @@ int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_s
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
// create byte sa
t = clock();
bwt_cal_byte_sa(bwt, 4);
bwt_cal_byte_sa(bwt, 4, num_threads);
strcpy(str, prefix); strcat(str, ".bytesa");
bwt_dump_byte_sa(str, bwt);
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);

View File

@ -13,6 +13,8 @@ FILE *gf[4] = {0},
*gft[4] = {0},
*gfi[4] = {0};
uint64_t debug_num = 0;
int open_qti_files()
{
char fn[1024] = {0};

View File

@ -6,6 +6,7 @@
Date : 2024/04/09
***********************************************************************************************/
#include <stdio.h>
#include <stdint.h>
////////////////// for debug and test //////////////////////////
@ -21,6 +22,8 @@
extern FILE *gf[4], *gfq[4], *gft[4], *gfi[4];
extern uint64_t debug_num;
int open_qti_files();
int open_debug_files();

View File

@ -225,24 +225,20 @@ static void *thread_read(void *data)
ktp_aux_t *aux = (ktp_aux_t *)data;
while (1)
{
PROF_START(w1);
POSSESS(input_have);
WAIT_FOR(input_have, NOT_TO_BE, 0);
POSSESS(input_have);
WAIT_FOR(input_have, NOT_TO_BE, 0);
RELEASE(input_have);
PROF_END(gprof[G_read_wait_1], w1);
// fprintf(stderr, "start read: %ld\n", aux->read_idx);
// fprintf(stderr, "start read: %ld\n", aux->read_idx);
if (read_data(aux, aux->data) == 0) {
POSSESS(input_have);
aux->read_complete = 1;
TWIST(input_have, BY, -1);
break;
}
PROF_START(w2);
POSSESS(input_have);
POSSESS(input_have);
aux->read_idx++;
TWIST(input_have, BY, -1);
PROF_END(gprof[G_read_wait_2], w2);
// fprintf(stderr, "next read: %ld\n", aux->read_idx);
// fprintf(stderr, "next read: %ld\n", aux->read_idx);
}
return 0;
}
@ -255,17 +251,15 @@ static void *thread_calc(void *data)
while (1)
{
// fprintf(stderr, "start calc: %ld\n", aux->calc_idx);
PROF_START(w1);
POSSESS(input_have);
POSSESS(input_have);
WAIT_FOR(input_have, NOT_TO_BE, 2);
RELEASE(input_have); // 应该没必要持有吧?
POSSESS(output_have);
WAIT_FOR(output_have, NOT_TO_BE, 2);
RELEASE(output_have);
PROF_END(gprof[G_comp_wait_1], w1);
if (aux->calc_idx < aux->read_idx) {
if (aux->calc_idx < aux->read_idx) {
calc_data(aux, aux->data + d_idx);
d_idx = !d_idx;
add_idx = 1;
@ -278,15 +272,13 @@ static void *thread_calc(void *data)
TWIST(output_have, BY, 1); // 最后要唤醒写线程
break; // 计算完了
}
PROF_START(w2);
POSSESS(output_have);
POSSESS(output_have);
if (add_idx) aux->calc_idx ++;
TWIST(output_have, BY, 1);
POSSESS(input_have);
TWIST(input_have, BY, 1);
PROF_END(gprof[G_comp_wait_2], w2);
// fprintf(stderr, "next calc: %ld\n", aux->calc_idx);
// fprintf(stderr, "next calc: %ld\n", aux->calc_idx);
}
return 0;
}
@ -298,12 +290,10 @@ static void *thread_write(void *data)
while (1)
{
// fprintf(stderr, "start write: %ld\n", aux->write_idx);
PROF_START(w1);
POSSESS(output_have);
POSSESS(output_have);
WAIT_FOR(output_have, NOT_TO_BE, 0);
RELEASE(output_have);
PROF_END(gprof[G_write_wait_1], w1);
if (aux->write_idx < aux->calc_idx) {
if (aux->write_idx < aux->calc_idx) {
write_data(aux, aux->data + d_idx);
d_idx = !d_idx;
aux->write_idx++;
@ -313,11 +303,9 @@ static void *thread_write(void *data)
write_data(aux, aux->data + d_idx);
break;
}
PROF_START(w2);
POSSESS(output_have);
POSSESS(output_have);
TWIST(output_have, BY, -1);
PROF_END(gprof[G_write_wait_2], w2);
// fprintf(stderr, "next write: %ld\n", aux->write_idx);
// fprintf(stderr, "next write: %ld\n", aux->write_idx);
}
return 0;
}
@ -356,6 +344,7 @@ static void update_a(mem_opt_t *opt, const mem_opt_t *opt0)
int main_mem(int argc, char *argv[])
{
open_debug_files();
#ifdef DEBUG_FILE_OUTPUT
open_debug_files();
// open_debug_files
@ -629,10 +618,8 @@ int main_mem(int argc, char *argv[])
return 1;
}
fp = gzdopen(fd, "r");
start_async_read(fp);
// stop_async_read(fp);
aux.ks = kseq_init(fp);
if (optind + 2 < argc) {
aux.ks = kseq_init(fp);
if (optind + 2 < argc) {
if (opt->flag&MEM_F_PE) {
if (bwa_verbose >= 2)
fprintf(stderr, "[W::%s] when '-p' is in use, the second query file is ignored.\n", __func__);
@ -643,8 +630,7 @@ int main_mem(int argc, char *argv[])
return 1;
}
fp2 = gzdopen(fd2, "r");
start_async_read(fp2);
aux.ks2 = kseq_init(fp2);
aux.ks2 = kseq_init(fp2);
opt->flag |= MEM_F_PE;
}
}
@ -671,11 +657,9 @@ int main_mem(int argc, char *argv[])
// free(opt);
// bwa_idx_destroy(aux.idx);
// kseq_destroy(aux.ks);
stop_async_read(fp);
err_gzclose(fp); kclose(ko);
if (aux.ks2) {
err_gzclose(fp); kclose(ko);
if (aux.ks2) {
// kseq_destroy(aux.ks2);
stop_async_read(fp2);
err_gzclose(fp2); kclose(ko2);
}
PROF_END(gprof[G_ALL], all);
@ -693,8 +677,9 @@ int main_mem(int argc, char *argv[])
#ifdef DEBUG_FILE_OUTPUT
close_files();
#endif
close_files();
return 0;
return 0;
}
int main_fastmap(int argc, char *argv[])

167
fmt_idx.c
View File

@ -1104,6 +1104,171 @@ fmt_smem_end:
return ret;
}
// 找smemseed(lm: long_smem)
int fmt_smem_new(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm,
bwtintv_v *mem, bwtintv_v *tmpvec) {
if (x == 0 || q[x - 1] > 3)
return fmt_smem_forward(fmt, len, q, x, min_intv, min_seed_len, mem);
// int flag = 0;
int i, ret = x+1, kmer_len;
bwtintv_t ik = {0}, ok1 = {0}, ok2 = {0};
bwtintv_t mt = {0};
uint32_t qbit = 0;
mem->n = 0;
if (q[x] > 3)
return x + 1;
if (min_intv < 1)
min_intv = 1; // the interval size should be at least 1
#define CHECK_INTV_UPDATE(iv, ov, end_pos) \
if (ov.x[2] != iv.x[2]) { \
if (ov.x[2] < min_intv) \
break; \
} \
iv = ov; \
iv.info = end_pos
// 通过反复用forward和backward搜索来找到每个SMEM直到不包含pivot即x
int start = x;
while (1) {
qbit = build_forward_kmer(&q[start], len - start, HASH_KMER_LEN, &kmer_len);
if (kmer_len < HASH_KMER_LEN) {
// 扩展到最远的位置
if (start < x)
break; // 遇到左侧N了
ret = start + kmer_len;
start = ret - HASH_KMER_LEN;
if (start < 0 || q[x-1] > 3)
break;
continue;
}
// forward search
bwt_kmer_get(&fmt->kmer_hash, &ik, qbit, kmer_len - 1); // 初始碱基位置
ik.info = start + kmer_len;
if (ik.x[2] < min_intv) {
if (start == x)
ret = ik.info;
if (start > 0) {
--start;
continue;
} else
break;
}
for (i = (int)ik.info; i + 1 < len; i += 2) {
if (q[i] < 4 && q[i + 1] < 4) {
fmt_extend2(fmt, &ik, &ok1, &ok2, 0, 3 - q[i], 3 - q[i + 1]);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[1] - 1 + ok2.x[2]), 0, 2);
CHECK_INTV_UPDATE(ik, ok1, i + 1);
CHECK_INTV_UPDATE(ik, ok2, i + 2);
#if 0
// 在这里进行判断是否只有一个候选了
if (min_intv == 1 && ok2.x[2] == min_intv) {
direct_extend(fmt, len, q, start, i + 2, ok2.x[0], fmt_sa(fmt, ok2.x[0]), &mt, 0);
if (mt.rm[0].qe <= x)
goto smem_end;
kv_push(bwtintv_t, *mem, mt); // 这里可以判断一下是否大于min_seed_len
if (start == x)
ret = (uint32_t)mt.info;
if (mt.rm[0].qs == 0 || q[mt.rm[0].qs - 1] > 3)
goto smem_end;
i = mt.rm[0].qs - 1;
goto smem_next;
}
#endif
} else if (q[i] < 4) { // q[i+1] >= 4
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
CHECK_INTV_UPDATE(ik, ok1, i + 1);
goto smem_backward_search;
} else {
goto smem_backward_search;
}
}
for (; i == len - 1; ++i) { // 扩展到了最后一个碱基
if (q[i] < 4) {
fmt_extend1(fmt, &ik, &ok1, 0, 3 - q[i]);
CHECK_INTV_UPDATE(ik, ok1, i + 1);
} else {
goto smem_backward_search;
}
}
if (start == x) ret = ik.info;
if (ik.info <= x) // 不再包含pivot退出
goto smem_end;
// backward search
smem_backward_search:
#define CHECK_INTV_BREAK(ik, ok, spos) \
if (ok.x[2] < min_intv) { \
i = spos; \
goto smem_next; \
} \
ok.info = ik.info; \
ik = ok;
#define CHECK_INTV_END(ik, ok) \
if (ok.x[2] < min_intv) { \
CHECK_ADD_MEM(i + 1, ik, mem); \
goto smem_end; \
} \
ok.info = ik.info; \
ik = ok;
i = start - 1;
for (; i > 0; i -= 2) {
if (q[i] < 4 && q[i - 1] < 4) // 两个都可以扩展
{
fmt_direct2_extend2(fmt, &ik, &ok1, &ok2, 1, q[i], q[i - 1]);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1), 0, 2);
__builtin_prefetch(fmt_occ_intv(fmt, ok2.x[0] - 1 + ok2.x[2]), 0, 2);
CHECK_INTV_BREAK(ik, ok1, i);
CHECK_INTV_BREAK(ik, ok2, i-1);
} else if (q[i] < 4) // 只能扩展一个
{
fmt_direct_extend1(fmt, &ik, &ok1, 1, q[i]);
CHECK_INTV_END(ik, ok1);
--i; // 避免进行i-2
break;
} else { // 不能扩展
CHECK_ADD_MEM(i + 1, ik, mem);
goto smem_end;
}
}
for (; i == 0; --i) { // 扩展到了第一个碱基
if (q[i] < 4) {
fmt_direct_extend1(fmt, &ik, &ok1, 1, q[i]);
CHECK_INTV_BREAK(ik, ok1, i);
} else {
CHECK_ADD_MEM(i + 1, ik, mem);
goto smem_end;
}
}
if (i == -1) {
CHECK_ADD_MEM(i + 1, ik, mem);
goto smem_end;
}
smem_next:
ik.info |= (uint64_t)(i+1) << 32;
CHECK_ADD_MEM(i + 1, ik, mem);
if (start < 0 || q[start] > 3) {
break;
}
start = i;
}
smem_end:
fmt_reverse_intvs(mem); // s.t. sorted by the start coordinate
// printf("%d\n", ret);
return ret;
}
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem)
{
int i, kmer_len, first_extend_len;
@ -1310,4 +1475,4 @@ bwtint_t fmt_sa_offset(const FMTIndex *fmt, bwtint_t k)
}
sa = (sa << 48) | (k / fmt->sa_intv);
return sa;
}
}

View File

@ -110,6 +110,7 @@ void kmer_getval_at(uint8_t *mem_addr, bwtintv_t *ok, int pos);
void fmt_kmer_get(const FMTIndex *fmt, bwtintv_t *ok, uint32_t qbit, int pos);
// 找smemseed
int fmt_smem(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec);
int fmt_smem_new(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec);
int fmt_smem_2(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_intv, int min_seed_len, bwtintv_t *lm, bwtintv_v *mem, bwtintv_v *tmpvec, uint32_v qev);
int fmt_seed_strategy1(const FMTIndex *fmt, int len, const uint8_t *q, int x, int min_len, int max_intv, bwtintv_t *mem);

View File

@ -2,10 +2,6 @@
#include <stdio.h>
#include "kstring.h"
#ifdef USE_MALLOC_WRAPPERS
# include "malloc_wrap.h"
#endif
int ksprintf(kstring_t *s, const char *fmt, ...)
{
va_list ap;

View File

@ -136,7 +136,6 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
m = MAX(m, maxVal[0]); /*用来解决与BSW结果不一样的第二种情况(上边界)*/ \
if (maxVal[0] > 0 && m >= max) { \
for(j=beg, i=iend; j<=end; j+=SIMD_WIDTH, i-=SIMD_WIDTH) { \
/*calc_num += 16;*/ \
__m256i h2_vec = _mm256_loadu_si256((__m256i*) (&hA2[j])); \
__m256i vcmp = _mm256_cmpeq_epi16(h2_vec, max_vec); \
uint32_t mask = _mm256_movemask_epi8(vcmp); \
@ -203,7 +202,7 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
int *_max_off, // 取得最大得分时在query和reference上位置差的 最大值
buf_t *buf) // 之前已经开辟过的缓存
{
// return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
//return ksw_extend2_origin(qlen, query, tlen, target, is_left, m, mat, o_del, e_del, o_ins, e_ins, w, end_bonus, zdrop, h0, _qle, _tle, _gtle, _gscore, _max_off);
#ifdef DEBUG_FILE_OUTPUT
//fprintf(gf[0], "%d\n", qlen);
@ -372,9 +371,8 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
//calc_num += 16;
// 存储结果
SIMD_STORE;
// 存储结果
SIMD_STORE;
}
// 剩下的计算单元
if (j <= end) {
@ -384,8 +382,7 @@ int ksw_extend2_avx2(int qlen, // query length 待匹配段碱基的query长度
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
//calc_num += 16;
// 去除多余计算的部分
// 去除多余计算的部分
SIMD_REMOVE_EXTRA;
// 存储结果
SIMD_STORE;
@ -676,7 +673,6 @@ int ksw_extend2_origin(int qlen, // query length 待匹配段碱基的query长
} else h1 = 0;
//m = h1; // 用来解决和VP-BSW结果不一样的第一种情况(左边界)
for (j = beg; LIKELY(j < end); ++j) {
//calc_num++;
#ifdef DEBUG_FILE_OUTPUT
#ifdef COUNT_CALC_NUM

View File

@ -310,9 +310,8 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
//calc_num += 32;
// 存储结果
SIMD_STORE;
// 存储结果
SIMD_STORE;
}
// 剩下的计算单元
if (j <= end) {
@ -322,8 +321,7 @@ int ksw_extend2_avx2_u8(int qlen, // query length 待匹配段碱基的query长
SIMD_CMP_SEQ;
// 计算
SIMD_COMPUTE;
//calc_num += 32;
// 去除多余计算的部分
// 去除多余计算的部分
SIMD_REMOVE_EXTRA;
// 存储结果
SIMD_STORE;

View File

@ -11,8 +11,6 @@ Date : 2024/04/06
#include "utils.h"
#include "profiling.h"
uint64_t calc_num = 0;
#ifdef SHOW_PERF
uint64_t tprof[LIM_THREAD_PROF_TYPE][LIM_THREAD] = {0};
uint64_t proc_freq = 1000;
@ -25,10 +23,20 @@ tdat[0]: read nums
tdat[1]: seed-1 full match nums
*/
int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD] = {0};
int64_t gdat[LIM_GLOBAL_DATA_TYPE] = {0};
#endif
int64_t sum(int64_t *a, int len) {
int64_t res = 0;
int i = 0;
for (i=0; i<len; ++i) {
res += a[i];
}
return res;
}
int find_opt(uint64_t *a, int len, double *max, double *min, double *avg)
{
int i = 0;
@ -49,6 +57,7 @@ int display_stats(int nthreads)
{
#ifdef SHOW_PERF
double avg, max, min;
int i;
fprintf(stderr, "[steps in main_mem]\n");
fprintf(stderr, "time_parse_arg: %0.2lf s\n", gprof[G_PREPARE] * 1.0 / proc_freq);
@ -106,20 +115,18 @@ int display_stats(int nthreads)
fprintf(stderr, "time_ksw_loop: %0.2lf s\n", gprof[G_KSW_LOOP] * 1.0 / proc_freq);
fprintf(stderr, "time_ksw_end_loop: %0.2lf s\n", gprof[G_KSW_END_LOOP] * 1.0 / proc_freq);
fprintf(stderr, "parse seq: %0.2lf s\n", gprof[G_parse_seq] * 1.0 / proc_freq);
fprintf(stderr, "read seq: %0.2lf s\n", gprof[G_read_seq] * 1.0 / proc_freq);
fprintf(stderr, "seq num: %ld\n", gdat[0]);
fprintf(stderr, "full num: %ld\n", gdat[1]);
fprintf(stderr, "percent: %0.2lf%c\n", (double)gdat[1] / gdat[0] * 100, '%');
// fprintf(stderr, "read_wait_1: %0.2lf s\n", gprof[G_read_wait_1] * 1.0 / proc_freq);
// fprintf(stderr, "read_wait_2: %0.2lf s\n", gprof[G_read_wait_2] * 1.0 / proc_freq);
// fprintf(stderr, "comp_wait_1: %0.2lf s\n", gprof[G_comp_wait_1] * 1.0 / proc_freq);
// fprintf(stderr, "comp_wait_2: %0.2lf s\n", gprof[G_comp_wait_2] * 1.0 / proc_freq);
// fprintf(stderr, "write_wait_1: %0.2lf s\n", gprof[G_write_wait_1] * 1.0 / proc_freq);
// fprintf(stderr, "write_wait_2: %0.2lf s\n", gprof[G_write_wait_2] * 1.0 / proc_freq);
fprintf(stderr, "real_cal num: %ld\n", calc_num);
//for (i=0; i<LIM_THREAD_DATA_TYPE; ++i) {
for (i=1; i<=132; ++i) {
//fprintf(stderr, "len: %d, sum: %ld\n", i, sum(tdat[i], nthreads));
fprintf(stderr, "%ld,\n", sum(tdat[i], nthreads));
}
fprintf(stderr, "\n");
#endif
return 1;
}
}

View File

@ -16,8 +16,8 @@ Date : 2024/04/06
#define LIM_THREAD 128
#define LIM_THREAD_PROF_TYPE 128
#define LIM_GLOBAL_PROF_TYPE 128
#define LIM_THREAD_DATA_TYPE 128
#define LIM_GLOBAL_DATA_TYPE 128
#define LIM_THREAD_DATA_TYPE 256
#define LIM_GLOBAL_DATA_TYPE 256
#ifdef SHOW_PERF
extern uint64_t proc_freq;
@ -27,6 +27,7 @@ extern uint64_t gprof[LIM_GLOBAL_PROF_TYPE];
#ifdef SHOW_DATA_PERF
extern int64_t tdat[LIM_THREAD_DATA_TYPE][LIM_THREAD];
extern int64_t t_sd[LIM_THREAD_DATA_TYPE][LIM_THREAD];
extern int64_t gdat[LIM_GLOBAL_DATA_TYPE];
#endif
@ -43,10 +44,11 @@ extern int64_t gdat[LIM_GLOBAL_DATA_TYPE];
#define PROF_END(result, tmp_time)
#endif
extern uint64_t calc_num;
// GLOBAL
enum {
enum
{
G_ALL = 0,
G_PIPELINE,
G_READ,
@ -59,15 +61,7 @@ enum {
G_MEM_PESTAT,
G_MEM_SAM,
G_KSW_LOOP,
G_KSW_END_LOOP,
G_parse_seq,
G_read_seq,
G_read_wait_1,
G_read_wait_2,
G_comp_wait_1,
G_comp_wait_2,
G_write_wait_1,
G_write_wait_2
G_KSW_END_LOOP
};
// THREAD
@ -91,12 +85,9 @@ enum
T_SAM_REG2ALN,
T_SEED_3_1,
T_SEED_3_2,
T_SEED_3_3,
T_SEEDING,
T_EXTENSION,
T_SAM,
T_SEED_3_3
};
int display_stats(int);
#endif
#endif

50
run.sh
View File

@ -1,40 +1,24 @@
thread=128
thread=1
n1=~/data/na12878-1.fq.gz
n2=~/data/na12878-2.fq.gz
n_r1=~/data/dataset/real/D1/n1.fq.gz
n_r2=~/data/dataset/real/D1/n2.fq.gz
#n_r1=~/data/dataset/real/D3/n1.fq.gz
#n_r2=~/data/dataset/real/D3/n2.fq.gz
#n_r1=~/data/dataset/real/D3/n1.fq
#n_r2=~/data/dataset/real/D3/n2.fq
#n_r1=~/data/t120w-1.fq
#n_r2=~/data/t120w-2.fq
#n1=~/data/dataset/real/D1/1w1.fq
#n2=~/data/dataset/real/D1/1w2.fq
#n_r1=~/data/1w-1.fq
#n_r2=~/data/1w-2.fq
#n1=~/data/dataset/real/D1/n1.fq.gz
#n2=~/data/dataset/real/D1/n2.fq.gz
#n1=~/data/dataset/real/D2/n1.fq.gz
#n2=~/data/dataset/real/D2/n2.fq.gz
#n1=~/data/dataset/real/D3/n1.fq.gz
#n2=~/data/dataset/real/D3/n2.fq.gz
#n1=~/data/dataset/real/D4/n1.fq.gz
#n2=~/data/dataset/real/D4/n2.fq.gz
#n1=~/data/dataset/real/D5/n1.fq.gz
#n2=~/data/dataset/real/D5/n2.fq.gz
reference=~/data/fmt_ref/human_g1k_v37_decoy.fasta
out=./out.sam
#out=/dev/null
sudo /home/zzh/clean_mem.sh
rm $out
reference=~/data1/fmt_ref/human_g1k_v37_decoy.fasta
#out=new-wgs.sam
out=/dev/null
time ./fastbwa mem -t $thread -M -R @RG\\tID:normal\\tSM:normal\\tPL:illumina\\tLB:normal\\tPG:bwa \
$reference \
$n1 \
$n2 \
-o $out -2 #-Z
#-K 2560000000 \
#-K 2100000000
$n_r1 \
$n_r2 \
-o $out -2 #> seed.txt #-Z

114
utils.c
View File

@ -37,15 +37,11 @@
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <pthread.h>
#include <sys/resource.h>
#include <sys/time.h>
#include "utils.h"
#include "ksort.h"
#include "kvec.h"
#include "utils.h"
#include "yarn.h"
#include "khash.h"
#define pair64_lt(a, b) ((a).x < (b).x || ((a).x == (b).x && (a).y < (b).y))
KSORT_INIT(128, pair64_t, pair64_lt)
KSORT_INIT(64, uint64_t, ks_lt_generic)
@ -159,54 +155,11 @@ uint64_t fread_fix(FILE *fp, uint64_t size, void *a)
return offset;
}
typedef struct {
pthread_t tid;
void *buf[2];
volatile int readSize[2];
uint64_t getIdx;
uint64_t putIdx;
volatile int finish;
lock_t *mtx;
} FileKV;
KHASH_MAP_INIT_INT64(fkv, FileKV);
static khash_t(fkv) *fHash = 0;
#define USE_ASYNC_READ
int err_gzread(gzFile file, void *ptr, unsigned int len)
{
int ret = 0;
PROF_START(read);
#ifndef USE_ASYNC_READ
ret = gzread(file, ptr, len);
#else
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
FileKV *val = &kh_value(fHash, k);
POSSESS(val->mtx);
WAIT_FOR(val->mtx, NOT_TO_BE, 0); // 等待有数据
RELEASE(val->mtx);
int ret = gzread(file, ptr, len);
int curIdx = val->getIdx % 2;
if (val->finish) {
if (val->getIdx < val->putIdx) {
ret = val->readSize[curIdx];
if (ret > 0) memcpy(ptr, val->buf[curIdx], ret);
++val->getIdx;
return ret;
}
return 0;
}
ret = val->readSize[curIdx];
memcpy(ptr, val->buf[curIdx], ret);
POSSESS(val->mtx);
++val->getIdx;
TWIST(val->mtx, BY, -1);
#endif
PROF_END(gprof[G_read_seq], read);
if (ret < 0)
if (ret < 0)
{
int errnum = 0;
const char *msg = gzerror(file, &errnum);
@ -216,67 +169,6 @@ int err_gzread(gzFile file, void *ptr, unsigned int len)
return ret;
}
static int64_t kBufSize = 16777216;
static void *async_gzread(void *data) {
gzFile file = (gzFile)data;
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
FileKV *val = &kh_value(fHash, k);
int ret = 0;
while (1) {
POSSESS(val->mtx);
WAIT_FOR(val->mtx, NOT_TO_BE, 2); // 等待有数据
RELEASE(val->mtx);
int curIdx = val->putIdx % 2;
ret = gzread(file, val->buf[curIdx], kBufSize);
val->readSize[curIdx] = ret;
if (ret <= 0) {
POSSESS(val->mtx);
val->finish = 1;
TWIST(val->mtx, BY, 1);
break;
}
POSSESS(val->mtx);
val->putIdx += 1;
TWIST(val->mtx, BY, 1);
}
return NULL;
}
int start_async_read(gzFile file) {
int ret = 0;
#ifdef USE_ASYNC_READ
if (fHash == 0) {
fHash = kh_init(fkv);
}
khiter_t k = kh_put(fkv, fHash, (int64_t)file, &ret);
kh_key(fHash, k) = (int64_t)file;
FileKV *fv = &kh_value(fHash, k);
fv->mtx = NEW_LOCK(0);
fv->getIdx = fv->putIdx = fv->finish = 0;
fv->readSize[0] = fv->readSize[1] = 0;
fv->buf[0] = malloc(kBufSize);
fv->buf[1] = malloc(kBufSize);
ret = pthread_create(&fv->tid, 0, async_gzread, file);
#endif
return ret;
}
int stop_async_read(gzFile file) {
#ifdef USE_ASYNC_READ
khiter_t k = kh_get(fkv, fHash, (int64_t)file);
FileKV *val = &kh_value(fHash, k);
pthread_join(val->tid, 0);
#endif
return 0;
}
int err_fseek(FILE *stream, long offset, int whence)
{
int ret = fseek(stream, offset, whence);

View File

@ -135,9 +135,6 @@ extern "C" {
int memcpy_bwamem(void *dest, size_t dmax, const void *src, size_t smax, char *file_name, int line_num);
int start_async_read(gzFile file);
int stop_async_read(gzFile file);
#ifdef __cplusplus
}
#endif