完成avx2的align代码
This commit is contained in:
parent
845a1156b8
commit
338c56e6ed
|
|
@ -23,6 +23,8 @@
|
|||
"utility": "c",
|
||||
"byte_alloc.h": "c",
|
||||
"debug.h": "c",
|
||||
"limits": "c"
|
||||
"limits": "c",
|
||||
"align.h": "c",
|
||||
"extend.h": "c"
|
||||
}
|
||||
}
|
||||
276
align.c
276
align.c
|
|
@ -0,0 +1,276 @@
|
|||
/*********************************************************************************************
|
||||
Description: stripped sw align functions in bwa-mem
|
||||
|
||||
Copyright : All right reserved by NCIC.ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2024/04/11
|
||||
***********************************************************************************************/
|
||||
// u8和i16 score2 结果有不同,不是算法问题,而是边界未清零
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "byte_alloc.h"
|
||||
#include "utils.h"
|
||||
#include "profiling.h"
|
||||
#include "align.h"
|
||||
#include "debug.h"
|
||||
|
||||
const kswr_t g_defr = {0, -1, -1, -1, -1, -1, -1};
|
||||
|
||||
#define ALIGN_PERFORMANCE_TEST(kernel_id, nbyte, func, sp, ep) \
|
||||
do \
|
||||
{ \
|
||||
PROF_START(align); \
|
||||
int i; \
|
||||
kswr_t score; \
|
||||
for (i = sp; i < ep; ++i) \
|
||||
{ if (kv_A(kv_A(i_arr, i), 0) < 144) continue; \
|
||||
kswq_sse_t *q = aln_sse_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
||||
score = func(&bmem[1], q, \
|
||||
kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, \
|
||||
6, 1, 6, 1, xtra); \
|
||||
score_total[kernel_id] += score.score; \
|
||||
byte_mem_clear(&bmem[0]); /* free(q); */ \
|
||||
} \
|
||||
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \
|
||||
} while (0)
|
||||
|
||||
#define ALIGN_PERFORMANCE_TEST_AVX2(kernel_id, nbyte, func, sp, ep) \
|
||||
do { \
|
||||
PROF_START(align); \
|
||||
int i; \
|
||||
kswr_t score; \
|
||||
for (i = sp; i < ep; ++i) { \
|
||||
/*if (kv_A(kv_A(i_arr, i), 0) < 144) \
|
||||
continue; */ \
|
||||
kswq_avx2_t *q = aln_avx2_qinit(&bmem[0], nbyte, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat); \
|
||||
score = func(&bmem[1], q, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra); \
|
||||
score_total[kernel_id] += score.score; \
|
||||
byte_mem_clear(&bmem[0]); /* free(q); */ \
|
||||
} \
|
||||
PROF_END(gprof[kernel_prof_idx[kernel_id]], align); \
|
||||
} while (0)
|
||||
|
||||
// sse ksw init
|
||||
/**
|
||||
* Initialize the query data structure
|
||||
*
|
||||
* @param size Number of bytes used to store a score; valid valures are 1 or 2
|
||||
* @param qlen Length of the query sequence
|
||||
* @param query Query sequence
|
||||
* @param m Size of the alphabet
|
||||
* @param mat Scoring matrix in a one-dimension array
|
||||
*
|
||||
* @return Query data structure
|
||||
*/
|
||||
kswq_sse_t *aln_sse_qinit(byte_mem_t *bmem, int size, int qlen, const uint8_t *query, int m, const int8_t *mat)
|
||||
{
|
||||
kswq_sse_t *q;
|
||||
int slen, a, tmp, p, memsize;
|
||||
|
||||
size = size > 1? 2 : 1;
|
||||
p = 8 * (3 - size); // # values per __m128i
|
||||
qlen = 144;
|
||||
slen = (qlen + p - 1) / p; // segmented length
|
||||
memsize = sizeof(kswq_sse_t) + 256 + 16 * slen * (m + 4);
|
||||
q = (kswq_sse_t *)malloc(memsize); // a single block of memory
|
||||
//q = (kswq_sse_t *)byte_mem_request_and_clean(bmem, memsize);
|
||||
//q = (kswq_sse_t *)byte_mem_request(bmem, memsize);
|
||||
q->qp = (__m128i *)(((size_t)q + sizeof(kswq_sse_t) + 15) >> 4 << 4); // align memory
|
||||
q->H0 = q->qp + slen * m;
|
||||
q->H1 = q->H0 + slen;
|
||||
q->E = q->H1 + slen;
|
||||
q->Hmax = q->E + slen;
|
||||
q->slen = slen; q->qlen = qlen; q->size = size;
|
||||
//fprintf(stderr, "%d %d\n", slen, qlen);
|
||||
// compute shift
|
||||
tmp = m * m;
|
||||
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
|
||||
if (mat[a] < (int8_t)q->shift) q->shift = mat[a];
|
||||
if (mat[a] > (int8_t)q->mdiff) q->mdiff = mat[a];
|
||||
}
|
||||
q->max = q->mdiff;
|
||||
q->shift = 256 - q->shift; // NB: q->shift is uint8_t
|
||||
q->mdiff += q->shift; // this is the difference between the min and max scores
|
||||
// An example: p=8, qlen=19, slen=3 and segmentation:
|
||||
// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
|
||||
if (size == 1) {
|
||||
int8_t *t = (int8_t*)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen? 0 : ma[query[k]]) + q->shift;
|
||||
}
|
||||
} else {
|
||||
int16_t *t = (int16_t*)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen? 0 : ma[query[k]]);
|
||||
}
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the query data structure
|
||||
*
|
||||
* @param size Number of bytes used to store a score; valid valures are 1 or 2
|
||||
* @param qlen Length of the query sequence
|
||||
* @param query Query sequence
|
||||
* @param m Size of the alphabet (ACGTN)
|
||||
* @param mat Scoring matrix in a one-dimension array
|
||||
*
|
||||
* @return Query data structure
|
||||
*/
|
||||
kswq_avx2_t *aln_avx2_qinit(byte_mem_t *bmem, int size, int qlen, const uint8_t *query, int m, const int8_t *mat) {
|
||||
kswq_avx2_t *q;
|
||||
int slen, a, tmp, p;
|
||||
size = size > 1 ? 2 : 1;
|
||||
p = 16 * (3 - size); // # values per __m256i,i16是16个,u8是32个
|
||||
slen = (qlen + p - 1) / p; // segmented length
|
||||
q = (kswq_avx2_t *)malloc(sizeof(kswq_avx2_t) + 512 + 32 * slen * (m + 4)); // a single block of memory
|
||||
q->qp = (__m256i *)(((size_t)q + sizeof(kswq_avx2_t) + 31) >> 5 << 5); // align memory,32字节对齐
|
||||
q->H0 = q->qp + slen * m;
|
||||
q->H1 = q->H0 + slen;
|
||||
q->E = q->H1 + slen;
|
||||
q->Hmax = q->E + slen;
|
||||
q->slen = slen;
|
||||
q->qlen = qlen;
|
||||
q->size = size;
|
||||
// compute shift
|
||||
tmp = m * m;
|
||||
for (a = 0, q->shift = 127, q->mdiff = 0; a < tmp; ++a) { // find the minimum and maximum score
|
||||
if (mat[a] < (int8_t)q->shift)
|
||||
q->shift = mat[a];
|
||||
if (mat[a] > (int8_t)q->mdiff)
|
||||
q->mdiff = mat[a];
|
||||
}
|
||||
q->max = q->mdiff; // (=1)
|
||||
q->shift = 256 - q->shift; // NB: q->shift is uint8_t // 主要用来处理uint8_t类型数据,保证score不小于0,i16不用 (=4)
|
||||
q->mdiff += q->shift; // this is the difference between the min and max scores (=1)
|
||||
// An example: p=8, qlen=19, slen=3 and segmentation:
|
||||
// {{0,3,6,9,12,15,18,-1},{1,4,7,10,13,16,-1,-1},{2,5,8,11,14,17,-1,-1}}
|
||||
if (size == 1) {
|
||||
int8_t *t = (int8_t *)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen ? 0 : ma[query[k]]) + q->shift;
|
||||
}
|
||||
} else {
|
||||
int16_t *t = (int16_t *)q->qp;
|
||||
for (a = 0; a < m; ++a) {
|
||||
int i, k, nlen = slen * p;
|
||||
const int8_t *ma = mat + a * m;
|
||||
for (i = 0; i < slen; ++i)
|
||||
for (k = i; k < nlen; k += slen) // p iterations
|
||||
*t++ = (k >= qlen ? 0 : ma[query[k]]);
|
||||
}
|
||||
}
|
||||
return q;
|
||||
}
|
||||
|
||||
/******
|
||||
* 输入说明:三个文件,query.fa, target.fa, info.txt
|
||||
* query.fa: 每一行代表一个query序列,由ACGTN组成
|
||||
* target.fa: 每一行代表一个reference(target)序列,由ACGTN组成
|
||||
* info.txt: 每一行由两个数字组成,分别代表query序列长度,target序列长度(align不需要初始分数)
|
||||
*/
|
||||
int main_align(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3)
|
||||
{
|
||||
fprintf(stderr, "Need 3 files: query, target, info.\n");
|
||||
return -1;
|
||||
}
|
||||
const char *qf_path = argv[0];
|
||||
const char *tf_path = argv[1];
|
||||
const char *if_path = argv[2];
|
||||
|
||||
FILE *qfp = fopen(qf_path, "r");
|
||||
FILE *tfp = fopen(tf_path, "r");
|
||||
FILE *ifp = fopen(if_path, "r");
|
||||
|
||||
buf_t read_buf = {0};
|
||||
seq_v q_arr = {0};
|
||||
seq_v t_arr = {0};
|
||||
qti_v i_arr = {0};
|
||||
uint64_t score_total[ALIGN_FUNC_NUM] = {0};
|
||||
const int kmax_row = 3000000;
|
||||
|
||||
/* read input files */
|
||||
int query_read_row = read_seq(&q_arr, &read_buf, kmax_row, qfp);
|
||||
int target_read_row = read_seq(&t_arr, &read_buf, kmax_row, tfp);
|
||||
int info_read_row = read_qt_info(&i_arr, &read_buf, kmax_row, 2, ifp);
|
||||
// fprintf(stderr, "read row: %d\t%d\t%d\n", query_read_row, target_read_row, info_read_row);
|
||||
/* init parameters */
|
||||
int8_t mat[25] = {1, -4, -4, -4, -1,
|
||||
-4, 1, -4, -4, -1,
|
||||
-4, -4, 1, -4, -1,
|
||||
-4, -4, -4, 1, -1,
|
||||
-1, -1, -1, -1, -1};
|
||||
int xtra = 851987;
|
||||
int kernel_prof_idx[] = {G_ALN_I16, G_ALN_U8, G_ALN_AVX2_I16, G_ALN_AVX2_U8};
|
||||
byte_mem_t bmem[2] = {{0}, {0}};
|
||||
byte_mem_init_alloc(&bmem[0], 1024*4);
|
||||
byte_mem_init_alloc(&bmem[1], 1024*4);
|
||||
int align_lines = MIN(MIN(query_read_row, target_read_row), info_read_row);
|
||||
// open_qti_files();
|
||||
// open_debug_files();
|
||||
/* convert query sequence for sse or avx2 */
|
||||
|
||||
|
||||
fprintf(stderr, "excute nums: %d\n", align_lines);
|
||||
ALIGN_PERFORMANCE_TEST(0, 2, align_sse_i16, 0, align_lines);
|
||||
ALIGN_PERFORMANCE_TEST(1, 1, align_sse_u8, 0, align_lines);
|
||||
ALIGN_PERFORMANCE_TEST_AVX2(2, 2, align_avx2_i16, 0, align_lines);
|
||||
ALIGN_PERFORMANCE_TEST_AVX2(3, 1, align_avx2_u8, 0, align_lines);
|
||||
#if 0
|
||||
// compare the score2 of i16 and u8
|
||||
{
|
||||
int i;
|
||||
kswr_t si16, su8;
|
||||
for (i = 0; i < align_lines; ++i)
|
||||
{
|
||||
kswq_sse_t *qi16 = aln_sse_qinit(&bmem[0], 2, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat);
|
||||
si16 = align_sse_i16(&bmem[1], qi16, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra);
|
||||
byte_mem_clear(&bmem[0]);
|
||||
fprintf(stderr, "split\n\n");
|
||||
kswq_sse_t *qu8 = aln_sse_qinit(&bmem[0], 1, kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, 5, mat);
|
||||
su8 = align_sse_u8(&bmem[1], qu8, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 6, 1, 6, 1, xtra);
|
||||
byte_mem_clear(&bmem[0]);
|
||||
if (si16.score2 != su8.score2) {
|
||||
fprintf(stderr, "%d %d\n", si16.score2, su8.score2);
|
||||
// write_query_target_sequence(kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, 0, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if 1
|
||||
int i = 0;
|
||||
for (; i < ARRAY_SIZE(kernel_prof_idx); ++i)
|
||||
{
|
||||
gdata[kernel_prof_idx[i]] = score_total[i];
|
||||
}
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
fprintf(stderr, "[align sse i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_I16] / TIME_DIVIDE_BY, gdata[G_ALN_I16]);
|
||||
fprintf(stderr, "[align sse u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_U8] / TIME_DIVIDE_BY, gdata[G_ALN_U8]);
|
||||
fprintf(stderr, "[align avx2 i16] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_I16]);
|
||||
fprintf(stderr, "[align avx2 u8 ] time: %9.6lf s; score: %ld\n", gprof[G_ALN_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_ALN_AVX2_U8]);
|
||||
#endif
|
||||
#endif
|
||||
// close_files();
|
||||
fclose(qfp);
|
||||
fclose(tfp);
|
||||
fclose(ifp);
|
||||
return 0;
|
||||
}
|
||||
49
align.h
49
align.h
|
|
@ -1,15 +1,56 @@
|
|||
/*********************************************************************************************
|
||||
Description: common defines and declarations
|
||||
Description: stripped sw align functions in bwa-mem
|
||||
|
||||
Copyright : All right reserved by NCIC.ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2023/08/26
|
||||
Date : 2024/04/11
|
||||
***********************************************************************************************/
|
||||
#ifndef __COMMON_H
|
||||
#define __COMMON_H
|
||||
#ifndef __ALIGN_H
|
||||
#define __ALIGN_H
|
||||
#include <stdio.h>
|
||||
#include <emmintrin.h>
|
||||
#include <immintrin.h>
|
||||
#include "byte_alloc.h"
|
||||
|
||||
#define KSW_XBYTE 0x10000
|
||||
#define KSW_XSTOP 0x20000
|
||||
#define KSW_XSUBO 0x40000
|
||||
#define KSW_XSTART 0x80000
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int score; // best score
|
||||
int te, qe; // target end and query end
|
||||
int score2, te2; // second best score and ending position on the target
|
||||
int tb, qb; // target start and query start
|
||||
} kswr_t;
|
||||
|
||||
extern const kswr_t g_defr;
|
||||
|
||||
typedef struct _kswq_t
|
||||
{
|
||||
int qlen, slen;
|
||||
uint8_t shift, mdiff, max, size;
|
||||
__m128i *qp, *H0, *H1, *E, *Hmax;
|
||||
} kswq_sse_t;
|
||||
|
||||
typedef struct _kswq_avx2_t {
|
||||
int qlen, slen;
|
||||
uint8_t shift, mdiff, max, size;
|
||||
__m256i *qp, *H0, *H1, *E, *Hmax;
|
||||
} kswq_avx2_t;
|
||||
|
||||
#define ALIGN_FUNC_NUM 4
|
||||
|
||||
kswq_sse_t *aln_sse_qinit(byte_mem_t *bmem, int size, int qlen, const uint8_t *query, int m, const int8_t *mat);
|
||||
kswr_t align_sse_i16(byte_mem_t *bmem, kswq_sse_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, int _e_ins, int xtra); // the first gap costs -(_o+_e)
|
||||
kswr_t align_sse_u8(byte_mem_t *bmem, kswq_sse_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, int _e_ins, int xtra);
|
||||
kswq_avx2_t *aln_avx2_qinit(byte_mem_t *bmem, int size, int qlen, const uint8_t *query, int m, const int8_t *mat);
|
||||
kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||
int _e_ins, int xtra);
|
||||
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||
int _e_ins, int xtra);
|
||||
|
||||
int main_align(int argc, char *argv[]);
|
||||
#endif
|
||||
137
align_avx2_i16.c
137
align_avx2_i16.c
|
|
@ -0,0 +1,137 @@
|
|||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "align.h"
|
||||
#include "utils.h"
|
||||
|
||||
kswr_t align_avx2_i16(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||
int _e_ins, int xtra) {
|
||||
int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
|
||||
uint64_t *b;
|
||||
__m256i zero, oe_del, e_del, oe_ins, e_ins, *H0, *H1, *E, *Hmax;
|
||||
kswr_t r;
|
||||
|
||||
#define __max_16(ret, xx) \
|
||||
do { \
|
||||
int16_t *maxVal = (int16_t *)&(xx); \
|
||||
(xx) = _mm256_max_epi16((xx), _mm256_srli_si256((xx), 8)); \
|
||||
(xx) = _mm256_max_epi16((xx), _mm256_srli_si256((xx), 4)); \
|
||||
(xx) = _mm256_max_epi16((xx), _mm256_srli_si256((xx), 2)); \
|
||||
(ret) = MAX(maxVal[0], maxVal[8]); \
|
||||
} while (0)
|
||||
|
||||
// Given entries all either 0x0000 or 0xffff, return whether they are all 0x0000
|
||||
#define allzero_16(xx) (!_mm256_movemask_epi8((xx)))
|
||||
|
||||
#define avx2_slli_i16(xx, imm) \
|
||||
do { \
|
||||
int16_t *arr = (int16_t *)&(xx); \
|
||||
int16_t val = arr[7]; \
|
||||
(xx) = _mm256_slli_si256((xx), (imm)); \
|
||||
arr[8] = val; \
|
||||
} while (0)
|
||||
|
||||
// initialization
|
||||
r = g_defr;
|
||||
minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000;
|
||||
endsc = (xtra & KSW_XSTOP) ? xtra & 0xffff : 0x10000;
|
||||
m_b = n_b = 0;
|
||||
b = 0;
|
||||
zero = _mm256_set1_epi32(0);
|
||||
oe_del = _mm256_set1_epi16(_o_del + _e_del);
|
||||
e_del = _mm256_set1_epi16(_e_del);
|
||||
oe_ins = _mm256_set1_epi16(_o_ins + _e_ins);
|
||||
e_ins = _mm256_set1_epi16(_e_ins);
|
||||
H0 = q->H0;
|
||||
H1 = q->H1;
|
||||
E = q->E;
|
||||
Hmax = q->Hmax;
|
||||
slen = q->slen;
|
||||
for (i = 0; i < slen; ++i) {
|
||||
_mm256_store_si256(E + i, zero);
|
||||
_mm256_store_si256(H0 + i, zero);
|
||||
_mm256_store_si256(Hmax + i, zero);
|
||||
}
|
||||
// the core loop
|
||||
for (i = 0; i < tlen; ++i) {
|
||||
int j, k, imax;
|
||||
__m256i e, t, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
|
||||
h = _mm256_load_si256(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
|
||||
avx2_slli_i16(h, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm256_adds_epi16(h, _mm256_load_si256(S++));
|
||||
e = _mm256_load_si256(E + j);
|
||||
h = _mm256_max_epi16(h, e);
|
||||
h = _mm256_max_epi16(h, f);
|
||||
max = _mm256_max_epi16(max, h);
|
||||
_mm256_store_si256(H1 + j, h);
|
||||
e = _mm256_subs_epu16(e, e_del);
|
||||
t = _mm256_subs_epu16(h, oe_del);
|
||||
e = _mm256_max_epi16(e, t);
|
||||
_mm256_store_si256(E + j, e);
|
||||
f = _mm256_subs_epu16(f, e_ins);
|
||||
t = _mm256_subs_epu16(h, oe_ins);
|
||||
f = _mm256_max_epi16(f, t);
|
||||
h = _mm256_load_si256(H0 + j);
|
||||
}
|
||||
|
||||
for (k = 0; LIKELY(k < 16); ++k) {
|
||||
avx2_slli_i16(f, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm256_load_si256(H1 + j);
|
||||
h = _mm256_max_epi16(h, f);
|
||||
_mm256_store_si256(H1 + j, h);
|
||||
h = _mm256_subs_epu16(h, oe_ins);
|
||||
f = _mm256_subs_epu16(f, e_ins);
|
||||
if (UNLIKELY(allzero_16(_mm256_cmpgt_epi16(f, h))))
|
||||
goto end_loop16;
|
||||
}
|
||||
}
|
||||
end_loop16:
|
||||
__max_16(imax, max);
|
||||
if (imax >= minsc) {
|
||||
if (n_b == 0 || (int32_t)b[n_b - 1] + 1 != i) {
|
||||
if (n_b == m_b) {
|
||||
m_b = m_b ? m_b << 1 : 8;
|
||||
b = (uint64_t *)realloc(b, 8 * m_b);
|
||||
}
|
||||
b[n_b++] = (uint64_t)imax << 32 | i;
|
||||
} else if ((int)(b[n_b - 1] >> 32) < imax)
|
||||
b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last
|
||||
}
|
||||
if (imax > gmax) {
|
||||
gmax = imax;
|
||||
te = i;
|
||||
for (j = 0; LIKELY(j < slen); ++j) _mm256_store_si256(Hmax + j, _mm256_load_si256(H1 + j));
|
||||
if (gmax >= endsc)
|
||||
break;
|
||||
}
|
||||
S = H1;
|
||||
H1 = H0;
|
||||
H0 = S;
|
||||
}
|
||||
r.score = gmax;
|
||||
r.te = te;
|
||||
{
|
||||
int max = -1, tmp, low, high, qlen = slen * 16;
|
||||
uint16_t *t = (uint16_t *)Hmax;
|
||||
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max)
|
||||
max = *t, r.qe = i / 16 + i % 16 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe)
|
||||
r.qe = tmp;
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
low = te - i;
|
||||
high = te + i;
|
||||
for (i = 0; i < n_b; ++i) {
|
||||
int e = (int32_t)b[i];
|
||||
if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2)
|
||||
r.score2 = b[i] >> 32, r.te2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
return r;
|
||||
}
|
||||
154
align_avx2_u8.c
154
align_avx2_u8.c
|
|
@ -0,0 +1,154 @@
|
|||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "align.h"
|
||||
#include "utils.h"
|
||||
|
||||
kswr_t align_avx2_u8(byte_mem_t *bmem, kswq_avx2_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins,
|
||||
int _e_ins, int xtra) // the first gap costs -(_o+_e)
|
||||
{
|
||||
int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
|
||||
uint64_t *b;
|
||||
__m256i zero, oe_del, e_del, oe_ins, e_ins, shift, *H0, *H1, *E, *Hmax;
|
||||
kswr_t r;
|
||||
|
||||
#define __max_32(ret, xx) \
|
||||
do { \
|
||||
uint8_t *maxVal = (uint8_t *)&(xx); \
|
||||
(xx) = _mm256_max_epu8((xx), _mm256_srli_si256((xx), 8)); \
|
||||
(xx) = _mm256_max_epu8((xx), _mm256_srli_si256((xx), 4)); \
|
||||
(xx) = _mm256_max_epu8((xx), _mm256_srli_si256((xx), 2)); \
|
||||
(xx) = _mm256_max_epu8((xx), _mm256_srli_si256((xx), 1)); \
|
||||
(ret) = MAX(maxVal[0], maxVal[16]); \
|
||||
} while (0)
|
||||
|
||||
// Given entries with arbitrary values, return whether they are all 0x00
|
||||
#define allzero_32(xx) (_mm256_movemask_epi8(_mm256_cmpeq_epi8((xx), zero)) == 0xffffffff)
|
||||
|
||||
#define avx2_slli_wrap(xx, imm) \
|
||||
do { \
|
||||
uint8_t *arr = (uint8_t *)&(xx); \
|
||||
uint8_t val = arr[15]; \
|
||||
(xx) = _mm256_slli_si256((xx), (imm)); \
|
||||
arr[16] = val; \
|
||||
} while (0)
|
||||
|
||||
// initialization
|
||||
r = g_defr;
|
||||
minsc = (xtra & KSW_XSUBO) ? xtra & 0xffff : 0x10000;
|
||||
endsc = (xtra & KSW_XSTOP) ? xtra & 0xffff : 0x10000;
|
||||
m_b = n_b = 0;
|
||||
b = 0;
|
||||
zero = _mm256_set1_epi32(0);
|
||||
oe_del = _mm256_set1_epi8(_o_del + _e_del);
|
||||
e_del = _mm256_set1_epi8(_e_del);
|
||||
oe_ins = _mm256_set1_epi8(_o_ins + _e_ins);
|
||||
e_ins = _mm256_set1_epi8(_e_ins);
|
||||
shift = _mm256_set1_epi8(q->shift);
|
||||
H0 = q->H0;
|
||||
H1 = q->H1;
|
||||
E = q->E;
|
||||
Hmax = q->Hmax;
|
||||
slen = q->slen;
|
||||
for (i = 0; i < slen; ++i) {
|
||||
_mm256_store_si256(E + i, zero);
|
||||
_mm256_store_si256(H0 + i, zero);
|
||||
_mm256_store_si256(Hmax + i, zero);
|
||||
}
|
||||
// the core loop
|
||||
for (i = 0; i < tlen; ++i) {
|
||||
int j, k, imax;
|
||||
__m256i e, h, t, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
|
||||
h = _mm256_load_si256(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
|
||||
avx2_slli_wrap(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
/* SW cells are computed in the following order:
|
||||
* H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
|
||||
* E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
|
||||
* F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
|
||||
*/
|
||||
// compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
|
||||
h = _mm256_adds_epu8(h, _mm256_load_si256(S + j));
|
||||
h = _mm256_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j)
|
||||
e = _mm256_load_si256(E + j); // e=E'(i,j)
|
||||
h = _mm256_max_epu8(h, e);
|
||||
h = _mm256_max_epu8(h, f); // h=H'(i,j)
|
||||
max = _mm256_max_epu8(max, h); // set max
|
||||
_mm256_store_si256(H1 + j, h); // save to H'(i,j)
|
||||
// now compute E'(i+1,j)
|
||||
e = _mm256_subs_epu8(e, e_del); // e=E'(i,j) - e_del
|
||||
t = _mm256_subs_epu8(h, oe_del); // h=H'(i,j) - o_del - e_del
|
||||
e = _mm256_max_epu8(e, t); // e=E'(i+1,j)
|
||||
_mm256_store_si256(E + j, e); // save to E'(i+1,j)
|
||||
// now compute F'(i,j+1)
|
||||
f = _mm256_subs_epu8(f, e_ins);
|
||||
t = _mm256_subs_epu8(h, oe_ins); // h=H'(i,j) - o_ins - e_ins
|
||||
f = _mm256_max_epu8(f, t);
|
||||
// get H'(i-1,j) and prepare for the next j
|
||||
h = _mm256_load_si256(H0 + j); // h=H'(i-1,j)
|
||||
}
|
||||
// NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion
|
||||
for (k = 0; LIKELY(k < 32); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
|
||||
avx2_slli_wrap(f, 1);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm256_load_si256(H1 + j);
|
||||
h = _mm256_max_epu8(h, f); // h=H'(i,j)
|
||||
_mm256_store_si256(H1 + j, h);
|
||||
h = _mm256_subs_epu8(h, oe_ins);
|
||||
f = _mm256_subs_epu8(f, e_ins);
|
||||
if (UNLIKELY(allzero_32(_mm256_subs_epu8(f, h))))
|
||||
goto end_loop32;
|
||||
}
|
||||
}
|
||||
end_loop32:
|
||||
__max_32(imax, max); // imax is the maximum number in max
|
||||
if (imax >= minsc) { // write the b array; this condition adds branching unfornately
|
||||
if (n_b == 0 ||
|
||||
(int32_t)b[n_b - 1] + 1 !=
|
||||
i) { // then append 新的max用新的数值记录,用来记录次优分值,不能与最优分值相邻(否则就是连续匹配的)
|
||||
if (n_b == m_b) {
|
||||
m_b = m_b ? m_b << 1 : 8;
|
||||
b = (uint64_t *)realloc(b, 8 * m_b);
|
||||
}
|
||||
b[n_b++] = (uint64_t)imax << 32 | i;
|
||||
} else if ((int)(b[n_b - 1] >> 32) < imax)
|
||||
b[n_b - 1] = (uint64_t)imax << 32 | i; // modify the last
|
||||
}
|
||||
if (imax > gmax) {
|
||||
gmax = imax;
|
||||
te = i; // te is the end position on the target
|
||||
for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
|
||||
_mm256_store_si256(Hmax + j, _mm256_load_si256(H1 + j));
|
||||
if (gmax + q->shift >= 255 || gmax >= endsc)
|
||||
break; // 最大分值溢出了,结束运算
|
||||
}
|
||||
S = H1;
|
||||
H1 = H0;
|
||||
H0 = S; // swap H0 and H1
|
||||
}
|
||||
// fprintf(gf[0], "%d\n", i);
|
||||
r.score = gmax + q->shift < 255 ? gmax : 255;
|
||||
r.te = te;
|
||||
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
|
||||
int max = -1, tmp, low, high, qlen = slen * 32;
|
||||
uint8_t *t = (uint8_t *)Hmax;
|
||||
for (i = 0; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max)
|
||||
max = *t, r.qe = i / 32 + i % 32 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 32 + i % 32 * slen) < r.qe)
|
||||
r.qe = tmp;
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
low = te - i;
|
||||
high = te + i;
|
||||
for (i = 0; i < n_b; ++i) {
|
||||
int e = (int32_t)b[i];
|
||||
if ((e < low || e > high) && (int)(b[i] >> 32) > r.score2)
|
||||
r.score2 = b[i] >> 32, r.te2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
return r;
|
||||
}
|
||||
134
align_sse_i16.c
134
align_sse_i16.c
|
|
@ -0,0 +1,134 @@
|
|||
/*********************************************************************************************
|
||||
Description: sw align functions in bwa-mem
|
||||
|
||||
Copyright : All right reserved by NCIC.ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2024/04/11
|
||||
***********************************************************************************************/
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "utils.h"
|
||||
#include "align.h"
|
||||
|
||||
kswr_t align_sse_i16(byte_mem_t *bmem, kswq_sse_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, int _e_ins, int xtra) // the first gap costs -(_o+_e)
|
||||
{
|
||||
int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
|
||||
uint64_t *b;
|
||||
__m128i zero, oe_del, e_del, oe_ins, e_ins, *H0, *H1, *E, *Hmax;
|
||||
kswr_t r;
|
||||
|
||||
#if defined __SSE2__
|
||||
#define __max_8(ret, xx) do { \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 8)); \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 4)); \
|
||||
(xx) = _mm_max_epi16((xx), _mm_srli_si128((xx), 2)); \
|
||||
(ret) = _mm_extract_epi16((xx), 0); \
|
||||
} while (0)
|
||||
|
||||
// Given entries all either 0x0000 or 0xffff, return whether they are all 0x0000
|
||||
#define allzero_0f_8(xx) (!_mm_movemask_epi8((xx)))
|
||||
|
||||
#elif defined __ARM_NEON
|
||||
#define __max_8(ret, xx) (ret) = vmaxvq_s16(vreinterpretq_s16_u8((xx)))
|
||||
#define allzero_0f_8(xx) (vmaxvq_u16(vreinterpretq_u16_u8((xx))) == 0)
|
||||
|
||||
#else
|
||||
#define __max_8(ret, xx) (ret) = m128i_max_s16((xx))
|
||||
#define allzero_0f_8(xx) (m128i_allzero((xx)))
|
||||
#endif
|
||||
|
||||
// initialization
|
||||
r = g_defr;
|
||||
minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
|
||||
endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
|
||||
m_b = n_b = 0; b = 0;
|
||||
zero = _mm_set1_epi32(0);
|
||||
oe_del = _mm_set1_epi16(_o_del + _e_del);
|
||||
e_del = _mm_set1_epi16(_e_del);
|
||||
oe_ins = _mm_set1_epi16(_o_ins + _e_ins);
|
||||
e_ins = _mm_set1_epi16(_e_ins);
|
||||
H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
|
||||
slen = q->slen;
|
||||
for (i = 0; i < slen; ++i) {
|
||||
_mm_store_si128(E + i, zero);
|
||||
_mm_store_si128(H0 + i, zero);
|
||||
_mm_store_si128(Hmax + i, zero);
|
||||
}
|
||||
// the core loop
|
||||
for (i = 0; i < tlen; ++i) {
|
||||
int j, k, imax;
|
||||
__m128i e, t, h, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
|
||||
h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
|
||||
h = _mm_slli_si128(h, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm_adds_epi16(h, _mm_load_si128(S++));
|
||||
e = _mm_load_si128(E + j);
|
||||
h = _mm_max_epi16(h, e);
|
||||
h = _mm_max_epi16(h, f);
|
||||
max = _mm_max_epi16(max, h);
|
||||
_mm_store_si128(H1 + j, h);
|
||||
e = _mm_subs_epu16(e, e_del);
|
||||
t = _mm_subs_epu16(h, oe_del);
|
||||
e = _mm_max_epi16(e, t);
|
||||
_mm_store_si128(E + j, e);
|
||||
f = _mm_subs_epu16(f, e_ins);
|
||||
t = _mm_subs_epu16(h, oe_ins);
|
||||
f = _mm_max_epi16(f, t);
|
||||
h = _mm_load_si128(H0 + j);
|
||||
}
|
||||
for (k = 0; LIKELY(k < 16); ++k) {
|
||||
f = _mm_slli_si128(f, 2);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm_load_si128(H1 + j);
|
||||
h = _mm_max_epi16(h, f);
|
||||
_mm_store_si128(H1 + j, h);
|
||||
h = _mm_subs_epu16(h, oe_ins);
|
||||
f = _mm_subs_epu16(f, e_ins);
|
||||
if(UNLIKELY(allzero_0f_8(_mm_cmpgt_epi16(f, h)))) goto end_loop8;
|
||||
}
|
||||
}
|
||||
end_loop8:
|
||||
__max_8(imax, max);
|
||||
//fprintf(stderr, "%d\n", imax);
|
||||
if (imax >= minsc) {
|
||||
//fprintf(stderr, "%d\n", imax);
|
||||
if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) {
|
||||
if (n_b == m_b) {
|
||||
m_b = m_b? m_b<<1 : 8;
|
||||
b = (uint64_t*)realloc(b, 8 * m_b);
|
||||
// b = (uint64_t *)byte_mem_request(bmem, 8 * m_b);
|
||||
}
|
||||
b[n_b++] = (uint64_t)imax<<32 | i;
|
||||
} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
|
||||
}
|
||||
if (imax > gmax) {
|
||||
gmax = imax; te = i;
|
||||
for (j = 0; LIKELY(j < slen); ++j)
|
||||
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
|
||||
if (gmax >= endsc) break;
|
||||
}
|
||||
S = H1; H1 = H0; H0 = S;
|
||||
|
||||
}
|
||||
r.score = gmax; r.te = te;
|
||||
{
|
||||
int max = -1, tmp, low, high, qlen = slen * 8;
|
||||
uint16_t *t = (uint16_t*)Hmax;
|
||||
for (i = 0, r.qe = -1; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max) max = *t, r.qe = i / 8 + i % 8 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 8 + i % 8 * slen) < r.qe) r.qe = tmp;
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
low = te - i; high = te + i;
|
||||
for (i = 0; i < n_b; ++i) {
|
||||
int e = (int32_t)b[i];
|
||||
if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
|
||||
r.score2 = b[i]>>32, r.te2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
// byte_mem_clear(bmem);
|
||||
return r;
|
||||
}
|
||||
149
align_sse_u8.c
149
align_sse_u8.c
|
|
@ -0,0 +1,149 @@
|
|||
/*********************************************************************************************
|
||||
Description: sw align functions in bwa-mem
|
||||
|
||||
Copyright : All right reserved by NCIC.ICT
|
||||
|
||||
Author : Zhang Zhonghai
|
||||
Date : 2024/04/11
|
||||
***********************************************************************************************/
|
||||
#include <stdlib.h>
|
||||
#include <stdint.h>
|
||||
#include "utils.h"
|
||||
#include "align.h"
|
||||
|
||||
kswr_t align_sse_u8(byte_mem_t *bmem, kswq_sse_t *q, int tlen, const uint8_t *target, int _o_del, int _e_del, int _o_ins, int _e_ins, int xtra) // the first gap costs -(_o+_e)
|
||||
{
|
||||
int slen, i, m_b, n_b, te = -1, gmax = 0, minsc, endsc;
|
||||
uint64_t *b;
|
||||
__m128i zero, oe_del, e_del, oe_ins, e_ins, shift, *H0, *H1, *E, *Hmax;
|
||||
kswr_t r;
|
||||
|
||||
#if defined __SSE2__
|
||||
#define __max_16(ret, xx) do { \
|
||||
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 8)); \
|
||||
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 4)); \
|
||||
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 2)); \
|
||||
(xx) = _mm_max_epu8((xx), _mm_srli_si128((xx), 1)); \
|
||||
(ret) = _mm_extract_epi16((xx), 0) & 0x00ff; \
|
||||
} while (0)
|
||||
|
||||
// Given entries with arbitrary values, return whether they are all 0x00
|
||||
#define allzero_16(xx) (_mm_movemask_epi8(_mm_cmpeq_epi8((xx), zero)) == 0xffff)
|
||||
|
||||
#elif defined __ARM_NEON
|
||||
#define __max_16(ret, xx) (ret) = vmaxvq_u8((xx))
|
||||
#define allzero_16(xx) (vmaxvq_u8((xx)) == 0)
|
||||
|
||||
#else
|
||||
#define __max_16(ret, xx) (ret) = m128i_max_u8((xx))
|
||||
#define allzero_16(xx) (m128i_allzero((xx)))
|
||||
#endif
|
||||
|
||||
// initialization
|
||||
r = g_defr;
|
||||
minsc = (xtra&KSW_XSUBO)? xtra&0xffff : 0x10000;
|
||||
endsc = (xtra&KSW_XSTOP)? xtra&0xffff : 0x10000;
|
||||
m_b = n_b = 0; b = 0;
|
||||
zero = _mm_set1_epi32(0);
|
||||
oe_del = _mm_set1_epi8(_o_del + _e_del);
|
||||
e_del = _mm_set1_epi8(_e_del);
|
||||
oe_ins = _mm_set1_epi8(_o_ins + _e_ins);
|
||||
e_ins = _mm_set1_epi8(_e_ins);
|
||||
shift = _mm_set1_epi8(q->shift);
|
||||
H0 = q->H0; H1 = q->H1; E = q->E; Hmax = q->Hmax;
|
||||
slen = q->slen;
|
||||
for (i = 0; i < slen; ++i) {
|
||||
_mm_store_si128(E + i, zero);
|
||||
_mm_store_si128(H0 + i, zero);
|
||||
_mm_store_si128(Hmax + i, zero);
|
||||
}
|
||||
// the core loop
|
||||
for (i = 0; i < tlen; ++i) {
|
||||
int j, k, imax;
|
||||
__m128i e, h, t, f = zero, max = zero, *S = q->qp + target[i] * slen; // s is the 1st score vector
|
||||
h = _mm_load_si128(H0 + slen - 1); // h={2,5,8,11,14,17,-1,-1} in the above example
|
||||
h = _mm_slli_si128(h, 1); // h=H(i-1,-1); << instead of >> because x64 is little-endian
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
/* SW cells are computed in the following order:
|
||||
* H(i,j) = max{H(i-1,j-1)+S(i,j), E(i,j), F(i,j)}
|
||||
* E(i+1,j) = max{H(i,j)-q, E(i,j)-r}
|
||||
* F(i,j+1) = max{H(i,j)-q, F(i,j)-r}
|
||||
*/
|
||||
// compute H'(i,j); note that at the beginning, h=H'(i-1,j-1)
|
||||
h = _mm_adds_epu8(h, _mm_load_si128(S + j));
|
||||
h = _mm_subs_epu8(h, shift); // h=H'(i-1,j-1)+S(i,j)
|
||||
e = _mm_load_si128(E + j); // e=E'(i,j)
|
||||
h = _mm_max_epu8(h, e);
|
||||
h = _mm_max_epu8(h, f); // h=H'(i,j)
|
||||
max = _mm_max_epu8(max, h); // set max
|
||||
_mm_store_si128(H1 + j, h); // save to H'(i,j)
|
||||
// now compute E'(i+1,j)
|
||||
e = _mm_subs_epu8(e, e_del); // e=E'(i,j) - e_del
|
||||
t = _mm_subs_epu8(h, oe_del); // h=H'(i,j) - o_del - e_del
|
||||
e = _mm_max_epu8(e, t); // e=E'(i+1,j)
|
||||
_mm_store_si128(E + j, e); // save to E'(i+1,j)
|
||||
// now compute F'(i,j+1)
|
||||
f = _mm_subs_epu8(f, e_ins);
|
||||
t = _mm_subs_epu8(h, oe_ins); // h=H'(i,j) - o_ins - e_ins
|
||||
f = _mm_max_epu8(f, t);
|
||||
// get H'(i-1,j) and prepare for the next j
|
||||
h = _mm_load_si128(H0 + j); // h=H'(i-1,j)
|
||||
}
|
||||
// NB: we do not need to set E(i,j) as we disallow adjecent insertion and then deletion
|
||||
for (k = 0; LIKELY(k < 16); ++k) { // this block mimics SWPS3; NB: H(i,j) updated in the lazy-F loop cannot exceed max
|
||||
f = _mm_slli_si128(f, 1);
|
||||
for (j = 0; LIKELY(j < slen); ++j) {
|
||||
h = _mm_load_si128(H1 + j);
|
||||
h = _mm_max_epu8(h, f); // h=H'(i,j)
|
||||
_mm_store_si128(H1 + j, h);
|
||||
h = _mm_subs_epu8(h, oe_ins);
|
||||
f = _mm_subs_epu8(f, e_ins);
|
||||
if (UNLIKELY(allzero_16(_mm_subs_epu8(f, h)))) goto end_loop16;
|
||||
}
|
||||
}
|
||||
end_loop16:
|
||||
//int k;for (k=0;k<16;++k)printf("%d ", ((uint8_t*)&max)[k]);printf("\n");
|
||||
__max_16(imax, max); // imax is the maximum number in max
|
||||
//fprintf(stderr, "%d\n", imax);
|
||||
if (imax >= minsc) { // write the b array; this condition adds branching unfornately
|
||||
//fprintf(stderr, "%d\n", imax);
|
||||
if (n_b == 0 || (int32_t)b[n_b-1] + 1 != i) { // then append
|
||||
if (n_b == m_b) {
|
||||
m_b = m_b? m_b<<1 : 8;
|
||||
b = (uint64_t*)realloc(b, 8 * m_b);
|
||||
//b = (uint64_t *)byte_mem_request(bmem, 8 * m_b);
|
||||
}
|
||||
b[n_b++] = (uint64_t)imax<<32 | i;
|
||||
} else if ((int)(b[n_b-1]>>32) < imax) b[n_b-1] = (uint64_t)imax<<32 | i; // modify the last
|
||||
}
|
||||
if (imax > gmax) {
|
||||
gmax = imax; te = i; // te is the end position on the target
|
||||
for (j = 0; LIKELY(j < slen); ++j) // keep the H1 vector
|
||||
_mm_store_si128(Hmax + j, _mm_load_si128(H1 + j));
|
||||
if (gmax + q->shift >= 255 || gmax >= endsc) break;
|
||||
}
|
||||
S = H1; H1 = H0; H0 = S; // swap H0 and H1
|
||||
}
|
||||
r.score = gmax + q->shift < 255? gmax : 255;
|
||||
r.te = te;
|
||||
if (r.score != 255) { // get a->qe, the end of query match; find the 2nd best score
|
||||
int max = -1, tmp, low, high, qlen = slen * 16;
|
||||
uint8_t *t = (uint8_t*)Hmax;
|
||||
for (i = 0; i < qlen; ++i, ++t)
|
||||
if ((int)*t > max) max = *t, r.qe = i / 16 + i % 16 * slen;
|
||||
else if ((int)*t == max && (tmp = i / 16 + i % 16 * slen) < r.qe) r.qe = tmp;
|
||||
//printf("%d,%d\n", max, gmax);
|
||||
if (b) {
|
||||
i = (r.score + q->max - 1) / q->max;
|
||||
low = te - i; high = te + i;
|
||||
for (i = 0; i < n_b; ++i) {
|
||||
int e = (int32_t)b[i];
|
||||
if ((e < low || e > high) && (int)(b[i]>>32) > r.score2)
|
||||
r.score2 = b[i]>>32, r.te2 = e;
|
||||
}
|
||||
}
|
||||
}
|
||||
free(b);
|
||||
//byte_mem_clear(bmem);
|
||||
return r;
|
||||
}
|
||||
14
byte_alloc.c
14
byte_alloc.c
|
|
@ -39,31 +39,33 @@ void byte_mem_init_alloc(byte_mem_t *bmem, size_t byte_cnt)
|
|||
void *byte_mem_request(byte_mem_t *bmem, size_t byte_cnt)
|
||||
{
|
||||
// fprintf(stderr, "capacity:%ld, occupied: %ld, byte_cnt: %ld\n", bmem->capacity, bmem->occupied, byte_cnt);
|
||||
void *ret_mem = 0;
|
||||
//void *ret_mem = 0;
|
||||
if (bmem == 0)
|
||||
{
|
||||
ret_mem = 0;
|
||||
// ret_mem = 0;
|
||||
return 0;
|
||||
}
|
||||
else if (bmem->capacity == 0)
|
||||
{
|
||||
bmem->capacity = byte_cnt;
|
||||
bmem->mem = malloc(bmem->capacity);
|
||||
bmem->occupied = byte_cnt;
|
||||
ret_mem = bmem->mem;
|
||||
//ret_mem = bmem->mem;
|
||||
}
|
||||
else if (bmem->capacity - bmem->occupied >= byte_cnt)
|
||||
{
|
||||
ret_mem = bmem->mem + bmem->occupied;
|
||||
//ret_mem = bmem->mem + bmem->occupied;
|
||||
bmem->occupied += byte_cnt;
|
||||
}
|
||||
else
|
||||
{
|
||||
bmem->capacity = bmem->occupied + byte_cnt;
|
||||
bmem->mem = realloc(bmem->mem, bmem->capacity);
|
||||
ret_mem = bmem->mem + bmem->occupied;
|
||||
//ret_mem = bmem->mem + bmem->occupied;
|
||||
bmem->occupied += byte_cnt;
|
||||
}
|
||||
return ret_mem;
|
||||
// return ret_mem;
|
||||
return bmem->mem;
|
||||
}
|
||||
|
||||
void *byte_mem_request_and_clean(byte_mem_t *bmem, size_t byte_cnt)
|
||||
|
|
|
|||
18
extend.c
18
extend.c
|
|
@ -28,7 +28,7 @@
|
|||
kv_A(kv_A(i_arr, i), 0), kv_A(q_arr, i).a, \
|
||||
kv_A(kv_A(i_arr, i), 1), kv_A(t_arr, i).a, \
|
||||
5, mat, 6, 1, 6, 1, 100, 5, 100, \
|
||||
kv_A(kv_A(i_arr, i), 2), \
|
||||
/*kv_A(kv_A(i_arr, i), 2)*/10, \
|
||||
&qle, &tle, >le, &gscore, &max_off[0]); \
|
||||
score_total[kernel_id] += score; \
|
||||
} \
|
||||
|
|
@ -60,10 +60,11 @@ int main_extend(int argc, char *argv[])
|
|||
seq_v t_arr = {0};
|
||||
qti_v i_arr = {0};
|
||||
uint64_t score_total[EXTEND_FUNC_NUM] = {0};
|
||||
const int kmax_row = 3000000;
|
||||
|
||||
int query_read_row = read_seq(&q_arr, &read_buf, 3000000, qfp);
|
||||
int target_read_row = read_seq(&t_arr, &read_buf, 3000000, tfp);
|
||||
int info_read_row = read_qt_info(&i_arr, &read_buf, 3000000, 3, ifp);
|
||||
int query_read_row = read_seq(&q_arr, &read_buf, kmax_row, qfp);
|
||||
int target_read_row = read_seq(&t_arr, &read_buf, kmax_row, tfp);
|
||||
int info_read_row = read_qt_info(&i_arr, &read_buf, kmax_row, 3, ifp);
|
||||
// fprintf(stderr, "read row: %d\t%d\t%d\n", query_read_row, target_read_row, info_read_row);
|
||||
|
||||
int8_t mat[25] = {1, -4, -4, -4, -1,
|
||||
|
|
@ -84,9 +85,14 @@ int main_extend(int argc, char *argv[])
|
|||
EXTEND_PERFORMANCE_TEST(2, extend_avx2_u8, 0, excute_lines);
|
||||
EXTEND_PERFORMANCE_TEST(3, extend_avx2_i16_sp, 0, excute_lines);
|
||||
int i = 0; for(; i<ARRAY_SIZE(kernel_prof_idx); ++i) { gdata[kernel_prof_idx[i]] = score_total[i]; }
|
||||
display_extend_stats();
|
||||
|
||||
close_files();
|
||||
#ifdef SHOW_PERF
|
||||
fprintf(stderr, "[extend scalar ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_SCALAR] / TIME_DIVIDE_BY, gdata[G_EXT_SCALAR]);
|
||||
fprintf(stderr, "[extend avx i16] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16]);
|
||||
fprintf(stderr, "[extend avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_U8]);
|
||||
fprintf(stderr, "[extend avx sp ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16_SP] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16_SP]);
|
||||
#endif
|
||||
// close_files();
|
||||
fclose(qfp);
|
||||
fclose(tfp);
|
||||
fclose(ifp);
|
||||
|
|
|
|||
|
|
@ -23,19 +23,11 @@
|
|||
#define ARR_FOR_EQUAL_CHECK 1
|
||||
#define ELIMINATE_DIFF_3 0
|
||||
|
||||
#define PRUNING 1
|
||||
#define PRUNING 0
|
||||
#define REVERSE_TARGET 0
|
||||
|
||||
#define NO_VAL -1
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x),1)
|
||||
#define UNLIKELY(x) __builtin_expect((x),0)
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define SIMD_WIDTH 16
|
||||
|
||||
static int extend_scalar_inner(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del,
|
||||
|
|
@ -186,27 +178,6 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
|||
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
||||
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
||||
|
||||
static void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
|
||||
{
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
// 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen
|
||||
FILE *query_f = gfq[fnum],
|
||||
*target_f = gft[fnum],
|
||||
*info_f = gfi[fnum];
|
||||
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
|
||||
int i;
|
||||
// 处理query
|
||||
for (i = 0; i < qlen; ++i)
|
||||
fprintf(query_f, "%c", seq_map[query[i]]);
|
||||
fprintf(query_f, "\n");
|
||||
// 处理target
|
||||
for (i = 0; i < tlen; ++i)
|
||||
fprintf(target_f, "%c", seq_map[target[i]]);
|
||||
fprintf(target_f, "\n");
|
||||
// 处理其他信息
|
||||
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
|
||||
#endif
|
||||
}
|
||||
|
||||
int extend_avx2_i16(byte_mem_t *bmem,
|
||||
int qlen, // query length 待匹配段碱基的query长度
|
||||
|
|
|
|||
|
|
@ -27,14 +27,6 @@
|
|||
|
||||
#define NO_VAL -1
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x),1)
|
||||
#define UNLIKELY(x) __builtin_expect((x),0)
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define SIMD_WIDTH 16
|
||||
|
||||
static int extend_scalar_inner(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int m, const int8_t *mat, int o_del, int e_del,
|
||||
|
|
@ -175,28 +167,6 @@ static const uint16_t h_vec_int_mask[SIMD_WIDTH][SIMD_WIDTH] = {
|
|||
tmp = fA1; fA1 = fA2; fA2 = tmp; \
|
||||
tmp = mA1; mA1 = mA2; mA2 = tmp;
|
||||
|
||||
static void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
|
||||
{
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
// 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen
|
||||
FILE *query_f = gfq[fnum],
|
||||
*target_f = gft[fnum],
|
||||
*info_f = gfi[fnum];
|
||||
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
|
||||
int i;
|
||||
// 处理query
|
||||
for (i = 0; i < qlen; ++i)
|
||||
fprintf(query_f, "%c", seq_map[query[i]]);
|
||||
fprintf(query_f, "\n");
|
||||
// 处理target
|
||||
for (i = 0; i < tlen; ++i)
|
||||
fprintf(target_f, "%c", seq_map[target[i]]);
|
||||
fprintf(target_f, "\n");
|
||||
// 处理其他信息
|
||||
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
|
||||
#endif
|
||||
}
|
||||
|
||||
int extend_avx2_i16_sp(byte_mem_t *bmem,
|
||||
int qlen, // query length 待匹配段碱基的query长度
|
||||
const uint8_t *query, // read碱基序列
|
||||
|
|
|
|||
|
|
@ -17,18 +17,10 @@
|
|||
#include "byte_alloc.h"
|
||||
#include "utils.h"
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x), 1)
|
||||
#define UNLIKELY(x) __builtin_expect((x), 0)
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define KSW_EQUAL
|
||||
#define ARR_FOR_EQUAL_CHECK 1
|
||||
|
||||
#define PRUNING 1
|
||||
#define PRUNING 0
|
||||
#define REVERSE_TARGET 0
|
||||
|
||||
#define SIMD_WIDTH 32
|
||||
|
|
@ -429,23 +421,23 @@ int extend_avx2_u8(byte_mem_t *bmem,
|
|||
#if ARR_FOR_EQUAL_CHECK
|
||||
if (hA1[0] < b && checkspecial)
|
||||
{
|
||||
int mi;
|
||||
int mii;
|
||||
if (hA1[0] == b - 1)
|
||||
{
|
||||
icheck = iend + 1;
|
||||
}
|
||||
for (mi = 0; mi < b - 1; ++mi)
|
||||
for (mii = 0; mii < b - 1; ++mii)
|
||||
{
|
||||
if (midx - mi > 0)
|
||||
marr[mi] = MAX(marr[mi], hA2[midx - mi]);
|
||||
if (midx - mii > 0)
|
||||
marr[mii] = MAX(marr[mii], hA2[midx - mii]);
|
||||
}
|
||||
midx += 1;
|
||||
if (ibeg > icheck)
|
||||
{
|
||||
int stopCalc = 0;
|
||||
for (mi = 0; mi < b - 1; ++mi)
|
||||
for (mii = 0; mii < b - 1; ++mii)
|
||||
{
|
||||
stopCalc |= !marr[mi];
|
||||
stopCalc |= !marr[mii];
|
||||
}
|
||||
if (stopCalc)
|
||||
break;
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define PRUNING 1
|
||||
#define PRUNING 0
|
||||
|
||||
typedef struct
|
||||
{
|
||||
|
|
@ -37,8 +37,11 @@ int extend_scalar(byte_mem_t *bmem, int qlen, const uint8_t *query, int tlen, co
|
|||
assert(h0 > 0);
|
||||
// qp = malloc(qlen * m);
|
||||
// eh = calloc(qlen + 1, 8);
|
||||
qp = byte_mem_request(bmem, qlen * m);
|
||||
eh = byte_mem_request_and_clean(bmem, (qlen + 1) * 8);
|
||||
int qp_size = qlen * m;
|
||||
int eh_size = (qlen + 1) * 8;
|
||||
qp = byte_mem_request(bmem, qp_size + eh_size);
|
||||
eh = (eh_t*)((uint8_t*)qp + qp_size);
|
||||
memset(eh, 0, eh_size);
|
||||
// generate the query profile
|
||||
for (k = i = 0; k < m; ++k)
|
||||
{
|
||||
|
|
|
|||
13
main.c
13
main.c
|
|
@ -15,19 +15,24 @@
|
|||
#include <sys/time.h>
|
||||
#include "byte_alloc.h"
|
||||
#include "utils.h"
|
||||
#include "extend.h"
|
||||
#include "profiling.h"
|
||||
#include "extend.h"
|
||||
#include "align.h"
|
||||
|
||||
|
||||
#define TEST_EXTEND
|
||||
#define TEST_EXTEND 0
|
||||
#define TEST_ALIGN 1
|
||||
|
||||
// 程序执行入口
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
|
||||
#ifdef TEST_EXTEND
|
||||
#if TEST_EXTEND
|
||||
main_extend(argc - 1, argv + 1);
|
||||
#endif
|
||||
|
||||
#if TEST_ALIGN
|
||||
main_align(argc - 1, argv + 1);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
13
profiling.c
13
profiling.c
|
|
@ -28,17 +28,4 @@ uint64_t get_msec()
|
|||
// gettimeofday(&tv, NULL);
|
||||
// return (uint64_t)1000 * (tv.tv_sec + ((1e-6) * tv.tv_usec));
|
||||
return clock();
|
||||
}
|
||||
|
||||
/* show excution time */
|
||||
int display_extend_stats()
|
||||
{
|
||||
#ifdef SHOW_PERF
|
||||
fprintf(stderr, "[extend scalar ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_SCALAR] / TIME_DIVIDE_BY, gdata[G_EXT_SCALAR]);
|
||||
fprintf(stderr, "[extend avx i16] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16]);
|
||||
fprintf(stderr, "[extend avx u8 ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_U8] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_U8]);
|
||||
fprintf(stderr, "[extend avx sp ] time: %9.6lf s; score: %ld\n", gprof[G_EXT_AVX2_I16_SP] / TIME_DIVIDE_BY, gdata[G_EXT_AVX2_I16_SP]);
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
@ -8,6 +8,7 @@
|
|||
#ifndef __PROFILING_H
|
||||
#define __PROFILING_H
|
||||
#include <stdio.h>
|
||||
#include <time.h>
|
||||
|
||||
#define TIME_DIVIDE_BY (CLOCKS_PER_SEC * 1.0)
|
||||
|
||||
|
|
@ -19,7 +20,7 @@ extern FILE *ins_f_arr[LIM_TYPE],
|
|||
*score_f_arr[LIM_TYPE],
|
||||
*retval_f_arr[LIM_TYPE];
|
||||
|
||||
// GLOBAL
|
||||
// GLOBAL performance info
|
||||
enum
|
||||
{
|
||||
G_ALL = 0,
|
||||
|
|
@ -27,6 +28,10 @@ enum
|
|||
G_EXT_AVX2_I16,
|
||||
G_EXT_AVX2_U8,
|
||||
G_EXT_AVX2_I16_SP,
|
||||
G_ALN_I16,
|
||||
G_ALN_U8,
|
||||
G_ALN_AVX2_I16,
|
||||
G_ALN_AVX2_U8
|
||||
};
|
||||
|
||||
#ifdef SHOW_PERF
|
||||
|
|
@ -39,6 +44,4 @@ enum
|
|||
|
||||
// get current milli seconds
|
||||
uint64_t get_msec();
|
||||
// show excution time
|
||||
int display_extend_stats();
|
||||
#endif
|
||||
14
run.sh
14
run.sh
|
|
@ -1,6 +1,12 @@
|
|||
#!/bin/bash
|
||||
#./sw_perf ./input/diff_ext/q0.txt ./input/diff_ext/t0.txt ./input/diff_ext/i0.txt
|
||||
./sw_perf ./input/ext/q_0.fa ./input/ext/t_0.fa ./input/ext/i_0.txt
|
||||
./sw_perf ./input/ext/q_1.fa ./input/ext/t_1.fa ./input/ext/i_1.txt
|
||||
./sw_perf ./input/ext/q_2.fa ./input/ext/t_2.fa ./input/ext/i_2.txt
|
||||
./sw_perf ./input/ext/q_3.fa ./input/ext/t_3.fa ./input/ext/i_3.txt
|
||||
#./sw_perf ./input/ext/q_0.fa ./input/ext/t_0.fa ./input/ext/i_0.txt
|
||||
#./sw_perf ./input/ext/q_1.fa ./input/ext/t_1.fa ./input/ext/i_1.txt
|
||||
#./sw_perf ./input/ext/q_2.fa ./input/ext/t_2.fa ./input/ext/i_2.txt
|
||||
#./sw_perf ./input/ext/q_3.fa ./input/ext/t_3.fa ./input/ext/i_3.txt
|
||||
|
||||
#./sw_perf ./input/align/q_0.fa ./input/align/t_0.fa ./input/align/i_0.txt
|
||||
./sw_perf ./input/align/q_0.fa ./input/align/t_0.fa ./input/align/i_0_ext.txt
|
||||
|
||||
# debug
|
||||
#./sw_perf ./input/diff_align/q0.txt ./input/diff_align/t0.txt ./input/diff_align/i0.txt
|
||||
|
|
|
|||
26
utils.c
26
utils.c
|
|
@ -12,6 +12,7 @@
|
|||
#include <string.h>
|
||||
|
||||
#include "utils.h"
|
||||
#include "debug.h"
|
||||
|
||||
unsigned char nst_nt4_table[256] = {
|
||||
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
||||
|
|
@ -112,4 +113,29 @@ int read_seq(seq_v *seq_arr, buf_t *buf, int row_num_to_read, FILE *fp)
|
|||
++read_row_num;
|
||||
}
|
||||
return read_row_num;
|
||||
}
|
||||
|
||||
void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum)
|
||||
{
|
||||
#ifdef DEBUG_FILE_OUTPUT
|
||||
// 写到三个文件里,query.fa,target.fa,每行一个序列,info.txt,包含前缀得分h0,和长度信息qlen,tlen
|
||||
FILE *query_f = gfq[fnum],
|
||||
*target_f = gft[fnum],
|
||||
*info_f = gfi[fnum];
|
||||
const char seq_map[5] = {'A', 'C', 'G', 'T', 'N'};
|
||||
int i;
|
||||
// 处理query
|
||||
for (i = 0; i < qlen; ++i)
|
||||
fprintf(query_f, "%c", seq_map[query[i]]);
|
||||
fprintf(query_f, "\n");
|
||||
// 处理target
|
||||
for (i = 0; i < tlen; ++i)
|
||||
fprintf(target_f, "%c", seq_map[target[i]]);
|
||||
fprintf(target_f, "\n");
|
||||
// 处理其他信息
|
||||
if (h0 > 0)
|
||||
fprintf(info_f, "%-8d%-8d%-8d\n", qlen, tlen, h0);
|
||||
else
|
||||
fprintf(info_f, "%-8d%-8d\n", qlen, tlen);
|
||||
#endif
|
||||
}
|
||||
11
utils.h
11
utils.h
|
|
@ -10,6 +10,7 @@
|
|||
#define __UTILS_H
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
#include "kvec.h"
|
||||
|
||||
|
|
@ -18,6 +19,14 @@
|
|||
#define MAX(x, y) ((x) > (y) ? (x) : (y))
|
||||
#define MIN(x, y) ((x) < (y) ? (x) : (y))
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define LIKELY(x) __builtin_expect((x), 1)
|
||||
#define UNLIKELY(x) __builtin_expect((x), 0)
|
||||
#else
|
||||
#define LIKELY(x) (x)
|
||||
#define UNLIKELY(x) (x)
|
||||
#endif
|
||||
|
||||
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
|
||||
|
||||
#define BUF_SIZE 1048576
|
||||
|
|
@ -42,4 +51,6 @@ int read_qt_info(qti_v *qti_arr, buf_t *buf, int row_num_to_read, int info_num,
|
|||
|
||||
int read_seq(seq_v *seq_arr, buf_t *buf, int row_num_to_read, FILE *fp);
|
||||
|
||||
void write_query_target_sequence(int qlen, const uint8_t *query, int tlen, const uint8_t *target, int h0, int fnum);
|
||||
|
||||
#endif
|
||||
Loading…
Reference in New Issue