sw_perf/utils.c

115 lines
4.5 KiB
C

/*********************************************************************************************
Description: Some useful functions
Copyright : All rights reserved by NCIC.ICT
Author : Zhang Zhonghai
Date : 2023/08/25
***********************************************************************************************/
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "utils.h"
/*
 * Lookup table indexed by a byte value (0..255) that yields a small integer
 * base code:
 *   'A'/'a' -> 0, 'C'/'c' -> 1, 'G'/'g' -> 2, 'T'/'t' -> 3,
 *   '-'     -> 5, every other byte -> 4.
 * Each row below covers 16 consecutive byte values.
 */
unsigned char nst_nt4_table[256] = {
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5 /*'-'*/, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, /* 'A', 'C', 'G' */
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 'T' */
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4, /* 'a', 'c', 'g' */
4, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, /* 't' */
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4};
/*
 * Base letters in the order A, C, G, T, N; presumably the inverse of
 * nst_nt4_table for codes 0..4 (code -> character) -- TODO confirm against callers.
 * NOTE(review): code 5 ('-') produced by the table has no letter here;
 * BASE[5] is the string's terminating '\0'.
 */
const char *BASE = "ACGTN";
/**
 * Convert a NUL-terminated string of base characters, in place, to the
 * integer base codes given by nst_nt4_table
 * (A/a = 0, C/c = 1, G/g = 2, T/t = 3, '-' = 5, anything else = 4).
 *
 * The length is measured once, before any byte is rewritten: 'A'/'a' turns
 * into the byte value 0, so after the call the buffer can no longer be
 * measured with strlen() and the caller must keep track of the length.
 *
 * @param str  string to convert in place; a NULL pointer is ignored.
 */
void base_char_to_int(char *str)
{
    if (str == NULL)
        return;

    /* size_t holds the full strlen() result; narrowing it to int would
     * truncate (or go negative) for very long inputs. */
    const size_t slen = strlen(str);
    for (size_t i = 0; i < slen; ++i)
    {
        /* Index through uint8_t so bytes >= 0x80 never become a negative index. */
        str[i] = (char)nst_nt4_table[(uint8_t)str[i]];
    }
}
/**
 * Read up to row_num_to_read lines of query-target info from fp.
 *
 * Every line is parsed as info_num decimal integers and stored in the next
 * free element of qti_arr, i.e. at index qti_arr->n, which is then
 * incremented. Elements that already own an int array large enough for
 * info_num values keep it; otherwise the element's array is resized.
 *
 * @param qti_arr          destination vector; grown as needed, with the newly
 *                         added tail zero-filled.
 * @param buf              line buffer; allocated with BUF_SIZE bytes when
 *                         buf->m is 0.
 * @param row_num_to_read  maximum number of lines to consume.
 * @param info_num         number of int values per line:
 *                         2 (qlen, tlen) or 3 (qlen, tlen, init_score).
 * @param fp               input stream.
 * @return number of lines read by this call (0 at end of file), or -1 on
 *         invalid arguments or allocation failure.
 *
 * NOTE(review): a line longer than buf->m - 1 characters is returned by
 * fgets() in several pieces and therefore counted as several rows.
 * NOTE(review): the element's own n field is not updated here; callers are
 * presumably expected to index a[0..info_num-1] directly -- confirm.
 */
int read_qt_info(qti_v *qti_arr, buf_t *buf, int row_num_to_read, int info_num, FILE *fp)
{
    if (!qti_arr || !buf || !fp) return -1;
    if (info_num != 2 && info_num != 3) return -1;

    if (buf->m == 0) {
        buf->addr = malloc(BUF_SIZE);
        // Leave buf->m at 0 on failure so that a later call retries the allocation
        // instead of handing a NULL buffer to fgets().
        if (!buf->addr) return -1;
        buf->m = BUF_SIZE;
    }

    int read_row_num = 0;
    while (row_num_to_read > 0 && fgets((char *)buf->addr, buf->m, fp) != NULL)
    {
        // Grow based on the index that is actually written (qti_arr->n). Comparing
        // against the per-call row counter would overrun the array whenever the
        // vector is not empty on entry.
        if (qti_arr->n >= qti_arr->m) {
            const size_t old_m = qti_arr->m;
            kv_resize(qti_t, *qti_arr, old_m == 0 ? 128 : old_m * 2);
            if (!qti_arr->a) {          // resize failed: leave an empty, consistent vector
                qti_arr->n = 0;
                qti_arr->m = 0;
                return -1;
            }
            // Zero the new tail so every fresh element starts with no storage of its own.
            memset(&qti_arr->a[old_m], 0, (qti_arr->m - old_m) * sizeof(qti_t));
        }

        qti_t *qti = &kv_A(*qti_arr, qti_arr->n); // next free query-target info slot
        if (qti->m < info_num) {
            kv_resize(int, *qti, info_num);
            if (!qti->a) {
                qti->m = 0;
                return -1;
            }
        }

        // Pre-zero the fields: a blank or short line then yields zeros instead of
        // indeterminate (freshly allocated) or stale values from an earlier use.
        for (int i = 0; i < info_num; ++i) qti->a[i] = 0;
        if (info_num == 2) sscanf((char *)buf->addr, "%d %d", &qti->a[0], &qti->a[1]);
        else sscanf((char *)buf->addr, "%d %d %d", &qti->a[0], &qti->a[1], &qti->a[2]);

        ++qti_arr->n;                   // commit the slot only once it is fully filled in
        --row_num_to_read;
        ++read_row_num;
    }
    return read_row_num;
}
/**
 * Read up to row_num_to_read lines of query or target sequence from fp.
 *
 * Each line has its trailing line terminator removed, is converted in place
 * to base codes with base_char_to_int(), and is copied into the next free
 * element of seq_arr (index seq_arr->n, which is then incremented). The
 * element's n field is set to the number of bases on the line; an empty line
 * produces an element with n == 0.
 *
 * @param seq_arr          destination vector; grown as needed, with the newly
 *                         added tail zero-filled.
 * @param buf              line buffer; allocated with BUF_SIZE bytes when
 *                         buf->m is 0. Its contents are overwritten.
 * @param row_num_to_read  maximum number of lines to consume.
 * @param fp               input stream.
 * @return number of lines read by this call (0 at end of file), or -1 on
 *         invalid arguments or allocation failure.
 *
 * NOTE(review): a line longer than buf->m - 1 characters is returned by
 * fgets() in several pieces and therefore stored as several sequences.
 */
int read_seq(seq_v *seq_arr, buf_t *buf, int row_num_to_read, FILE *fp)
{
    if (!seq_arr || !buf || !fp) return -1;

    if (buf->m == 0) {
        buf->addr = malloc(BUF_SIZE);
        // Leave buf->m at 0 on failure so that a later call retries the allocation
        // instead of handing a NULL buffer to fgets().
        if (!buf->addr) return -1;
        buf->m = BUF_SIZE;
    }

    int read_row_num = 0;
    while (row_num_to_read > 0 && fgets((char *)buf->addr, buf->m, fp) != NULL)
    {
        size_t base_size = strlen((char *)buf->addr);

        // Trim the line terminator. The length test is required: a line that starts
        // with an embedded NUL byte has length 0, and indexing [base_size - 1] would
        // then read in front of the buffer.
        if (base_size > 0 && (char)buf->addr[base_size - 1] == '\n') {
            buf->addr[--base_size] = (uint8_t)'\0';
        }
        // Also drop the '\r' of a CRLF line so it is not stored as an extra base.
        if (base_size > 0 && (char)buf->addr[base_size - 1] == '\r') {
            buf->addr[--base_size] = (uint8_t)'\0';
        }

        // Grow based on the index that is actually written (seq_arr->n). Comparing
        // against the per-call row counter would overrun the array whenever the
        // vector is not empty on entry.
        if (seq_arr->n >= seq_arr->m) {
            const size_t old_m = seq_arr->m;
            kv_resize(seq_t, *seq_arr, old_m == 0 ? 128 : old_m * 2);
            if (!seq_arr->a) {          // resize failed: leave an empty, consistent vector
                seq_arr->n = 0;
                seq_arr->m = 0;
                return -1;
            }
            // Zero the new tail so every fresh element starts with no storage of its own.
            memset(&seq_arr->a[old_m], 0, (seq_arr->m - old_m) * sizeof(seq_t));
        }

        seq_t *seq = &kv_A(*seq_arr, seq_arr->n); // next free sequence slot
        if (seq->m < base_size) {
            kv_resize(uint8_t, *seq, base_size + 16);
            if (!seq->a) {
                seq->m = 0;
                seq->n = 0;
                return -1;
            }
        }

        base_char_to_int((char *)buf->addr);
        // An empty line may leave seq->a NULL; memcpy must not be given a NULL pointer.
        if (base_size > 0) memcpy(seq->a, buf->addr, base_size);
        seq->n = base_size;

        ++seq_arr->n;                   // commit the slot only once it is fully filled in
        --row_num_to_read;
        ++read_row_num;
    }
    return read_row_num;
}