code refactoring
This commit is contained in:
parent
e613195e17
commit
67543f19a1
37
bwa.c
37
bwa.c
|
|
@ -94,3 +94,40 @@ ret_gen_cigar:
|
|||
return cigar;
|
||||
}
|
||||
|
||||
/*********************
|
||||
* Full index reader *
|
||||
*********************/
|
||||
|
||||
bwaidx_t *bwa_idx_load(const char *prefix, int which)
|
||||
{
|
||||
bwaidx_t *idx;
|
||||
idx = calloc(1, sizeof(bwaidx_t));
|
||||
if (which & BWA_IDX_BWT) {
|
||||
char *tmp;
|
||||
tmp = calloc(strlen(prefix) + 5, 1);
|
||||
strcat(strcpy(tmp, prefix), ".bwt"); // FM-index
|
||||
idx->bwt = bwt_restore_bwt(tmp);
|
||||
strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)
|
||||
bwt_restore_sa(tmp, idx->bwt);
|
||||
free(tmp);
|
||||
}
|
||||
if (which & BWA_IDX_BNS) {
|
||||
idx->bns = bns_restore(prefix);
|
||||
if (which & BWA_IDX_PAC) {
|
||||
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
|
||||
fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
|
||||
}
|
||||
fclose(idx->bns->fp_pac);
|
||||
idx->bns->fp_pac = 0;
|
||||
}
|
||||
return idx;
|
||||
}
|
||||
|
||||
void bwa_idx_destroy(bwaidx_t *idx)
|
||||
{
|
||||
if (idx == 0) return;
|
||||
if (idx->bwt) bwt_destroy(idx->bwt);
|
||||
if (idx->bns) bns_destroy(idx->bns);
|
||||
if (idx->pac) free(idx->pac);
|
||||
free(idx);
|
||||
}
|
||||
|
|
|
|||
16
bwa.h
16
bwa.h
|
|
@ -2,6 +2,19 @@
|
|||
#define BWA_H_
|
||||
|
||||
#include <stdint.h>
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
|
||||
#define BWA_IDX_BWT 0x1
|
||||
#define BWA_IDX_BNS 0x2
|
||||
#define BWA_IDX_PAC 0x4
|
||||
#define BWA_IDX_ALL 0x7
|
||||
|
||||
typedef struct {
|
||||
bwt_t *bwt; // FM-index
|
||||
bntseq_t *bns; // information on the reference sequences
|
||||
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||
} bwaidx_t;
|
||||
|
||||
typedef struct {
|
||||
int l_seq;
|
||||
|
|
@ -16,6 +29,9 @@ extern "C" {
|
|||
|
||||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
|
||||
|
||||
bwaidx_t *bwa_idx_load(const char *prefix, int which);
|
||||
void bwa_idx_destroy(bwaidx_t *idx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
1
bwamem.c
1
bwamem.c
|
|
@ -112,7 +112,6 @@ void smem_set_query(smem_i *itr, int len, const uint8_t *query)
|
|||
itr->len = len;
|
||||
}
|
||||
|
||||
|
||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
||||
{
|
||||
int i, max, max_i, ori_start;
|
||||
|
|
|
|||
54
fastmap.c
54
fastmap.c
|
|
@ -2,8 +2,7 @@
|
|||
#include <stdio.h>
|
||||
#include <unistd.h>
|
||||
#include <stdlib.h>
|
||||
#include "bntseq.h"
|
||||
#include "bwt.h"
|
||||
#include "bwa.h"
|
||||
#include "bwamem.h"
|
||||
#include "kvec.h"
|
||||
#include "utils.h"
|
||||
|
|
@ -15,13 +14,11 @@ extern unsigned char nst_nt4_table[256];
|
|||
int main_mem(int argc, char *argv[])
|
||||
{
|
||||
mem_opt_t *opt;
|
||||
bwt_t *bwt;
|
||||
bntseq_t *bns;
|
||||
int c, n, l;
|
||||
gzFile fp, fp2 = 0;
|
||||
kseq_t *ks, *ks2 = 0;
|
||||
uint8_t *pac = 0;
|
||||
bseq1_t *seqs;
|
||||
bwaidx_t *idx;
|
||||
|
||||
opt = mem_opt_init();
|
||||
while ((c = getopt(argc, argv, "PHk:c:v:s:r:t:")) >= 0) {
|
||||
|
|
@ -48,19 +45,9 @@ int main_mem(int argc, char *argv[])
|
|||
return 1;
|
||||
}
|
||||
mem_fill_scmat(opt->a, opt->b, opt->mat);
|
||||
{ // load the packed sequences, BWT and SA
|
||||
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
||||
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
||||
bwt = bwt_restore_bwt(tmp);
|
||||
strcat(strcpy(tmp, argv[optind]), ".sa");
|
||||
bwt_restore_sa(tmp, bwt);
|
||||
free(tmp);
|
||||
bns = bns_restore(argv[optind]);
|
||||
pac = calloc(bns->l_pac/4+1, 1);
|
||||
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
||||
}
|
||||
for (l = 0; l < bns->n_seqs; ++l)
|
||||
printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
|
||||
idx = bwa_idx_load(argv[optind], BWA_IDX_ALL);
|
||||
for (l = 0; l < idx->bns->n_seqs; ++l)
|
||||
printf("@SQ\tSN:%s\tLN:%d\n", idx->bns->anns[l].name, idx->bns->anns[l].len);
|
||||
|
||||
fp = strcmp(argv[optind + 1], "-")? gzopen(argv[optind + 1], "r") : gzdopen(fileno(stdin), "r");
|
||||
ks = kseq_init(fp);
|
||||
|
|
@ -70,13 +57,12 @@ int main_mem(int argc, char *argv[])
|
|||
opt->flag |= MEM_F_PE;
|
||||
}
|
||||
while ((seqs = bseq_read(opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
||||
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
|
||||
free(seqs);
|
||||
}
|
||||
|
||||
free(opt); free(pac);
|
||||
bns_destroy(bns);
|
||||
bwt_destroy(bwt);
|
||||
free(opt);
|
||||
bwa_idx_destroy(idx);
|
||||
kseq_destroy(ks);
|
||||
gzclose(fp);
|
||||
if (ks2) {
|
||||
|
|
@ -92,10 +78,9 @@ int main_fastmap(int argc, char *argv[])
|
|||
kseq_t *seq;
|
||||
bwtint_t k;
|
||||
gzFile fp;
|
||||
bwt_t *bwt;
|
||||
bntseq_t *bns;
|
||||
smem_i *itr;
|
||||
const bwtintv_v *a;
|
||||
bwaidx_t *idx;
|
||||
|
||||
while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
|
||||
switch (c) {
|
||||
|
|
@ -112,16 +97,8 @@ int main_fastmap(int argc, char *argv[])
|
|||
|
||||
fp = gzopen(argv[optind + 1], "r");
|
||||
seq = kseq_init(fp);
|
||||
{ // load the packed sequences, BWT and SA
|
||||
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
||||
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
||||
bwt = bwt_restore_bwt(tmp);
|
||||
strcat(strcpy(tmp, argv[optind]), ".sa");
|
||||
bwt_restore_sa(tmp, bwt);
|
||||
free(tmp);
|
||||
bns = bns_restore(argv[optind]);
|
||||
}
|
||||
itr = smem_itr_init(bwt);
|
||||
idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS);
|
||||
itr = smem_itr_init(idx->bwt);
|
||||
while (kseq_read(seq) >= 0) {
|
||||
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
||||
if (print_seq) {
|
||||
|
|
@ -141,10 +118,10 @@ int main_fastmap(int argc, char *argv[])
|
|||
bwtint_t pos;
|
||||
int len, is_rev, ref_id;
|
||||
len = (uint32_t)p->info - (p->info>>32);
|
||||
pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
|
||||
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
|
||||
if (is_rev) pos -= len - 1;
|
||||
bns_cnt_ambi(bns, pos, len, &ref_id);
|
||||
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
||||
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
|
||||
printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
|
||||
}
|
||||
} else fputs("\t*", stdout);
|
||||
putchar('\n');
|
||||
|
|
@ -154,8 +131,7 @@ int main_fastmap(int argc, char *argv[])
|
|||
}
|
||||
|
||||
smem_itr_destroy(itr);
|
||||
bns_destroy(bns);
|
||||
bwt_destroy(bwt);
|
||||
bwa_idx_destroy(idx);
|
||||
kseq_destroy(seq);
|
||||
gzclose(fp);
|
||||
return 0;
|
||||
|
|
|
|||
Loading…
Reference in New Issue