code refactoring
This commit is contained in:
parent
e613195e17
commit
67543f19a1
37
bwa.c
37
bwa.c
|
|
@ -94,3 +94,40 @@ ret_gen_cigar:
|
||||||
return cigar;
|
return cigar;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*********************
|
||||||
|
* Full index reader *
|
||||||
|
*********************/
|
||||||
|
|
||||||
|
bwaidx_t *bwa_idx_load(const char *prefix, int which)
|
||||||
|
{
|
||||||
|
bwaidx_t *idx;
|
||||||
|
idx = calloc(1, sizeof(bwaidx_t));
|
||||||
|
if (which & BWA_IDX_BWT) {
|
||||||
|
char *tmp;
|
||||||
|
tmp = calloc(strlen(prefix) + 5, 1);
|
||||||
|
strcat(strcpy(tmp, prefix), ".bwt"); // FM-index
|
||||||
|
idx->bwt = bwt_restore_bwt(tmp);
|
||||||
|
strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA)
|
||||||
|
bwt_restore_sa(tmp, idx->bwt);
|
||||||
|
free(tmp);
|
||||||
|
}
|
||||||
|
if (which & BWA_IDX_BNS) {
|
||||||
|
idx->bns = bns_restore(prefix);
|
||||||
|
if (which & BWA_IDX_PAC) {
|
||||||
|
idx->pac = calloc(idx->bns->l_pac/4+1, 1);
|
||||||
|
fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence
|
||||||
|
}
|
||||||
|
fclose(idx->bns->fp_pac);
|
||||||
|
idx->bns->fp_pac = 0;
|
||||||
|
}
|
||||||
|
return idx;
|
||||||
|
}
|
||||||
|
|
||||||
|
void bwa_idx_destroy(bwaidx_t *idx)
|
||||||
|
{
|
||||||
|
if (idx == 0) return;
|
||||||
|
if (idx->bwt) bwt_destroy(idx->bwt);
|
||||||
|
if (idx->bns) bns_destroy(idx->bns);
|
||||||
|
if (idx->pac) free(idx->pac);
|
||||||
|
free(idx);
|
||||||
|
}
|
||||||
|
|
|
||||||
16
bwa.h
16
bwa.h
|
|
@ -2,6 +2,19 @@
|
||||||
#define BWA_H_
|
#define BWA_H_
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include "bntseq.h"
|
||||||
|
#include "bwt.h"
|
||||||
|
|
||||||
|
#define BWA_IDX_BWT 0x1
|
||||||
|
#define BWA_IDX_BNS 0x2
|
||||||
|
#define BWA_IDX_PAC 0x4
|
||||||
|
#define BWA_IDX_ALL 0x7
|
||||||
|
|
||||||
|
typedef struct {
|
||||||
|
bwt_t *bwt; // FM-index
|
||||||
|
bntseq_t *bns; // information on the reference sequences
|
||||||
|
uint8_t *pac; // the actual 2-bit encoded reference sequences with 'N' converted to a random base
|
||||||
|
} bwaidx_t;
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
int l_seq;
|
int l_seq;
|
||||||
|
|
@ -16,6 +29,9 @@ extern "C" {
|
||||||
|
|
||||||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
|
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar);
|
||||||
|
|
||||||
|
bwaidx_t *bwa_idx_load(const char *prefix, int which);
|
||||||
|
void bwa_idx_destroy(bwaidx_t *idx);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
1
bwamem.c
1
bwamem.c
|
|
@ -112,7 +112,6 @@ void smem_set_query(smem_i *itr, int len, const uint8_t *query)
|
||||||
itr->len = len;
|
itr->len = len;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
const bwtintv_v *smem_next(smem_i *itr, int split_len, int split_width)
|
||||||
{
|
{
|
||||||
int i, max, max_i, ori_start;
|
int i, max, max_i, ori_start;
|
||||||
|
|
|
||||||
54
fastmap.c
54
fastmap.c
|
|
@ -2,8 +2,7 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include "bntseq.h"
|
#include "bwa.h"
|
||||||
#include "bwt.h"
|
|
||||||
#include "bwamem.h"
|
#include "bwamem.h"
|
||||||
#include "kvec.h"
|
#include "kvec.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
|
|
@ -15,13 +14,11 @@ extern unsigned char nst_nt4_table[256];
|
||||||
int main_mem(int argc, char *argv[])
|
int main_mem(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
mem_opt_t *opt;
|
mem_opt_t *opt;
|
||||||
bwt_t *bwt;
|
|
||||||
bntseq_t *bns;
|
|
||||||
int c, n, l;
|
int c, n, l;
|
||||||
gzFile fp, fp2 = 0;
|
gzFile fp, fp2 = 0;
|
||||||
kseq_t *ks, *ks2 = 0;
|
kseq_t *ks, *ks2 = 0;
|
||||||
uint8_t *pac = 0;
|
|
||||||
bseq1_t *seqs;
|
bseq1_t *seqs;
|
||||||
|
bwaidx_t *idx;
|
||||||
|
|
||||||
opt = mem_opt_init();
|
opt = mem_opt_init();
|
||||||
while ((c = getopt(argc, argv, "PHk:c:v:s:r:t:")) >= 0) {
|
while ((c = getopt(argc, argv, "PHk:c:v:s:r:t:")) >= 0) {
|
||||||
|
|
@ -48,19 +45,9 @@ int main_mem(int argc, char *argv[])
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
mem_fill_scmat(opt->a, opt->b, opt->mat);
|
mem_fill_scmat(opt->a, opt->b, opt->mat);
|
||||||
{ // load the packed sequences, BWT and SA
|
idx = bwa_idx_load(argv[optind], BWA_IDX_ALL);
|
||||||
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
for (l = 0; l < idx->bns->n_seqs; ++l)
|
||||||
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
printf("@SQ\tSN:%s\tLN:%d\n", idx->bns->anns[l].name, idx->bns->anns[l].len);
|
||||||
bwt = bwt_restore_bwt(tmp);
|
|
||||||
strcat(strcpy(tmp, argv[optind]), ".sa");
|
|
||||||
bwt_restore_sa(tmp, bwt);
|
|
||||||
free(tmp);
|
|
||||||
bns = bns_restore(argv[optind]);
|
|
||||||
pac = calloc(bns->l_pac/4+1, 1);
|
|
||||||
fread(pac, 1, bns->l_pac/4+1, bns->fp_pac);
|
|
||||||
}
|
|
||||||
for (l = 0; l < bns->n_seqs; ++l)
|
|
||||||
printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len);
|
|
||||||
|
|
||||||
fp = strcmp(argv[optind + 1], "-")? gzopen(argv[optind + 1], "r") : gzdopen(fileno(stdin), "r");
|
fp = strcmp(argv[optind + 1], "-")? gzopen(argv[optind + 1], "r") : gzdopen(fileno(stdin), "r");
|
||||||
ks = kseq_init(fp);
|
ks = kseq_init(fp);
|
||||||
|
|
@ -70,13 +57,12 @@ int main_mem(int argc, char *argv[])
|
||||||
opt->flag |= MEM_F_PE;
|
opt->flag |= MEM_F_PE;
|
||||||
}
|
}
|
||||||
while ((seqs = bseq_read(opt->chunk_size, &n, ks, ks2)) != 0) {
|
while ((seqs = bseq_read(opt->chunk_size, &n, ks, ks2)) != 0) {
|
||||||
mem_process_seqs(opt, bwt, bns, pac, n, seqs);
|
mem_process_seqs(opt, idx->bwt, idx->bns, idx->pac, n, seqs);
|
||||||
free(seqs);
|
free(seqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
free(opt); free(pac);
|
free(opt);
|
||||||
bns_destroy(bns);
|
bwa_idx_destroy(idx);
|
||||||
bwt_destroy(bwt);
|
|
||||||
kseq_destroy(ks);
|
kseq_destroy(ks);
|
||||||
gzclose(fp);
|
gzclose(fp);
|
||||||
if (ks2) {
|
if (ks2) {
|
||||||
|
|
@ -92,10 +78,9 @@ int main_fastmap(int argc, char *argv[])
|
||||||
kseq_t *seq;
|
kseq_t *seq;
|
||||||
bwtint_t k;
|
bwtint_t k;
|
||||||
gzFile fp;
|
gzFile fp;
|
||||||
bwt_t *bwt;
|
|
||||||
bntseq_t *bns;
|
|
||||||
smem_i *itr;
|
smem_i *itr;
|
||||||
const bwtintv_v *a;
|
const bwtintv_v *a;
|
||||||
|
bwaidx_t *idx;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
|
while ((c = getopt(argc, argv, "w:l:ps:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
|
|
@ -112,16 +97,8 @@ int main_fastmap(int argc, char *argv[])
|
||||||
|
|
||||||
fp = gzopen(argv[optind + 1], "r");
|
fp = gzopen(argv[optind + 1], "r");
|
||||||
seq = kseq_init(fp);
|
seq = kseq_init(fp);
|
||||||
{ // load the packed sequences, BWT and SA
|
idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS);
|
||||||
char *tmp = calloc(strlen(argv[optind]) + 5, 1);
|
itr = smem_itr_init(idx->bwt);
|
||||||
strcat(strcpy(tmp, argv[optind]), ".bwt");
|
|
||||||
bwt = bwt_restore_bwt(tmp);
|
|
||||||
strcat(strcpy(tmp, argv[optind]), ".sa");
|
|
||||||
bwt_restore_sa(tmp, bwt);
|
|
||||||
free(tmp);
|
|
||||||
bns = bns_restore(argv[optind]);
|
|
||||||
}
|
|
||||||
itr = smem_itr_init(bwt);
|
|
||||||
while (kseq_read(seq) >= 0) {
|
while (kseq_read(seq) >= 0) {
|
||||||
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
printf("SQ\t%s\t%ld", seq->name.s, seq->seq.l);
|
||||||
if (print_seq) {
|
if (print_seq) {
|
||||||
|
|
@ -141,10 +118,10 @@ int main_fastmap(int argc, char *argv[])
|
||||||
bwtint_t pos;
|
bwtint_t pos;
|
||||||
int len, is_rev, ref_id;
|
int len, is_rev, ref_id;
|
||||||
len = (uint32_t)p->info - (p->info>>32);
|
len = (uint32_t)p->info - (p->info>>32);
|
||||||
pos = bns_depos(bns, bwt_sa(bwt, p->x[0] + k), &is_rev);
|
pos = bns_depos(idx->bns, bwt_sa(idx->bwt, p->x[0] + k), &is_rev);
|
||||||
if (is_rev) pos -= len - 1;
|
if (is_rev) pos -= len - 1;
|
||||||
bns_cnt_ambi(bns, pos, len, &ref_id);
|
bns_cnt_ambi(idx->bns, pos, len, &ref_id);
|
||||||
printf("\t%s:%c%ld", bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - bns->anns[ref_id].offset) + 1);
|
printf("\t%s:%c%ld", idx->bns->anns[ref_id].name, "+-"[is_rev], (long)(pos - idx->bns->anns[ref_id].offset) + 1);
|
||||||
}
|
}
|
||||||
} else fputs("\t*", stdout);
|
} else fputs("\t*", stdout);
|
||||||
putchar('\n');
|
putchar('\n');
|
||||||
|
|
@ -154,8 +131,7 @@ int main_fastmap(int argc, char *argv[])
|
||||||
}
|
}
|
||||||
|
|
||||||
smem_itr_destroy(itr);
|
smem_itr_destroy(itr);
|
||||||
bns_destroy(bns);
|
bwa_idx_destroy(idx);
|
||||||
bwt_destroy(bwt);
|
|
||||||
kseq_destroy(seq);
|
kseq_destroy(seq);
|
||||||
gzclose(fp);
|
gzclose(fp);
|
||||||
return 0;
|
return 0;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue