diff --git a/bwa.c b/bwa.c index e8735b5..fac0db7 100644 --- a/bwa.c +++ b/bwa.c @@ -4,6 +4,8 @@ #include "bwa.h" #include "ksw.h" +int bwa_verbose = 3; + /************************ * Batch FASTA/Q reader * ************************/ @@ -98,28 +100,69 @@ ret_gen_cigar: * Full index reader * *********************/ -bwaidx_t *bwa_idx_load(const char *prefix, int which) +char *bwa_idx_infer_prefix(const char *hint) +{ + char *prefix; + int l_hint; + FILE *fp; + l_hint = strlen(hint); + prefix = malloc(l_hint + 3 + 4 + 1); + strcpy(prefix, hint); + strcpy(prefix + l_hint, ".64.bwt"); + if ((fp = fopen(prefix, "rb")) != 0) { + fclose(fp); + prefix[l_hint + 3] = 0; + return prefix; + } else { + strcpy(prefix + l_hint, ".bwt"); + if ((fp = fopen(prefix, "rb")) == 0) { + free(prefix); + return 0; + } else { + fclose(fp); + prefix[l_hint] = 0; + return prefix; + } + } +} + +bwt_t *bwa_idx_load_bwt(const char *hint) +{ + char *tmp, *prefix; + bwt_t *bwt; + prefix = bwa_idx_infer_prefix(hint); + if (prefix == 0) { + if (bwa_verbose >= 1) + fprintf(stderr, "[E::%s] fail to locate the index files\n", __func__); + return 0; + } + tmp = calloc(strlen(prefix) + 5, 1); + strcat(strcpy(tmp, prefix), ".bwt"); // FM-index + bwt = bwt_restore_bwt(tmp); + strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) + bwt_restore_sa(tmp, bwt); + free(tmp); free(prefix); + return bwt; +} + +bwaidx_t *bwa_idx_load(const char *hint, int which) { bwaidx_t *idx; + char *prefix; + prefix = bwa_idx_infer_prefix(hint); + if (prefix == 0) return 0; idx = calloc(1, sizeof(bwaidx_t)); - if (which & BWA_IDX_BWT) { - char *tmp; - tmp = calloc(strlen(prefix) + 5, 1); - strcat(strcpy(tmp, prefix), ".bwt"); // FM-index - idx->bwt = bwt_restore_bwt(tmp); - strcat(strcpy(tmp, prefix), ".sa"); // partial suffix array (SA) - bwt_restore_sa(tmp, idx->bwt); - free(tmp); - } + if (which & BWA_IDX_BWT) idx->bwt = bwa_idx_load_bwt(hint); if (which & BWA_IDX_BNS) { idx->bns = bns_restore(prefix); if (which & BWA_IDX_PAC) { idx->pac = calloc(idx->bns->l_pac/4+1, 1); fread(idx->pac, 1, idx->bns->l_pac/4+1, idx->bns->fp_pac); // concatenated 2-bit encoded sequence + fclose(idx->bns->fp_pac); + idx->bns->fp_pac = 0; } - fclose(idx->bns->fp_pac); - idx->bns->fp_pac = 0; } + free(prefix); return idx; } diff --git a/bwa.h b/bwa.h index ad528c9..b5eda13 100644 --- a/bwa.h +++ b/bwa.h @@ -21,6 +21,8 @@ typedef struct { char *name, *comment, *seq, *qual, *sam; } bseq1_t; +extern int bwa_verbose; + #ifdef __cplusplus extern "C" { #endif @@ -29,7 +31,9 @@ extern "C" { uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar); - bwaidx_t *bwa_idx_load(const char *prefix, int which); + char *bwa_idx_infer_prefix(const char *hint); + bwt_t *bwa_idx_load_bwt(const char *hint); + bwaidx_t *bwa_idx_load(const char *hint, int which); void bwa_idx_destroy(bwaidx_t *idx); #ifdef __cplusplus diff --git a/bwape.c b/bwape.c index 77ae1fa..87393b1 100644 --- a/bwape.c +++ b/bwape.c @@ -10,6 +10,7 @@ #include "utils.h" #include "stdaln.h" #include "bwase.h" +#include "bwa.h" typedef struct { int n; @@ -716,7 +717,6 @@ int bwa_sai2sam_pe(int argc, char *argv[]) { extern char *bwa_rg_line, *bwa_rg_id; extern int bwa_set_rg(const char *s); - extern char *bwa_infer_prefix(const char *hint); int c; pe_opt_t *popt; char *prefix; @@ -762,7 +762,7 @@ int bwa_sai2sam_pe(int argc, char *argv[]) fprintf(stderr, "\n"); return 1; } - if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { + if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); free(bwa_rg_line); free(bwa_rg_id); return 0; diff --git a/bwase.c b/bwase.c index 017322b..8f50c7a 100644 --- a/bwase.c +++ b/bwase.c @@ -10,6 +10,7 @@ #include "bntseq.h" #include "utils.h" #include "kstring.h" +#include "bwa.h" int g_log_n[256]; char *bwa_rg_line, *bwa_rg_id; @@ -606,7 +607,6 @@ void bwa_sai2sam_se_core(const char *prefix, const char *fn_sa, const char *fn_f int bwa_sai2sam_se(int argc, char *argv[]) { - extern char *bwa_infer_prefix(const char *hint); int c, n_occ = 3; char *prefix; while ((c = getopt(argc, argv, "hn:f:r:")) >= 0) { @@ -628,7 +628,7 @@ int bwa_sai2sam_se(int argc, char *argv[]) fprintf(stderr, "Usage: bwa samse [-n max_occ] [-f out.sam] [-r RG_line] \n"); return 1; } - if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { + if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); free(bwa_rg_line); free(bwa_rg_id); return 0; diff --git a/bwtaln.c b/bwtaln.c index 84be510..96d4026 100644 --- a/bwtaln.c +++ b/bwtaln.c @@ -11,6 +11,7 @@ #include "bwtaln.h" #include "bwtgap.h" #include "utils.h" +#include "bwa.h" #ifdef HAVE_PTHREAD #include @@ -219,32 +220,6 @@ void bwa_aln_core(const char *prefix, const char *fn_fa, const gap_opt_t *opt) bwa_seq_close(ks); } -char *bwa_infer_prefix(const char *hint) -{ - char *prefix; - int l_hint; - FILE *fp; - l_hint = strlen(hint); - prefix = malloc(l_hint + 3 + 4 + 1); - strcpy(prefix, hint); - strcpy(prefix + l_hint, ".64.bwt"); - if ((fp = fopen(prefix, "rb")) != 0) { - fclose(fp); - prefix[l_hint + 3] = 0; - return prefix; - } else { - strcpy(prefix + l_hint, ".bwt"); - if ((fp = fopen(prefix, "rb")) == 0) { - free(prefix); - return 0; - } else { - fclose(fp); - prefix[l_hint] = 0; - return prefix; - } - } -} - int bwa_aln(int argc, char *argv[]) { int c, opte = -1; @@ -328,7 +303,7 @@ int bwa_aln(int argc, char *argv[]) k = l; } } - if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { + if ((prefix = bwa_idx_infer_prefix(argv[optind])) == 0) { fprintf(stderr, "[%s] fail to locate the index\n", __func__); free(opt); return 0; diff --git a/bwtsw2_main.c b/bwtsw2_main.c index e3f57f8..ab126f2 100644 --- a/bwtsw2_main.c +++ b/bwtsw2_main.c @@ -6,14 +6,12 @@ #include "bwt.h" #include "bwtsw2.h" #include "utils.h" +#include "bwa.h" int bwa_bwtsw2(int argc, char *argv[]) { - extern char *bwa_infer_prefix(const char *hint); bsw2opt_t *opt; - bwt_t *target; - char buf[1024], *prefix; - bntseq_t *bns; + bwaidx_t *idx; int c; opt = bsw2_init_opt(); @@ -81,19 +79,10 @@ int bwa_bwtsw2(int argc, char *argv[]) opt->t *= opt->a; opt->coef *= opt->a; - if ((prefix = bwa_infer_prefix(argv[optind])) == 0) { - fprintf(stderr, "[%s] fail to locate the index\n", __func__); - return 0; - } - strcpy(buf, prefix); target = bwt_restore_bwt(strcat(buf, ".bwt")); - strcpy(buf, prefix); bwt_restore_sa(strcat(buf, ".sa"), target); - bns = bns_restore(prefix); - - bsw2_aln(opt, bns, target, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); - - bns_destroy(bns); - bwt_destroy(target); - free(opt); free(prefix); + if ((idx = bwa_idx_load(argv[optind], BWA_IDX_BWT|BWA_IDX_BNS)) == 0) return 0; + bsw2_aln(opt, idx->bns, idx->bwt, argv[optind+1], optind+2 < argc? argv[optind+2] : 0); + bwa_idx_destroy(idx); + free(opt); return 0; }