diff --git a/Makefile b/Makefile index e11a04d..f14d906 100644 --- a/Makefile +++ b/Makefile @@ -3,10 +3,9 @@ CFLAGS= -g -Wall -O2 CXXFLAGS= $(CFLAGS) AR= ar DFLAGS= -DHAVE_PTHREAD #-D_NO_SSE2 #-D_FILE_OFFSET_BITS=64 -LOBJS= bamlite.o utils.o bwt.o bwtio.o bwtaln.o bwtgap.o bntseq.o bwamem.o bwamem_pair.o stdaln.o \ - bwaseqio.o bwase.o kstring.o -AOBJS= QSufSort.o bwt_gen.o \ - is.o bwtmisc.o bwtindex.o ksw.o bwape.o \ +LOBJS= utils.o kstring.o ksw.o bwt.o bntseq.o bwamem.o bwamem_pair.o +AOBJS= QSufSort.o bwt_gen.o stdaln.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \ + is.o bwtmisc.o bwtindex.o bwape.o \ bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \ bwtsw2_chain.o fastmap.o bwtsw2_pair.o PROG= bwa diff --git a/bwase.c b/bwase.c index 8fa79ac..1f36aaa 100644 --- a/bwase.c +++ b/bwase.c @@ -489,17 +489,6 @@ void bwa_print_sam1(const bntseq_t *bns, bwa_seq_t *p, const bwa_seq_t *mate, in } } -bntseq_t *bwa_open_nt(const char *prefix) -{ - bntseq_t *ntbns; - char *str; - str = (char*)calloc(strlen(prefix) + 10, 1); - strcat(strcpy(str, prefix), ".nt"); - ntbns = bns_restore(str); - free(str); - return ntbns; -} - void bwa_print_sam_SQ(const bntseq_t *bns) { int i; diff --git a/bwt.c b/bwt.c index 2903daa..7b37fe5 100644 --- a/bwt.c +++ b/bwt.c @@ -338,3 +338,79 @@ int bwt_smem1(const bwt_t *bwt, int len, const uint8_t *q, int x, int min_intv, if (tmpvec == 0 || tmpvec[1] == 0) free(a[1].a); return ret; } + +/************************* + * Read/write BWT and SA * + *************************/ + +void bwt_dump_bwt(const char *fn, const bwt_t *bwt) +{ + FILE *fp; + fp = xopen(fn, "wb"); + fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + fwrite(bwt->bwt, 4, bwt->bwt_size, fp); + fclose(fp); +} + +void bwt_dump_sa(const char *fn, const bwt_t *bwt) +{ + FILE *fp; + fp = xopen(fn, "wb"); + fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); + fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); + fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); + fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + fclose(fp); +} + +void bwt_restore_sa(const char *fn, bwt_t *bwt) +{ + char skipped[256]; + FILE *fp; + bwtint_t primary; + + fp = xopen(fn, "rb"); + fread(&primary, sizeof(bwtint_t), 1, fp); + xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same."); + fread(skipped, sizeof(bwtint_t), 4, fp); // skip + fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); + fread(&primary, sizeof(bwtint_t), 1, fp); + xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); + + bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; + bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); + bwt->sa[0] = -1; + + fread(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); + fclose(fp); +} + +bwt_t *bwt_restore_bwt(const char *fn) +{ + bwt_t *bwt; + FILE *fp; + + bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); + fp = xopen(fn, "rb"); + fseek(fp, 0, SEEK_END); + bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2; + bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); + fseek(fp, 0, SEEK_SET); + fread(&bwt->primary, sizeof(bwtint_t), 1, fp); + fread(bwt->L2+1, sizeof(bwtint_t), 4, fp); + fread(bwt->bwt, 4, bwt->bwt_size, fp); + bwt->seq_len = bwt->L2[4]; + fclose(fp); + bwt_gen_cnt_table(bwt); + + return bwt; +} + +void bwt_destroy(bwt_t *bwt) +{ + if (bwt == 0) return; + free(bwt->sa); free(bwt->bwt); + free(bwt); +} diff --git a/bwtio.c b/bwtio.c deleted file mode 100644 index 7508609..0000000 --- a/bwtio.c +++ /dev/null @@ -1,77 +0,0 @@ -#include -#include -#include -#include "bwt.h" -#include "utils.h" - -void bwt_dump_bwt(const char *fn, const bwt_t *bwt) -{ - FILE *fp; - fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(bwt->bwt, 4, bwt->bwt_size, fp); - fclose(fp); -} - -void bwt_dump_sa(const char *fn, const bwt_t *bwt) -{ - FILE *fp; - fp = xopen(fn, "wb"); - fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp); - fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fwrite(&bwt->seq_len, sizeof(bwtint_t), 1, fp); - fwrite(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); - fclose(fp); -} - -void bwt_restore_sa(const char *fn, bwt_t *bwt) -{ - char skipped[256]; - FILE *fp; - bwtint_t primary; - - fp = xopen(fn, "rb"); - fread(&primary, sizeof(bwtint_t), 1, fp); - xassert(primary == bwt->primary, "SA-BWT inconsistency: primary is not the same."); - fread(skipped, sizeof(bwtint_t), 4, fp); // skip - fread(&bwt->sa_intv, sizeof(bwtint_t), 1, fp); - fread(&primary, sizeof(bwtint_t), 1, fp); - xassert(primary == bwt->seq_len, "SA-BWT inconsistency: seq_len is not the same."); - - bwt->n_sa = (bwt->seq_len + bwt->sa_intv) / bwt->sa_intv; - bwt->sa = (bwtint_t*)calloc(bwt->n_sa, sizeof(bwtint_t)); - bwt->sa[0] = -1; - - fread(bwt->sa + 1, sizeof(bwtint_t), bwt->n_sa - 1, fp); - fclose(fp); -} - -bwt_t *bwt_restore_bwt(const char *fn) -{ - bwt_t *bwt; - FILE *fp; - - bwt = (bwt_t*)calloc(1, sizeof(bwt_t)); - fp = xopen(fn, "rb"); - fseek(fp, 0, SEEK_END); - bwt->bwt_size = (ftell(fp) - sizeof(bwtint_t) * 5) >> 2; - bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4); - fseek(fp, 0, SEEK_SET); - fread(&bwt->primary, sizeof(bwtint_t), 1, fp); - fread(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fread(bwt->bwt, 4, bwt->bwt_size, fp); - bwt->seq_len = bwt->L2[4]; - fclose(fp); - bwt_gen_cnt_table(bwt); - - return bwt; -} - -void bwt_destroy(bwt_t *bwt) -{ - if (bwt == 0) return; - free(bwt->sa); free(bwt->bwt); - free(bwt); -} diff --git a/fastmap.c b/fastmap.c index 91a4ecb..d52a315 100644 --- a/fastmap.c +++ b/fastmap.c @@ -17,7 +17,7 @@ int main_mem(int argc, char *argv[]) mem_opt_t *opt; bwt_t *bwt; bntseq_t *bns; - int c, n; + int c, n, l; gzFile fp, fp2 = 0; kseq_t *ks, *ks2 = 0; uint8_t *pac = 0; @@ -57,6 +57,8 @@ int main_mem(int argc, char *argv[]) pac = calloc(bns->l_pac/4+1, 1); fread(pac, 1, bns->l_pac/4+1, bns->fp_pac); } + for (l = 0; l < bns->n_seqs; ++l) + printf("@SQ\tSN:%s\tLN:%d\n", bns->anns[l].name, bns->anns[l].len); fp = strcmp(argv[optind + 1], "-")? gzopen(argv[optind + 1], "r") : gzdopen(fileno(stdin), "r"); ks = kseq_init(fp); diff --git a/utils.c b/utils.c index 127c8fe..1cebaab 100644 --- a/utils.c +++ b/utils.c @@ -40,6 +40,10 @@ KSORT_INIT(128, pair64_t, pair64_lt) KSORT_INIT(64, uint64_t, ks_lt_generic) +/******************** + * System utilities * + ********************/ + FILE *err_xopen_core(const char *func, const char *fn, const char *mode) { FILE *fp = 0;