moved indexing to libbwa
This commit is contained in:
parent
86170ffd2f
commit
9a828344ec
9
Makefile
9
Makefile
|
|
@ -4,9 +4,10 @@ CFLAGS= -g -Wall -Wno-unused-function -O2
|
|||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||
AR= ar
|
||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
|
||||
AOBJS= QSufSort.o bwt_gen.o rope.o rle.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
is.o bwtindex.o bwape.o kopen.o pemerge.o maxk.o \
|
||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
|
||||
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
|
||||
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||
bwape.o kopen.o pemerge.o maxk.o \
|
||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||
PROG= bwa
|
||||
|
|
@ -63,7 +64,7 @@ bwt_gen.o: QSufSort.h malloc_wrap.h
|
|||
bwt_lite.o: bwt_lite.h malloc_wrap.h
|
||||
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
|
||||
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
|
||||
bwtindex.o: bntseq.h bwt.h utils.h rle.h rope.h malloc_wrap.h
|
||||
bwtindex.o: bntseq.h bwa.h bwt.h utils.h rle.h rope.h malloc_wrap.h
|
||||
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
|
||||
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
|
||||
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
|
||||
|
|
|
|||
7
bwa.h
7
bwa.h
|
|
@ -12,6 +12,11 @@
|
|||
|
||||
#define BWA_CTL_SIZE 0x10000
|
||||
|
||||
#define BWTALGO_AUTO 0
|
||||
#define BWTALGO_RB2 1
|
||||
#define BWTALGO_BWTSW 2
|
||||
#define BWTALGO_IS 3
|
||||
|
||||
typedef struct {
|
||||
bwt_t *bwt; // FM-index
|
||||
bntseq_t *bns; // information on the reference sequences
|
||||
|
|
@ -41,6 +46,8 @@ extern "C" {
|
|||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||||
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||||
|
||||
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size);
|
||||
|
||||
char *bwa_idx_infer_prefix(const char *hint);
|
||||
bwt_t *bwa_idx_load_bwt(const char *hint);
|
||||
|
||||
|
|
|
|||
55
bwtindex.c
55
bwtindex.c
|
|
@ -32,6 +32,7 @@
|
|||
#include <time.h>
|
||||
#include <zlib.h>
|
||||
#include "bntseq.h"
|
||||
#include "bwa.h"
|
||||
#include "bwt.h"
|
||||
#include "utils.h"
|
||||
#include "rle.h"
|
||||
|
|
@ -208,19 +209,14 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
|
|||
|
||||
int bwa_index(int argc, char *argv[]) // the "index" command
|
||||
{
|
||||
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||
|
||||
char *prefix = 0, *str, *str2, *str3;
|
||||
int c, algo_type = 0, is_64 = 0, block_size = 10000000;
|
||||
clock_t t;
|
||||
int64_t l_pac;
|
||||
|
||||
int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000;
|
||||
char *prefix = 0, *str;
|
||||
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
||||
switch (c) {
|
||||
case 'a': // if -a is not set, algo_type will be determined later
|
||||
if (strcmp(optarg, "rb2") == 0) algo_type = 1;
|
||||
else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
|
||||
else if (strcmp(optarg, "is") == 0) algo_type = 3;
|
||||
if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2;
|
||||
else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW;
|
||||
else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS;
|
||||
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
|
||||
break;
|
||||
case 'p': prefix = strdup(optarg); break;
|
||||
|
|
@ -252,16 +248,29 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
strcpy(prefix, argv[optind]);
|
||||
if (is_64) strcat(prefix, ".64");
|
||||
}
|
||||
bwa_idx_build(argv[optind], prefix, algo_type, block_size);
|
||||
free(prefix);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size)
|
||||
{
|
||||
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||
|
||||
char *str, *str2, *str3;
|
||||
clock_t t;
|
||||
int64_t l_pac;
|
||||
|
||||
str = (char*)calloc(strlen(prefix) + 10, 1);
|
||||
str2 = (char*)calloc(strlen(prefix) + 10, 1);
|
||||
str3 = (char*)calloc(strlen(prefix) + 10, 1);
|
||||
|
||||
{ // nucleotide indexing
|
||||
gzFile fp = xzopen(argv[optind], "r");
|
||||
gzFile fp = xzopen(fa, "r");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
err_gzclose(fp);
|
||||
}
|
||||
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
||||
|
|
@ -269,7 +278,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
strcpy(str, prefix); strcat(str, ".pac");
|
||||
strcpy(str2, prefix); strcat(str2, ".bwt");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
||||
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
||||
else if (algo_type == 1 || algo_type == 3) {
|
||||
bwt_t *bwt;
|
||||
|
|
@ -277,25 +286,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
bwt_dump_bwt(str2, bwt);
|
||||
bwt_destroy(bwt);
|
||||
}
|
||||
fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
}
|
||||
{
|
||||
bwt_t *bwt;
|
||||
strcpy(str, prefix); strcat(str, ".bwt");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Update BWT... ");
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... ");
|
||||
bwt = bwt_restore_bwt(str);
|
||||
bwt_bwtupdate_core(bwt);
|
||||
bwt_dump_bwt(str, bwt);
|
||||
bwt_destroy(bwt);
|
||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
}
|
||||
{
|
||||
gzFile fp = xzopen(argv[optind], "r");
|
||||
gzFile fp = xzopen(fa, "r");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
||||
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
err_gzclose(fp);
|
||||
}
|
||||
{
|
||||
|
|
@ -303,13 +312,13 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
strcpy(str, prefix); strcat(str, ".bwt");
|
||||
strcpy(str3, prefix); strcat(str3, ".sa");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
|
||||
bwt = bwt_restore_bwt(str);
|
||||
bwt_cal_sa(bwt, 32);
|
||||
bwt_dump_sa(str3, bwt);
|
||||
bwt_destroy(bwt);
|
||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||
}
|
||||
free(str3); free(str2); free(str); free(prefix);
|
||||
free(str3); free(str2); free(str);
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue