moved indexing to libbwa
This commit is contained in:
parent
86170ffd2f
commit
9a828344ec
9
Makefile
9
Makefile
|
|
@ -4,9 +4,10 @@ CFLAGS= -g -Wall -Wno-unused-function -O2
|
||||||
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
WRAP_MALLOC=-DUSE_MALLOC_WRAPPERS
|
||||||
AR= ar
|
AR= ar
|
||||||
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
DFLAGS= -DHAVE_PTHREAD $(WRAP_MALLOC)
|
||||||
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o
|
LOBJS= utils.o kthread.o kstring.o ksw.o bwt.o bntseq.o bwa.o bwamem.o bwamem_pair.o bwamem_extra.o malloc_wrap.o \
|
||||||
AOBJS= QSufSort.o bwt_gen.o rope.o rle.o bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
QSufSort.o bwt_gen.o rope.o rle.o is.o bwtindex.o
|
||||||
is.o bwtindex.o bwape.o kopen.o pemerge.o maxk.o \
|
AOBJS= bwashm.o bwase.o bwaseqio.o bwtgap.o bwtaln.o bamlite.o \
|
||||||
|
bwape.o kopen.o pemerge.o maxk.o \
|
||||||
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
bwtsw2_core.o bwtsw2_main.o bwtsw2_aux.o bwt_lite.o \
|
||||||
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
bwtsw2_chain.o fastmap.o bwtsw2_pair.o
|
||||||
PROG= bwa
|
PROG= bwa
|
||||||
|
|
@ -63,7 +64,7 @@ bwt_gen.o: QSufSort.h malloc_wrap.h
|
||||||
bwt_lite.o: bwt_lite.h malloc_wrap.h
|
bwt_lite.o: bwt_lite.h malloc_wrap.h
|
||||||
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
|
bwtaln.o: bwtaln.h bwt.h bwtgap.h utils.h bwa.h bntseq.h malloc_wrap.h
|
||||||
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
|
bwtgap.o: bwtgap.h bwt.h bwtaln.h malloc_wrap.h
|
||||||
bwtindex.o: bntseq.h bwt.h utils.h rle.h rope.h malloc_wrap.h
|
bwtindex.o: bntseq.h bwa.h bwt.h utils.h rle.h rope.h malloc_wrap.h
|
||||||
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
|
bwtsw2_aux.o: bntseq.h bwt_lite.h utils.h bwtsw2.h bwt.h kstring.h
|
||||||
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
|
bwtsw2_aux.o: malloc_wrap.h bwa.h ksw.h kseq.h ksort.h
|
||||||
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
|
bwtsw2_chain.o: bwtsw2.h bntseq.h bwt_lite.h bwt.h malloc_wrap.h ksort.h
|
||||||
|
|
|
||||||
7
bwa.h
7
bwa.h
|
|
@ -12,6 +12,11 @@
|
||||||
|
|
||||||
#define BWA_CTL_SIZE 0x10000
|
#define BWA_CTL_SIZE 0x10000
|
||||||
|
|
||||||
|
#define BWTALGO_AUTO 0
|
||||||
|
#define BWTALGO_RB2 1
|
||||||
|
#define BWTALGO_BWTSW 2
|
||||||
|
#define BWTALGO_IS 3
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
bwt_t *bwt; // FM-index
|
bwt_t *bwt; // FM-index
|
||||||
bntseq_t *bns; // information on the reference sequences
|
bntseq_t *bns; // information on the reference sequences
|
||||||
|
|
@ -41,6 +46,8 @@ extern "C" {
|
||||||
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
uint32_t *bwa_gen_cigar(const int8_t mat[25], int q, int r, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||||||
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
uint32_t *bwa_gen_cigar2(const int8_t mat[25], int o_del, int e_del, int o_ins, int e_ins, int w_, int64_t l_pac, const uint8_t *pac, int l_query, uint8_t *query, int64_t rb, int64_t re, int *score, int *n_cigar, int *NM);
|
||||||
|
|
||||||
|
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size);
|
||||||
|
|
||||||
char *bwa_idx_infer_prefix(const char *hint);
|
char *bwa_idx_infer_prefix(const char *hint);
|
||||||
bwt_t *bwa_idx_load_bwt(const char *hint);
|
bwt_t *bwa_idx_load_bwt(const char *hint);
|
||||||
|
|
||||||
|
|
|
||||||
55
bwtindex.c
55
bwtindex.c
|
|
@ -32,6 +32,7 @@
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <zlib.h>
|
#include <zlib.h>
|
||||||
#include "bntseq.h"
|
#include "bntseq.h"
|
||||||
|
#include "bwa.h"
|
||||||
#include "bwt.h"
|
#include "bwt.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "rle.h"
|
#include "rle.h"
|
||||||
|
|
@ -208,19 +209,14 @@ int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
|
||||||
|
|
||||||
int bwa_index(int argc, char *argv[]) // the "index" command
|
int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
{
|
{
|
||||||
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000;
|
||||||
|
char *prefix = 0, *str;
|
||||||
char *prefix = 0, *str, *str2, *str3;
|
|
||||||
int c, algo_type = 0, is_64 = 0, block_size = 10000000;
|
|
||||||
clock_t t;
|
|
||||||
int64_t l_pac;
|
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'a': // if -a is not set, algo_type will be determined later
|
case 'a': // if -a is not set, algo_type will be determined later
|
||||||
if (strcmp(optarg, "rb2") == 0) algo_type = 1;
|
if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2;
|
||||||
else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
|
else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW;
|
||||||
else if (strcmp(optarg, "is") == 0) algo_type = 3;
|
else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS;
|
||||||
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
|
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
|
||||||
break;
|
break;
|
||||||
case 'p': prefix = strdup(optarg); break;
|
case 'p': prefix = strdup(optarg); break;
|
||||||
|
|
@ -252,16 +248,29 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
strcpy(prefix, argv[optind]);
|
strcpy(prefix, argv[optind]);
|
||||||
if (is_64) strcat(prefix, ".64");
|
if (is_64) strcat(prefix, ".64");
|
||||||
}
|
}
|
||||||
|
bwa_idx_build(argv[optind], prefix, algo_type, block_size);
|
||||||
|
free(prefix);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size)
|
||||||
|
{
|
||||||
|
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||||
|
|
||||||
|
char *str, *str2, *str3;
|
||||||
|
clock_t t;
|
||||||
|
int64_t l_pac;
|
||||||
|
|
||||||
str = (char*)calloc(strlen(prefix) + 10, 1);
|
str = (char*)calloc(strlen(prefix) + 10, 1);
|
||||||
str2 = (char*)calloc(strlen(prefix) + 10, 1);
|
str2 = (char*)calloc(strlen(prefix) + 10, 1);
|
||||||
str3 = (char*)calloc(strlen(prefix) + 10, 1);
|
str3 = (char*)calloc(strlen(prefix) + 10, 1);
|
||||||
|
|
||||||
{ // nucleotide indexing
|
{ // nucleotide indexing
|
||||||
gzFile fp = xzopen(argv[optind], "r");
|
gzFile fp = xzopen(fa, "r");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||||
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
err_gzclose(fp);
|
err_gzclose(fp);
|
||||||
}
|
}
|
||||||
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
||||||
|
|
@ -269,7 +278,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
strcpy(str, prefix); strcat(str, ".pac");
|
strcpy(str, prefix); strcat(str, ".pac");
|
||||||
strcpy(str2, prefix); strcat(str2, ".bwt");
|
strcpy(str2, prefix); strcat(str2, ".bwt");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
||||||
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
||||||
else if (algo_type == 1 || algo_type == 3) {
|
else if (algo_type == 1 || algo_type == 3) {
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
|
|
@ -277,25 +286,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
bwt_dump_bwt(str2, bwt);
|
bwt_dump_bwt(str2, bwt);
|
||||||
bwt_destroy(bwt);
|
bwt_destroy(bwt);
|
||||||
}
|
}
|
||||||
fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
strcpy(str, prefix); strcat(str, ".bwt");
|
strcpy(str, prefix); strcat(str, ".bwt");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Update BWT... ");
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... ");
|
||||||
bwt = bwt_restore_bwt(str);
|
bwt = bwt_restore_bwt(str);
|
||||||
bwt_bwtupdate_core(bwt);
|
bwt_bwtupdate_core(bwt);
|
||||||
bwt_dump_bwt(str, bwt);
|
bwt_dump_bwt(str, bwt);
|
||||||
bwt_destroy(bwt);
|
bwt_destroy(bwt);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
gzFile fp = xzopen(argv[optind], "r");
|
gzFile fp = xzopen(fa, "r");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
||||||
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
err_gzclose(fp);
|
err_gzclose(fp);
|
||||||
}
|
}
|
||||||
{
|
{
|
||||||
|
|
@ -303,13 +312,13 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
strcpy(str, prefix); strcat(str, ".bwt");
|
strcpy(str, prefix); strcat(str, ".bwt");
|
||||||
strcpy(str3, prefix); strcat(str3, ".sa");
|
strcpy(str3, prefix); strcat(str3, ".sa");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
|
||||||
bwt = bwt_restore_bwt(str);
|
bwt = bwt_restore_bwt(str);
|
||||||
bwt_cal_sa(bwt, 32);
|
bwt_cal_sa(bwt, 32);
|
||||||
bwt_dump_sa(str3, bwt);
|
bwt_dump_sa(str3, bwt);
|
||||||
bwt_destroy(bwt);
|
bwt_destroy(bwt);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
free(str3); free(str2); free(str); free(prefix);
|
free(str3); free(str2); free(str);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue