diff --git a/bwt.h b/bwt.h index d2ff0ac..c862352 100644 --- a/bwt.h +++ b/bwt.h @@ -92,6 +92,7 @@ extern "C" { void bwt_destroy(bwt_t *bwt); void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW + void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW void bwt_cal_sa(bwt_t *bwt, int intv); void bwt_bwtupdate_core(bwt_t *bwt); diff --git a/bwt_gen.c b/bwt_gen.c index 6139d80..76f28c9 100644 --- a/bwt_gen.c +++ b/bwt_gen.c @@ -1598,15 +1598,20 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o } } -void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) +void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size) { BWTInc *bwtInc; - bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000); + bwtInc = BWTIncConstructFromPacked(fn_pac, block_size, block_size); printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone); BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0); BWTIncFree(bwtInc); } +void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) +{ + bwt_bwtgen2(fn_pac, fn_bwt, 10000000); +} + int bwt_bwtgen_main(int argc, char *argv[]) { if (argc < 3) { diff --git a/bwtindex.c b/bwtindex.c index 9e3ec15..18523da 100644 --- a/bwtindex.c +++ b/bwtindex.c @@ -189,11 +189,11 @@ int bwa_index(int argc, char *argv[]) // the "index" command extern void bwa_pac_rev_core(const char *fn, const char *fn_rev); char *prefix = 0, *str, *str2, *str3; - int c, algo_type = 0, is_64 = 0; + int c, algo_type = 0, is_64 = 0, block_size = 10000000; clock_t t; int64_t l_pac; - while ((c = getopt(argc, argv, "6a:p:")) >= 0) { + while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) { switch (c) { case 'a': // if -a is not set, algo_type will be determined later if (strcmp(optarg, "div") == 0) algo_type = 1; @@ -203,20 +203,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command break; case 'p': prefix = strdup(optarg); break; case '6': is_64 = 1; break; + case 'b': + block_size = strtol(optarg, &str, 10); + if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024; + else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024; + else if (*str == 'K' || *str == 'k') block_size *= 1024; default: return 1; } } if (optind + 1 > argc) { fprintf(stderr, "\n"); - fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] \n\n"); + fprintf(stderr, "Usage: bwa index [options] \n\n"); fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n"); fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n"); + fprintf(stderr, " -b INT block size for the bwtsw algorithm (force -a bwtsw) [%d]\n", block_size); fprintf(stderr, " -6 index files named as .64.* instead of .* \n"); fprintf(stderr, "\n"); fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n"); - fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n"); - fprintf(stderr, " according to the length of the genome.\n\n"); + fprintf(stderr, " `-a div' do not work not for long genomes.\n\n"); return 1; } if (prefix == 0) { @@ -242,7 +247,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command strcpy(str2, prefix); strcat(str2, ".bwt"); t = clock(); fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n"); - if (algo_type == 2) bwt_bwtgen(str, str2); + if (algo_type == 2) bwt_bwtgen2(str, str2, block_size); else if (algo_type == 1 || algo_type == 3) { bwt_t *bwt; bwt = bwt_pac2bwt(str, algo_type == 3); diff --git a/main.c b/main.c index b873b75..d212e24 100644 --- a/main.c +++ b/main.c @@ -4,7 +4,7 @@ #include "utils.h" #ifndef PACKAGE_VERSION -#define PACKAGE_VERSION "0.7.10-r789" +#define PACKAGE_VERSION "0.7.10-r807-dirty" #endif int bwa_fa2pac(int argc, char *argv[]);