r807: allow to change block size in bwt_gen

For a very large reference genome, the default is too small.
This commit is contained in:
Heng Li 2014-08-25 10:31:54 -04:00
parent b73f4c977a
commit 1bba5ef20e
4 changed files with 20 additions and 9 deletions

1
bwt.h
View File

@ -92,6 +92,7 @@ extern "C" {
void bwt_destroy(bwt_t *bwt);
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW
void bwt_cal_sa(bwt_t *bwt, int intv);
void bwt_bwtupdate_core(bwt_t *bwt);

View File

@ -1598,15 +1598,20 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
}
}
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size)
{
BWTInc *bwtInc;
bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
bwtInc = BWTIncConstructFromPacked(fn_pac, block_size, block_size);
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
BWTIncFree(bwtInc);
}
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
{
bwt_bwtgen2(fn_pac, fn_bwt, 10000000);
}
int bwt_bwtgen_main(int argc, char *argv[])
{
if (argc < 3) {

View File

@ -189,11 +189,11 @@ int bwa_index(int argc, char *argv[]) // the "index" command
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
char *prefix = 0, *str, *str2, *str3;
int c, algo_type = 0, is_64 = 0;
int c, algo_type = 0, is_64 = 0, block_size = 10000000;
clock_t t;
int64_t l_pac;
while ((c = getopt(argc, argv, "6a:p:")) >= 0) {
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
switch (c) {
case 'a': // if -a is not set, algo_type will be determined later
if (strcmp(optarg, "div") == 0) algo_type = 1;
@ -203,20 +203,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
break;
case 'p': prefix = strdup(optarg); break;
case '6': is_64 = 1; break;
case 'b':
block_size = strtol(optarg, &str, 10);
if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024;
else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024;
else if (*str == 'K' || *str == 'k') block_size *= 1024;
default: return 1;
}
}
if (optind + 1 > argc) {
fprintf(stderr, "\n");
fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
fprintf(stderr, "Usage: bwa index [options] <in.fasta>\n\n");
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
fprintf(stderr, " -b INT block size for the bwtsw algorithm (force -a bwtsw) [%d]\n", block_size);
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
fprintf(stderr, "\n");
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
fprintf(stderr, " according to the length of the genome.\n\n");
fprintf(stderr, " `-a div' do not work not for long genomes.\n\n");
return 1;
}
if (prefix == 0) {
@ -242,7 +247,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
strcpy(str2, prefix); strcat(str2, ".bwt");
t = clock();
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
if (algo_type == 2) bwt_bwtgen(str, str2);
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
else if (algo_type == 1 || algo_type == 3) {
bwt_t *bwt;
bwt = bwt_pac2bwt(str, algo_type == 3);

2
main.c
View File

@ -4,7 +4,7 @@
#include "utils.h"
#ifndef PACKAGE_VERSION
#define PACKAGE_VERSION "0.7.10-r789"
#define PACKAGE_VERSION "0.7.10-r807-dirty"
#endif
int bwa_fa2pac(int argc, char *argv[]);