r807: allow to change block size in bwt_gen
For a very large reference genome, the default is too small.
This commit is contained in:
parent
b73f4c977a
commit
1bba5ef20e
1
bwt.h
1
bwt.h
|
|
@ -92,6 +92,7 @@ extern "C" {
|
|||
void bwt_destroy(bwt_t *bwt);
|
||||
|
||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
|
||||
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW
|
||||
void bwt_cal_sa(bwt_t *bwt, int intv);
|
||||
|
||||
void bwt_bwtupdate_core(bwt_t *bwt);
|
||||
|
|
|
|||
|
|
@ -1598,15 +1598,20 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
|
|||
}
|
||||
}
|
||||
|
||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
||||
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size)
|
||||
{
|
||||
BWTInc *bwtInc;
|
||||
bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
|
||||
bwtInc = BWTIncConstructFromPacked(fn_pac, block_size, block_size);
|
||||
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
|
||||
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
|
||||
BWTIncFree(bwtInc);
|
||||
}
|
||||
|
||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
||||
{
|
||||
bwt_bwtgen2(fn_pac, fn_bwt, 10000000);
|
||||
}
|
||||
|
||||
int bwt_bwtgen_main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
|
|
|
|||
17
bwtindex.c
17
bwtindex.c
|
|
@ -189,11 +189,11 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||
|
||||
char *prefix = 0, *str, *str2, *str3;
|
||||
int c, algo_type = 0, is_64 = 0;
|
||||
int c, algo_type = 0, is_64 = 0, block_size = 10000000;
|
||||
clock_t t;
|
||||
int64_t l_pac;
|
||||
|
||||
while ((c = getopt(argc, argv, "6a:p:")) >= 0) {
|
||||
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
||||
switch (c) {
|
||||
case 'a': // if -a is not set, algo_type will be determined later
|
||||
if (strcmp(optarg, "div") == 0) algo_type = 1;
|
||||
|
|
@ -203,20 +203,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
break;
|
||||
case 'p': prefix = strdup(optarg); break;
|
||||
case '6': is_64 = 1; break;
|
||||
case 'b':
|
||||
block_size = strtol(optarg, &str, 10);
|
||||
if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024;
|
||||
else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024;
|
||||
else if (*str == 'K' || *str == 'k') block_size *= 1024;
|
||||
default: return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (optind + 1 > argc) {
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
|
||||
fprintf(stderr, "Usage: bwa index [options] <in.fasta>\n\n");
|
||||
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
|
||||
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
|
||||
fprintf(stderr, " -b INT block size for the bwtsw algorithm (force -a bwtsw) [%d]\n", block_size);
|
||||
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
|
||||
fprintf(stderr, "\n");
|
||||
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
|
||||
fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
|
||||
fprintf(stderr, " according to the length of the genome.\n\n");
|
||||
fprintf(stderr, " `-a div' do not work not for long genomes.\n\n");
|
||||
return 1;
|
||||
}
|
||||
if (prefix == 0) {
|
||||
|
|
@ -242,7 +247,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
|||
strcpy(str2, prefix); strcat(str2, ".bwt");
|
||||
t = clock();
|
||||
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
||||
if (algo_type == 2) bwt_bwtgen(str, str2);
|
||||
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
||||
else if (algo_type == 1 || algo_type == 3) {
|
||||
bwt_t *bwt;
|
||||
bwt = bwt_pac2bwt(str, algo_type == 3);
|
||||
|
|
|
|||
Loading…
Reference in New Issue