r807: allow to change block size in bwt_gen
For a very large reference genome, the default is too small.
This commit is contained in:
parent
b73f4c977a
commit
1bba5ef20e
1
bwt.h
1
bwt.h
|
|
@ -92,6 +92,7 @@ extern "C" {
|
||||||
void bwt_destroy(bwt_t *bwt);
|
void bwt_destroy(bwt_t *bwt);
|
||||||
|
|
||||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
|
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt); // from BWT-SW
|
||||||
|
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size); // from BWT-SW
|
||||||
void bwt_cal_sa(bwt_t *bwt, int intv);
|
void bwt_cal_sa(bwt_t *bwt, int intv);
|
||||||
|
|
||||||
void bwt_bwtupdate_core(bwt_t *bwt);
|
void bwt_bwtupdate_core(bwt_t *bwt);
|
||||||
|
|
|
||||||
|
|
@ -1598,15 +1598,20 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
void bwt_bwtgen2(const char *fn_pac, const char *fn_bwt, int block_size)
|
||||||
{
|
{
|
||||||
BWTInc *bwtInc;
|
BWTInc *bwtInc;
|
||||||
bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
|
bwtInc = BWTIncConstructFromPacked(fn_pac, block_size, block_size);
|
||||||
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
|
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
|
||||||
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
|
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
|
||||||
BWTIncFree(bwtInc);
|
BWTIncFree(bwtInc);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
||||||
|
{
|
||||||
|
bwt_bwtgen2(fn_pac, fn_bwt, 10000000);
|
||||||
|
}
|
||||||
|
|
||||||
int bwt_bwtgen_main(int argc, char *argv[])
|
int bwt_bwtgen_main(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
if (argc < 3) {
|
if (argc < 3) {
|
||||||
|
|
|
||||||
17
bwtindex.c
17
bwtindex.c
|
|
@ -189,11 +189,11 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||||
|
|
||||||
char *prefix = 0, *str, *str2, *str3;
|
char *prefix = 0, *str, *str2, *str3;
|
||||||
int c, algo_type = 0, is_64 = 0;
|
int c, algo_type = 0, is_64 = 0, block_size = 10000000;
|
||||||
clock_t t;
|
clock_t t;
|
||||||
int64_t l_pac;
|
int64_t l_pac;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "6a:p:")) >= 0) {
|
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'a': // if -a is not set, algo_type will be determined later
|
case 'a': // if -a is not set, algo_type will be determined later
|
||||||
if (strcmp(optarg, "div") == 0) algo_type = 1;
|
if (strcmp(optarg, "div") == 0) algo_type = 1;
|
||||||
|
|
@ -203,20 +203,25 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
break;
|
break;
|
||||||
case 'p': prefix = strdup(optarg); break;
|
case 'p': prefix = strdup(optarg); break;
|
||||||
case '6': is_64 = 1; break;
|
case '6': is_64 = 1; break;
|
||||||
|
case 'b':
|
||||||
|
block_size = strtol(optarg, &str, 10);
|
||||||
|
if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024;
|
||||||
|
else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024;
|
||||||
|
else if (*str == 'K' || *str == 'k') block_size *= 1024;
|
||||||
default: return 1;
|
default: return 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (optind + 1 > argc) {
|
if (optind + 1 > argc) {
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "Usage: bwa index [-a bwtsw|is] [-c] <in.fasta>\n\n");
|
fprintf(stderr, "Usage: bwa index [options] <in.fasta>\n\n");
|
||||||
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
|
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw or is [auto]\n");
|
||||||
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
|
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
|
||||||
|
fprintf(stderr, " -b INT block size for the bwtsw algorithm (force -a bwtsw) [%d]\n", block_size);
|
||||||
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
|
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
|
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
|
||||||
fprintf(stderr, " `-a div' do not work not for long genomes. Please choose `-a'\n");
|
fprintf(stderr, " `-a div' do not work not for long genomes.\n\n");
|
||||||
fprintf(stderr, " according to the length of the genome.\n\n");
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
if (prefix == 0) {
|
if (prefix == 0) {
|
||||||
|
|
@ -242,7 +247,7 @@ int bwa_index(int argc, char *argv[]) // the "index" command
|
||||||
strcpy(str2, prefix); strcat(str2, ".bwt");
|
strcpy(str2, prefix); strcat(str2, ".bwt");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
||||||
if (algo_type == 2) bwt_bwtgen(str, str2);
|
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
||||||
else if (algo_type == 1 || algo_type == 3) {
|
else if (algo_type == 1 || algo_type == 3) {
|
||||||
bwt_t *bwt;
|
bwt_t *bwt;
|
||||||
bwt = bwt_pac2bwt(str, algo_type == 3);
|
bwt = bwt_pac2bwt(str, algo_type == 3);
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue