automatically choose the algorithm for BWT
This commit is contained in:
parent
a74523a68d
commit
72563c38f3
5
bntseq.c
5
bntseq.c
|
|
@ -163,7 +163,7 @@ void bns_destroy(bntseq_t *bns)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
|
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
|
||||||
{
|
{
|
||||||
kseq_t *seq;
|
kseq_t *seq;
|
||||||
char name[1024];
|
char name[1024];
|
||||||
|
|
@ -172,6 +172,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
|
||||||
int l_buf;
|
int l_buf;
|
||||||
unsigned char buf[0x10000];
|
unsigned char buf[0x10000];
|
||||||
int32_t m_seqs, m_holes, l, i;
|
int32_t m_seqs, m_holes, l, i;
|
||||||
|
int64_t ret = -1;
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
|
|
||||||
// initialization
|
// initialization
|
||||||
|
|
@ -235,6 +236,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
|
||||||
bns->l_pac += seq->seq.l;
|
bns->l_pac += seq->seq.l;
|
||||||
}
|
}
|
||||||
xassert(bns->l_pac, "zero length sequence.");
|
xassert(bns->l_pac, "zero length sequence.");
|
||||||
|
ret = bns->l_pac;
|
||||||
{ // finalize .pac file
|
{ // finalize .pac file
|
||||||
ubyte_t ct;
|
ubyte_t ct;
|
||||||
fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp);
|
fwrite(buf, 1, (l_buf>>2) + ((l_buf&3) == 0? 0 : 1), fp);
|
||||||
|
|
@ -251,6 +253,7 @@ void bns_fasta2bntseq(gzFile fp_fa, const char *prefix)
|
||||||
bns_dump(bns, prefix);
|
bns_dump(bns, prefix);
|
||||||
bns_destroy(bns);
|
bns_destroy(bns);
|
||||||
kseq_destroy(seq);
|
kseq_destroy(seq);
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
int bwa_fa2pac(int argc, char *argv[])
|
int bwa_fa2pac(int argc, char *argv[])
|
||||||
|
|
|
||||||
2
bntseq.h
2
bntseq.h
|
|
@ -70,7 +70,7 @@ extern "C" {
|
||||||
bntseq_t *bns_restore(const char *prefix);
|
bntseq_t *bns_restore(const char *prefix);
|
||||||
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
|
bntseq_t *bns_restore_core(const char *ann_filename, const char* amb_filename, const char* pac_filename);
|
||||||
void bns_destroy(bntseq_t *bns);
|
void bns_destroy(bntseq_t *bns);
|
||||||
void bns_fasta2bntseq(gzFile fp_fa, const char *prefix);
|
int64_t bns_fasta2bntseq(gzFile fp_fa, const char *prefix);
|
||||||
int bns_coor_pac2real(const bntseq_t *bns, int64_t pac_coor, int len, int32_t *real_seq);
|
int bns_coor_pac2real(const bntseq_t *bns, int64_t pac_coor, int len, int32_t *real_seq);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
|
|
|
||||||
14
bwtindex.c
14
bwtindex.c
|
|
@ -42,12 +42,13 @@ void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
||||||
int bwa_index(int argc, char *argv[])
|
int bwa_index(int argc, char *argv[])
|
||||||
{
|
{
|
||||||
char *prefix = 0, *str, *str2, *str3;
|
char *prefix = 0, *str, *str2, *str3;
|
||||||
int c, algo_type = 3, is_color = 0;
|
int c, algo_type = 0, is_color = 0;
|
||||||
clock_t t;
|
clock_t t;
|
||||||
|
int64_t l_pac;
|
||||||
|
|
||||||
while ((c = getopt(argc, argv, "ca:p:")) >= 0) {
|
while ((c = getopt(argc, argv, "ca:p:")) >= 0) {
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case 'a':
|
case 'a': // if -a is not set, algo_type will be determined later
|
||||||
if (strcmp(optarg, "div") == 0) algo_type = 1;
|
if (strcmp(optarg, "div") == 0) algo_type = 1;
|
||||||
else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
|
else if (strcmp(optarg, "bwtsw") == 0) algo_type = 2;
|
||||||
else if (strcmp(optarg, "is") == 0) algo_type = 3;
|
else if (strcmp(optarg, "is") == 0) algo_type = 3;
|
||||||
|
|
@ -79,7 +80,7 @@ int bwa_index(int argc, char *argv[])
|
||||||
gzFile fp = xzopen(argv[optind], "r");
|
gzFile fp = xzopen(argv[optind], "r");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
||||||
bns_fasta2bntseq(fp, prefix);
|
l_pac = bns_fasta2bntseq(fp, prefix);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
gzclose(fp);
|
gzclose(fp);
|
||||||
} else { // color indexing
|
} else { // color indexing
|
||||||
|
|
@ -87,7 +88,7 @@ int bwa_index(int argc, char *argv[])
|
||||||
strcat(strcpy(str, prefix), ".nt");
|
strcat(strcpy(str, prefix), ".nt");
|
||||||
t = clock();
|
t = clock();
|
||||||
fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
|
fprintf(stderr, "[bwa_index] Pack nucleotide FASTA... ");
|
||||||
bns_fasta2bntseq(fp, str);
|
l_pac = bns_fasta2bntseq(fp, str);
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
gzclose(fp);
|
gzclose(fp);
|
||||||
{
|
{
|
||||||
|
|
@ -99,6 +100,11 @@ int bwa_index(int argc, char *argv[])
|
||||||
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (l_pac > 0xffffffffu) {
|
||||||
|
fprintf(stderr, "[%s] BWA only works with reference sequences shorter than 4GB in total. Abort!\n", __func__);
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
||||||
{
|
{
|
||||||
strcpy(str, prefix); strcat(str, ".pac");
|
strcpy(str, prefix); strcat(str, ".pac");
|
||||||
strcpy(str2, prefix); strcat(str2, ".rpac");
|
strcpy(str2, prefix); strcat(str2, ".rpac");
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue