2011-01-14 09:52:12 +08:00
|
|
|
/* The MIT License
|
|
|
|
|
|
2020-07-02 11:02:01 +08:00
|
|
|
Copyright (c) 2018- Dana-Farber Cancer Institute
|
|
|
|
|
2009-2018 Broad Institute, Inc.
|
|
|
|
|
2008-2009 Genome Research Ltd. (GRL)
|
2011-01-14 09:52:12 +08:00
|
|
|
|
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining
|
|
|
|
|
a copy of this software and associated documentation files (the
|
|
|
|
|
"Software"), to deal in the Software without restriction, including
|
|
|
|
|
without limitation the rights to use, copy, modify, merge, publish,
|
|
|
|
|
distribute, sublicense, and/or sell copies of the Software, and to
|
|
|
|
|
permit persons to whom the Software is furnished to do so, subject to
|
|
|
|
|
the following conditions:
|
|
|
|
|
|
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
|
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
|
|
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
|
|
|
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
|
|
|
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
|
|
|
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
|
|
|
|
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
|
|
|
|
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
|
|
|
|
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
|
|
|
SOFTWARE.
|
|
|
|
|
*/
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
|
#include <string.h>
|
|
|
|
|
#include <unistd.h>
|
|
|
|
|
#include <time.h>
|
|
|
|
|
#include <zlib.h>
|
|
|
|
|
#include "bntseq.h"
|
2016-02-14 07:48:46 +08:00
|
|
|
#include "bwa.h"
|
2011-01-14 09:52:12 +08:00
|
|
|
#include "bwt.h"
|
|
|
|
|
#include "utils.h"
|
2016-01-27 02:46:08 +08:00
|
|
|
#include "rle.h"
|
|
|
|
|
#include "rope.h"
|
2011-01-14 09:52:12 +08:00
|
|
|
|
2013-02-23 06:23:34 +08:00
|
|
|
#ifdef _DIVBWT
|
|
|
|
|
#include "divsufsort.h"
|
|
|
|
|
#endif
|
2011-01-14 09:52:12 +08:00
|
|
|
|
2013-05-02 22:12:01 +08:00
|
|
|
#ifdef USE_MALLOC_WRAPPERS
|
|
|
|
|
# include "malloc_wrap.h"
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
2013-02-23 06:23:34 +08:00
|
|
|
int is_bwt(ubyte_t *T, int n);
|
|
|
|
|
|
|
|
|
|
int64_t bwa_seq_len(const char *fn_pac)
|
|
|
|
|
{
|
|
|
|
|
FILE *fp;
|
|
|
|
|
int64_t pac_len;
|
|
|
|
|
ubyte_t c;
|
|
|
|
|
fp = xopen(fn_pac, "rb");
|
2013-03-01 17:37:46 +08:00
|
|
|
err_fseek(fp, -1, SEEK_END);
|
|
|
|
|
pac_len = err_ftell(fp);
|
|
|
|
|
err_fread_noeof(&c, 1, 1, fp);
|
|
|
|
|
err_fclose(fp);
|
2013-02-23 06:23:34 +08:00
|
|
|
return (pac_len - 1) * 4 + (int)c;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bwt_t *bwt_pac2bwt(const char *fn_pac, int use_is)
|
2011-01-14 09:52:12 +08:00
|
|
|
{
|
2013-02-23 06:23:34 +08:00
|
|
|
bwt_t *bwt;
|
|
|
|
|
ubyte_t *buf, *buf2;
|
2016-01-27 12:21:23 +08:00
|
|
|
int64_t i, pac_size;
|
2013-02-23 06:23:34 +08:00
|
|
|
FILE *fp;
|
|
|
|
|
|
|
|
|
|
// initialization
|
2013-05-02 22:12:01 +08:00
|
|
|
bwt = (bwt_t*)calloc(1, sizeof(bwt_t));
|
2013-02-23 06:23:34 +08:00
|
|
|
bwt->seq_len = bwa_seq_len(fn_pac);
|
|
|
|
|
bwt->bwt_size = (bwt->seq_len + 15) >> 4;
|
|
|
|
|
fp = xopen(fn_pac, "rb");
|
|
|
|
|
|
|
|
|
|
// prepare sequence
|
|
|
|
|
pac_size = (bwt->seq_len>>2) + ((bwt->seq_len&3) == 0? 0 : 1);
|
2013-05-02 22:12:01 +08:00
|
|
|
buf2 = (ubyte_t*)calloc(pac_size, 1);
|
2013-03-01 17:37:46 +08:00
|
|
|
err_fread_noeof(buf2, 1, pac_size, fp);
|
|
|
|
|
err_fclose(fp);
|
2013-02-23 06:23:34 +08:00
|
|
|
memset(bwt->L2, 0, 5 * 4);
|
2013-05-02 22:12:01 +08:00
|
|
|
buf = (ubyte_t*)calloc(bwt->seq_len + 1, 1);
|
2013-02-23 06:23:34 +08:00
|
|
|
for (i = 0; i < bwt->seq_len; ++i) {
|
|
|
|
|
buf[i] = buf2[i>>2] >> ((3 - (i&3)) << 1) & 3;
|
|
|
|
|
++bwt->L2[1+buf[i]];
|
|
|
|
|
}
|
|
|
|
|
for (i = 2; i <= 4; ++i) bwt->L2[i] += bwt->L2[i-1];
|
|
|
|
|
free(buf2);
|
|
|
|
|
|
|
|
|
|
// Burrows-Wheeler Transform
|
|
|
|
|
if (use_is) {
|
|
|
|
|
bwt->primary = is_bwt(buf, bwt->seq_len);
|
|
|
|
|
} else {
|
2016-01-27 02:46:08 +08:00
|
|
|
rope_t *r;
|
|
|
|
|
int64_t x;
|
|
|
|
|
rpitr_t itr;
|
|
|
|
|
const uint8_t *blk;
|
|
|
|
|
|
|
|
|
|
r = rope_init(ROPE_DEF_MAX_NODES, ROPE_DEF_BLOCK_LEN);
|
|
|
|
|
for (i = bwt->seq_len - 1, x = 0; i >= 0; --i) {
|
|
|
|
|
int c = buf[i] + 1;
|
|
|
|
|
x = rope_insert_run(r, x, c, 1, 0) + 1;
|
|
|
|
|
while (--c >= 0) x += r->c[c];
|
|
|
|
|
}
|
|
|
|
|
bwt->primary = x;
|
|
|
|
|
rope_itr_first(r, &itr);
|
|
|
|
|
x = 0;
|
|
|
|
|
while ((blk = rope_itr_next_block(&itr)) != 0) {
|
|
|
|
|
const uint8_t *q = blk + 2, *end = blk + 2 + *rle_nptr(blk);
|
|
|
|
|
while (q < end) {
|
|
|
|
|
int c = 0;
|
|
|
|
|
int64_t l;
|
|
|
|
|
rle_dec1(q, c, l);
|
|
|
|
|
for (i = 0; i < l; ++i)
|
|
|
|
|
buf[x++] = c - 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
rope_destroy(r);
|
2013-02-23 06:23:34 +08:00
|
|
|
}
|
2017-04-25 20:51:52 +08:00
|
|
|
bwt->bwt = (uint32_t*)calloc(bwt->bwt_size, 4);
|
2013-02-23 06:23:34 +08:00
|
|
|
for (i = 0; i < bwt->seq_len; ++i)
|
|
|
|
|
bwt->bwt[i>>4] |= buf[i] << ((15 - (i&15)) << 1);
|
|
|
|
|
free(buf);
|
|
|
|
|
return bwt;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int bwa_pac2bwt(int argc, char *argv[]) // the "pac2bwt" command; IMPORTANT: bwt generated at this step CANNOT be used with BWA. bwtupdate is required!
|
|
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
int c, use_is = 1;
|
|
|
|
|
while ((c = getopt(argc, argv, "d")) >= 0) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case 'd': use_is = 0; break;
|
|
|
|
|
default: return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 2 > argc) {
|
2024-04-02 07:42:37 +08:00
|
|
|
fprintf(stderr, "Usage: fastbwa pac2bwt [-d] <in.pac> <out.bwt>\n");
|
2013-02-23 06:23:34 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
bwt = bwt_pac2bwt(argv[optind], use_is);
|
|
|
|
|
bwt_dump_bwt(argv[optind+1], bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
#define bwt_B00(b, k) ((b)->bwt[(k)>>4]>>((~(k)&0xf)<<1)&3)
|
|
|
|
|
|
|
|
|
|
void bwt_bwtupdate_core(bwt_t *bwt)
|
|
|
|
|
{
|
|
|
|
|
bwtint_t i, k, c[4], n_occ;
|
|
|
|
|
uint32_t *buf;
|
|
|
|
|
|
|
|
|
|
n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
|
|
|
|
|
bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
|
2013-05-02 22:12:01 +08:00
|
|
|
buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
|
2013-02-23 06:23:34 +08:00
|
|
|
c[0] = c[1] = c[2] = c[3] = 0;
|
|
|
|
|
for (i = k = 0; i < bwt->seq_len; ++i) {
|
|
|
|
|
if (i % OCC_INTERVAL == 0) {
|
|
|
|
|
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
|
|
|
|
|
k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
|
|
|
|
|
}
|
|
|
|
|
if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2
|
|
|
|
|
++c[bwt_B00(bwt, i)];
|
|
|
|
|
}
|
|
|
|
|
// the last element
|
|
|
|
|
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
|
|
|
|
|
xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
|
|
|
|
|
// update bwt
|
|
|
|
|
free(bwt->bwt); bwt->bwt = buf;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int bwa_bwtupdate(int argc, char *argv[]) // the "bwtupdate" command
|
2011-01-14 09:52:12 +08:00
|
|
|
{
|
2013-02-23 06:23:34 +08:00
|
|
|
bwt_t *bwt;
|
2015-07-12 20:59:23 +08:00
|
|
|
if (argc != 2) {
|
2024-04-02 07:42:37 +08:00
|
|
|
fprintf(stderr, "Usage: fastbwa bwtupdate <the.bwt>\n");
|
2013-02-23 06:23:34 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
bwt = bwt_restore_bwt(argv[1]);
|
|
|
|
|
bwt_bwtupdate_core(bwt);
|
|
|
|
|
bwt_dump_bwt(argv[1], bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int bwa_bwt2sa(int argc, char *argv[]) // the "bwt2sa" command
|
|
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
int c, sa_intv = 32;
|
|
|
|
|
while ((c = getopt(argc, argv, "i:")) >= 0) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case 'i': sa_intv = atoi(optarg); break;
|
|
|
|
|
default: return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 2 > argc) {
|
2024-04-02 07:42:37 +08:00
|
|
|
fprintf(stderr, "Usage: fastbwa bwt2sa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
|
2013-02-23 06:23:34 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
bwt = bwt_restore_bwt(argv[optind]);
|
|
|
|
|
bwt_cal_sa(bwt, sa_intv);
|
|
|
|
|
bwt_dump_sa(argv[optind+1], bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-02 07:42:37 +08:00
|
|
|
int bwa_bwt2bytesa(int argc, char *argv[]) // the "bwt2bytesa" command
|
2024-02-22 01:26:57 +08:00
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
int c, sa_intv = 32;
|
|
|
|
|
while ((c = getopt(argc, argv, "i:")) >= 0) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case 'i': sa_intv = atoi(optarg); break;
|
|
|
|
|
default: return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (optind + 2 > argc) {
|
2024-04-02 07:42:37 +08:00
|
|
|
fprintf(stderr, "Usage: fastbwa bwt2bytesa [-i %d] <in.bwt> <out.sa>\n", sa_intv);
|
2024-02-22 01:26:57 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
bwt = bwt_restore_bwt(argv[optind]);
|
|
|
|
|
bwt_cal_byte_sa(bwt, sa_intv);
|
|
|
|
|
bwt_dump_byte_sa(argv[optind+1], bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2024-04-02 07:42:37 +08:00
|
|
|
int bwa_bwt2fmt(int argc, char *argv[]) // create fmt index
|
|
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
2024-04-02 07:49:38 +08:00
|
|
|
//char buf[1024];
|
2024-04-02 07:42:37 +08:00
|
|
|
if (optind + 1 > argc) {
|
|
|
|
|
fprintf(stderr, "Usage: fastbwa bwt2fmt <in.bwt> <out.fmt>\n");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
bwt = bwt_restore_bwt(argv[optind]);
|
|
|
|
|
FMTIndex *fmt;
|
|
|
|
|
fmt = create_fmt_from_bwt(bwt);
|
|
|
|
|
dump_fmt(argv[optind + 1], fmt);
|
|
|
|
|
// sprintf(buf, "%s.kmer", argv[optind + 1]);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int bwa_build_kmer(int argc, char *argv[])
|
|
|
|
|
{
|
|
|
|
|
char buf[1024];
|
|
|
|
|
if (optind + 1 > argc)
|
|
|
|
|
{
|
|
|
|
|
fprintf(stderr, "Usage: fastbwa build_kmerhash <in.fmt> <out.kmerhash>\n");
|
|
|
|
|
return 1;
|
|
|
|
|
}
|
|
|
|
|
FMTIndex *fmt = fmt_restore_fmt(argv[optind]);
|
|
|
|
|
fmt_create_kmer_index(fmt);
|
|
|
|
|
sprintf(buf, "%s", argv[optind + 1]);
|
|
|
|
|
fmt_dump_kmer_idx(buf, &fmt->kmer_hash);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
2013-02-23 06:23:34 +08:00
|
|
|
int bwa_index(int argc, char *argv[]) // the "index" command
|
|
|
|
|
{
|
2016-02-14 07:48:46 +08:00
|
|
|
int c, algo_type = BWTALGO_AUTO, is_64 = 0, block_size = 10000000;
|
|
|
|
|
char *prefix = 0, *str;
|
2014-08-25 22:31:54 +08:00
|
|
|
while ((c = getopt(argc, argv, "6a:p:b:")) >= 0) {
|
2011-01-14 09:52:12 +08:00
|
|
|
switch (c) {
|
2011-06-10 05:33:25 +08:00
|
|
|
case 'a': // if -a is not set, algo_type will be determined later
|
2016-02-14 07:48:46 +08:00
|
|
|
if (strcmp(optarg, "rb2") == 0) algo_type = BWTALGO_RB2;
|
|
|
|
|
else if (strcmp(optarg, "bwtsw") == 0) algo_type = BWTALGO_BWTSW;
|
|
|
|
|
else if (strcmp(optarg, "is") == 0) algo_type = BWTALGO_IS;
|
2011-01-14 09:52:12 +08:00
|
|
|
else err_fatal(__func__, "unknown algorithm: '%s'.", optarg);
|
|
|
|
|
break;
|
2013-05-02 22:12:01 +08:00
|
|
|
case 'p': prefix = strdup(optarg); break;
|
2012-03-30 00:31:01 +08:00
|
|
|
case '6': is_64 = 1; break;
|
2014-08-25 22:31:54 +08:00
|
|
|
case 'b':
|
|
|
|
|
block_size = strtol(optarg, &str, 10);
|
|
|
|
|
if (*str == 'G' || *str == 'g') block_size *= 1024 * 1024 * 1024;
|
|
|
|
|
else if (*str == 'M' || *str == 'm') block_size *= 1024 * 1024;
|
|
|
|
|
else if (*str == 'K' || *str == 'k') block_size *= 1024;
|
2014-08-25 22:36:24 +08:00
|
|
|
break;
|
2011-01-14 09:52:12 +08:00
|
|
|
default: return 1;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (optind + 1 > argc) {
|
|
|
|
|
fprintf(stderr, "\n");
|
2024-04-02 07:42:37 +08:00
|
|
|
fprintf(stderr, "Usage: fastbwa index [options] <in.fasta>\n\n");
|
2016-01-27 02:46:08 +08:00
|
|
|
fprintf(stderr, "Options: -a STR BWT construction algorithm: bwtsw, is or rb2 [auto]\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, " -p STR prefix of the index [same as fasta name]\n");
|
2014-08-25 22:36:24 +08:00
|
|
|
fprintf(stderr, " -b INT block size for the bwtsw algorithm (effective with -a bwtsw) [%d]\n", block_size);
|
2012-03-30 00:31:01 +08:00
|
|
|
fprintf(stderr, " -6 index files named as <in.fasta>.64.* instead of <in.fasta>.* \n");
|
2011-11-24 12:11:50 +08:00
|
|
|
fprintf(stderr, "\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
fprintf(stderr, "Warning: `-a bwtsw' does not work for short genomes, while `-a is' and\n");
|
2014-08-25 22:31:54 +08:00
|
|
|
fprintf(stderr, " `-a div' do not work not for long genomes.\n\n");
|
2011-01-14 09:52:12 +08:00
|
|
|
return 1;
|
|
|
|
|
}
|
2012-03-30 00:22:51 +08:00
|
|
|
if (prefix == 0) {
|
2013-05-02 22:12:01 +08:00
|
|
|
prefix = malloc(strlen(argv[optind]) + 4);
|
2012-03-30 00:22:51 +08:00
|
|
|
strcpy(prefix, argv[optind]);
|
2012-03-30 00:31:01 +08:00
|
|
|
if (is_64) strcat(prefix, ".64");
|
2012-03-30 00:22:51 +08:00
|
|
|
}
|
2016-02-14 07:48:46 +08:00
|
|
|
bwa_idx_build(argv[optind], prefix, algo_type, block_size);
|
|
|
|
|
free(prefix);
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int bwa_idx_build(const char *fa, const char *prefix, int algo_type, int block_size)
|
|
|
|
|
{
|
|
|
|
|
extern void bwa_pac_rev_core(const char *fn, const char *fn_rev);
|
|
|
|
|
|
|
|
|
|
char *str, *str2, *str3;
|
|
|
|
|
clock_t t;
|
|
|
|
|
int64_t l_pac;
|
|
|
|
|
|
2013-05-02 22:12:01 +08:00
|
|
|
str = (char*)calloc(strlen(prefix) + 10, 1);
|
|
|
|
|
str2 = (char*)calloc(strlen(prefix) + 10, 1);
|
|
|
|
|
str3 = (char*)calloc(strlen(prefix) + 10, 1);
|
2011-01-14 09:52:12 +08:00
|
|
|
|
2013-02-12 23:21:17 +08:00
|
|
|
{ // nucleotide indexing
|
2016-02-14 07:48:46 +08:00
|
|
|
gzFile fp = xzopen(fa, "r");
|
2011-01-14 09:52:12 +08:00
|
|
|
t = clock();
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack FASTA... ");
|
2011-10-22 00:03:14 +08:00
|
|
|
l_pac = bns_fasta2bntseq(fp, prefix, 0);
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
2013-01-04 00:57:37 +08:00
|
|
|
err_gzclose(fp);
|
2011-01-14 09:52:12 +08:00
|
|
|
}
|
2011-06-10 05:33:25 +08:00
|
|
|
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
|
2011-01-14 09:52:12 +08:00
|
|
|
{
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".pac");
|
|
|
|
|
strcpy(str2, prefix); strcat(str2, ".bwt");
|
|
|
|
|
t = clock();
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct BWT for the packed sequence...\n");
|
2014-08-25 22:31:54 +08:00
|
|
|
if (algo_type == 2) bwt_bwtgen2(str, str2, block_size);
|
2011-01-14 09:52:12 +08:00
|
|
|
else if (algo_type == 1 || algo_type == 3) {
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
bwt = bwt_pac2bwt(str, algo_type == 3);
|
|
|
|
|
bwt_dump_bwt(str2, bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
|
|
|
|
}
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] %.2f seconds elapse.\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
2011-01-14 09:52:12 +08:00
|
|
|
}
|
|
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".bwt");
|
|
|
|
|
t = clock();
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Update BWT... ");
|
2011-01-14 09:52:12 +08:00
|
|
|
bwt = bwt_restore_bwt(str);
|
|
|
|
|
bwt_bwtupdate_core(bwt);
|
|
|
|
|
bwt_dump_bwt(str, bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
2011-01-14 09:52:12 +08:00
|
|
|
}
|
2011-10-22 00:03:14 +08:00
|
|
|
{
|
2016-02-14 07:48:46 +08:00
|
|
|
gzFile fp = xzopen(fa, "r");
|
2011-10-22 00:03:14 +08:00
|
|
|
t = clock();
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Pack forward-only FASTA... ");
|
2011-10-22 00:03:14 +08:00
|
|
|
l_pac = bns_fasta2bntseq(fp, prefix, 1);
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
2013-01-04 00:57:37 +08:00
|
|
|
err_gzclose(fp);
|
2011-10-22 00:03:14 +08:00
|
|
|
}
|
2011-01-14 09:52:12 +08:00
|
|
|
{
|
|
|
|
|
bwt_t *bwt;
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".bwt");
|
|
|
|
|
strcpy(str3, prefix); strcat(str3, ".sa");
|
|
|
|
|
t = clock();
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "[bwa_index] Construct SA from BWT and Occ... ");
|
2011-01-14 09:52:12 +08:00
|
|
|
bwt = bwt_restore_bwt(str);
|
|
|
|
|
bwt_cal_sa(bwt, 32);
|
|
|
|
|
bwt_dump_sa(str3, bwt);
|
|
|
|
|
bwt_destroy(bwt);
|
2016-02-14 07:48:46 +08:00
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
2024-04-02 07:49:38 +08:00
|
|
|
{
|
|
|
|
|
t = clock();
|
2024-04-02 07:42:37 +08:00
|
|
|
// build FMT-Index
|
|
|
|
|
FMTIndex *fmt;
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".fmt");
|
|
|
|
|
fmt = create_fmt_from_bwt(bwt);
|
|
|
|
|
dump_fmt(str, fmt);
|
|
|
|
|
// create Kmer-Hash
|
2024-04-02 07:49:38 +08:00
|
|
|
fmt_create_kmer_index(fmt);
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".kmer");
|
|
|
|
|
fmt_dump_kmer_idx(str, &fmt->kmer_hash);
|
|
|
|
|
if (bwa_verbose >= 3) fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
|
|
|
|
|
// create byte sa
|
|
|
|
|
bwt_cal_byte_sa(bwt, 4);
|
|
|
|
|
strcpy(str, prefix); strcat(str, ".bytesa");
|
|
|
|
|
bwt_dump_byte_sa(str, bwt);
|
2024-04-02 07:42:37 +08:00
|
|
|
}
|
2011-01-14 09:52:12 +08:00
|
|
|
}
|
2016-02-14 07:48:46 +08:00
|
|
|
free(str3); free(str2); free(str);
|
2011-01-14 09:52:12 +08:00
|
|
|
return 0;
|
|
|
|
|
}
|