make changes to bwt.c
This commit is contained in:
parent
b6d807b0b7
commit
c948c647a0
25
bwt.c
25
bwt.c
|
|
@ -97,8 +97,8 @@ inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
|
|||
if (k >= bwt->primary) --k; // because $ is not in bwt
|
||||
|
||||
// retrieve Occ at k/OCC_INTERVAL
|
||||
n = (p = bwt_occ_intv(bwt, k))[c];
|
||||
p += 4; // jump to the start of the first BWT cell
|
||||
n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
|
||||
p += sizeof(bwtint_t); // jump to the start of the first BWT cell
|
||||
|
||||
// calculate Occ up to the last k/32
|
||||
j = k >> 5 << 5;
|
||||
|
|
@ -116,10 +116,6 @@ inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c)
|
|||
inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol)
|
||||
{
|
||||
bwtint_t _k, _l;
|
||||
if (k == l) {
|
||||
*ok = *ol = bwt_occ(bwt, k, c);
|
||||
return;
|
||||
}
|
||||
_k = (k >= bwt->primary)? k-1 : k;
|
||||
_l = (l >= bwt->primary)? l-1 : l;
|
||||
if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
|
||||
|
|
@ -130,8 +126,8 @@ inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint
|
|||
uint32_t *p;
|
||||
if (k >= bwt->primary) --k;
|
||||
if (l >= bwt->primary) --l;
|
||||
n = (p = bwt_occ_intv(bwt, k))[c];
|
||||
p += 4;
|
||||
n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c];
|
||||
p += sizeof(bwtint_t);
|
||||
// calculate *ok
|
||||
j = k >> 5 << 5;
|
||||
for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2)
|
||||
|
|
@ -164,8 +160,8 @@ inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
|
|||
}
|
||||
if (k >= bwt->primary) --k; // because $ is not in bwt
|
||||
p = bwt_occ_intv(bwt, k);
|
||||
memcpy(cnt, p, 16);
|
||||
p += 4;
|
||||
memcpy(cnt, p, 4 * sizeof(bwtint_t));
|
||||
p += sizeof(bwtint_t);
|
||||
j = k >> 4 << 4;
|
||||
for (l = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; l < j; l += 16, ++p)
|
||||
x += __occ_aux4(bwt, *p);
|
||||
|
|
@ -177,11 +173,6 @@ inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4])
|
|||
inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4])
|
||||
{
|
||||
bwtint_t _k, _l;
|
||||
if (k == l) {
|
||||
bwt_occ4(bwt, k, cntk);
|
||||
memcpy(cntl, cntk, 4 * sizeof(bwtint_t));
|
||||
return;
|
||||
}
|
||||
_k = (k >= bwt->primary)? k-1 : k;
|
||||
_l = (l >= bwt->primary)? l-1 : l;
|
||||
if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) {
|
||||
|
|
@ -190,13 +181,11 @@ inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4]
|
|||
} else {
|
||||
bwtint_t i, j, x, y;
|
||||
uint32_t *p;
|
||||
int cl[4];
|
||||
if (k >= bwt->primary) --k; // because $ is not in bwt
|
||||
if (l >= bwt->primary) --l;
|
||||
cl[0] = cl[1] = cl[2] = cl[3] = 0;
|
||||
p = bwt_occ_intv(bwt, k);
|
||||
memcpy(cntk, p, 4 * sizeof(bwtint_t));
|
||||
p += 4;
|
||||
p += sizeof(bwtint_t);
|
||||
// prepare cntk[]
|
||||
j = k >> 4 << 4;
|
||||
for (i = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; i < j; i += 16, ++p)
|
||||
|
|
|
|||
16
bwt.h
16
bwt.h
|
|
@ -30,14 +30,15 @@
|
|||
|
||||
#include <stdint.h>
|
||||
|
||||
// requirement: (OCC_INTERVAL%16 == 0)
|
||||
// requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line
|
||||
#define OCC_INTERVAL 0x80
|
||||
|
||||
#ifndef BWA_UBYTE
|
||||
#define BWA_UBYTE
|
||||
typedef unsigned char ubyte_t;
|
||||
#endif
|
||||
typedef uint32_t bwtint_t;
|
||||
|
||||
typedef uint64_t bwtint_t;
|
||||
|
||||
typedef struct {
|
||||
bwtint_t primary; // S^{-1}(0), or the primary index of BWT
|
||||
|
|
@ -53,15 +54,20 @@ typedef struct {
|
|||
bwtint_t *sa;
|
||||
} bwt_t;
|
||||
|
||||
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL*12 + 4 + (k)%OCC_INTERVAL/16])
|
||||
/* For general OCC_INTERVAL, the following is correct:
|
||||
#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16])
|
||||
#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4)
|
||||
*/
|
||||
|
||||
// The following two lines are ONLY correct when OCC_INTERVAL==0x80
|
||||
#define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)])
|
||||
#define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4))
|
||||
|
||||
/* retrieve a character from the $-removed BWT string. Note that
|
||||
* bwt_t::bwt is not exactly the BWT string and therefore this macro is
|
||||
* called bwt_B0 instead of bwt_B */
|
||||
#define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3)
|
||||
|
||||
#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL*12)
|
||||
|
||||
// inverse Psi function
|
||||
#define bwt_invPsi(bwt, k) \
|
||||
(((k) == (bwt)->primary)? 0 : \
|
||||
|
|
|
|||
|
|
@ -1545,7 +1545,7 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
|
|||
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
|
||||
{
|
||||
BWTInc *bwtInc;
|
||||
bwtInc = BWTIncConstructFromPacked(fn_pac, 4.4, 10000000, 10000000);
|
||||
bwtInc = BWTIncConstructFromPacked(fn_pac, 3.7, 10000000, 10000000);
|
||||
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
|
||||
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
|
||||
BWTIncFree(bwtInc);
|
||||
|
|
|
|||
2
bwtaln.h
2
bwtaln.h
|
|
@ -51,8 +51,8 @@ typedef uint16_t bwa_cigar_t;
|
|||
#define __cigar_create(__op, __len) ((__op)<<CIGAR_OP_SHIFT | (__len))
|
||||
|
||||
typedef struct {
|
||||
uint32_t pos;
|
||||
uint32_t n_cigar:15, gap:8, mm:8, strand:1;
|
||||
bwtint_t pos;
|
||||
bwa_cigar_t *cigar;
|
||||
} bwt_multi1_t;
|
||||
|
||||
|
|
|
|||
2
bwtio.c
2
bwtio.c
|
|
@ -10,7 +10,7 @@ void bwt_dump_bwt(const char *fn, const bwt_t *bwt)
|
|||
fp = xopen(fn, "wb");
|
||||
fwrite(&bwt->primary, sizeof(bwtint_t), 1, fp);
|
||||
fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp);
|
||||
fwrite(bwt->bwt, sizeof(bwtint_t), bwt->bwt_size, fp);
|
||||
fwrite(bwt->bwt, 4, bwt->bwt_size, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,20 +125,20 @@ void bwt_bwtupdate_core(bwt_t *bwt)
|
|||
uint32_t *buf;
|
||||
|
||||
n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1;
|
||||
bwt->bwt_size += n_occ * 4; // the new size
|
||||
bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size
|
||||
buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt
|
||||
c[0] = c[1] = c[2] = c[3] = 0;
|
||||
for (i = k = 0; i < bwt->seq_len; ++i) {
|
||||
if (i % OCC_INTERVAL == 0) {
|
||||
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
|
||||
k += 4;
|
||||
k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4)
|
||||
}
|
||||
if (i % 16 == 0) buf[k++] = bwt->bwt[i/16];
|
||||
if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2
|
||||
++c[bwt_B00(bwt, i)];
|
||||
}
|
||||
// the last element
|
||||
memcpy(buf + k, c, sizeof(bwtint_t) * 4);
|
||||
xassert(k + 4 == bwt->bwt_size, "inconsistent bwt_size");
|
||||
xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size");
|
||||
// update bwt
|
||||
free(bwt->bwt); bwt->bwt = buf;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -475,7 +475,7 @@ bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *qu
|
|||
// get Occ for the DAG
|
||||
bwtl_2occ4(target, v->tk - 1, v->tl, tcntk, tcntl);
|
||||
for (tj = 0; tj != 4; ++tj) { // descend to the children
|
||||
uint32_t qcntk[4], qcntl[4];
|
||||
bwtint_t qcntk[4], qcntl[4];
|
||||
int qj, *curr_score_mat = score_mat + tj * 4;
|
||||
khiter_t iter;
|
||||
bsw2entry_t *u;
|
||||
|
|
|
|||
Loading…
Reference in New Issue