From c948c647a0b4fad187db4c29cca2a6c2e332ae82 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Wed, 19 Oct 2011 17:42:42 -0400 Subject: [PATCH] make changes to bwt.c --- bwt.c | 25 +++++++------------------ bwt.h | 16 +++++++++++----- bwt_gen/bwt_gen.c | 2 +- bwtaln.h | 2 +- bwtio.c | 2 +- bwtmisc.c | 8 ++++---- bwtsw2_core.c | 2 +- 7 files changed, 26 insertions(+), 31 deletions(-) diff --git a/bwt.c b/bwt.c index 10b838a..390e90e 100644 --- a/bwt.c +++ b/bwt.c @@ -97,8 +97,8 @@ inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c) if (k >= bwt->primary) --k; // because $ is not in bwt // retrieve Occ at k/OCC_INTERVAL - n = (p = bwt_occ_intv(bwt, k))[c]; - p += 4; // jump to the start of the first BWT cell + n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c]; + p += sizeof(bwtint_t); // jump to the start of the first BWT cell // calculate Occ up to the last k/32 j = k >> 5 << 5; @@ -116,10 +116,6 @@ inline bwtint_t bwt_occ(const bwt_t *bwt, bwtint_t k, ubyte_t c) inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint_t *ok, bwtint_t *ol) { bwtint_t _k, _l; - if (k == l) { - *ok = *ol = bwt_occ(bwt, k, c); - return; - } _k = (k >= bwt->primary)? k-1 : k; _l = (l >= bwt->primary)? l-1 : l; if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) { @@ -130,8 +126,8 @@ inline void bwt_2occ(const bwt_t *bwt, bwtint_t k, bwtint_t l, ubyte_t c, bwtint uint32_t *p; if (k >= bwt->primary) --k; if (l >= bwt->primary) --l; - n = (p = bwt_occ_intv(bwt, k))[c]; - p += 4; + n = ((bwtint_t*)(p = bwt_occ_intv(bwt, k)))[c]; + p += sizeof(bwtint_t); // calculate *ok j = k >> 5 << 5; for (i = k/OCC_INTERVAL*OCC_INTERVAL; i < j; i += 32, p += 2) @@ -164,8 +160,8 @@ inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]) } if (k >= bwt->primary) --k; // because $ is not in bwt p = bwt_occ_intv(bwt, k); - memcpy(cnt, p, 16); - p += 4; + memcpy(cnt, p, 4 * sizeof(bwtint_t)); + p += sizeof(bwtint_t); j = k >> 4 << 4; for (l = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; l < j; l += 16, ++p) x += __occ_aux4(bwt, *p); @@ -177,11 +173,6 @@ inline void bwt_occ4(const bwt_t *bwt, bwtint_t k, bwtint_t cnt[4]) inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4], bwtint_t cntl[4]) { bwtint_t _k, _l; - if (k == l) { - bwt_occ4(bwt, k, cntk); - memcpy(cntl, cntk, 4 * sizeof(bwtint_t)); - return; - } _k = (k >= bwt->primary)? k-1 : k; _l = (l >= bwt->primary)? l-1 : l; if (_l/OCC_INTERVAL != _k/OCC_INTERVAL || k == (bwtint_t)(-1) || l == (bwtint_t)(-1)) { @@ -190,13 +181,11 @@ inline void bwt_2occ4(const bwt_t *bwt, bwtint_t k, bwtint_t l, bwtint_t cntk[4] } else { bwtint_t i, j, x, y; uint32_t *p; - int cl[4]; if (k >= bwt->primary) --k; // because $ is not in bwt if (l >= bwt->primary) --l; - cl[0] = cl[1] = cl[2] = cl[3] = 0; p = bwt_occ_intv(bwt, k); memcpy(cntk, p, 4 * sizeof(bwtint_t)); - p += 4; + p += sizeof(bwtint_t); // prepare cntk[] j = k >> 4 << 4; for (i = k / OCC_INTERVAL * OCC_INTERVAL, x = 0; i < j; i += 16, ++p) diff --git a/bwt.h b/bwt.h index 4aef38d..2f12be6 100644 --- a/bwt.h +++ b/bwt.h @@ -30,14 +30,15 @@ #include -// requirement: (OCC_INTERVAL%16 == 0) +// requirement: (OCC_INTERVAL%16 == 0); please DO NOT change this line #define OCC_INTERVAL 0x80 #ifndef BWA_UBYTE #define BWA_UBYTE typedef unsigned char ubyte_t; #endif -typedef uint32_t bwtint_t; + +typedef uint64_t bwtint_t; typedef struct { bwtint_t primary; // S^{-1}(0), or the primary index of BWT @@ -53,15 +54,20 @@ typedef struct { bwtint_t *sa; } bwt_t; -#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL*12 + 4 + (k)%OCC_INTERVAL/16]) +/* For general OCC_INTERVAL, the following is correct: +#define bwt_bwt(b, k) ((b)->bwt[(k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) + sizeof(bwtint_t)/4*4 + (k)%OCC_INTERVAL/16]) +#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL * (OCC_INTERVAL/(sizeof(uint32_t)*8/2) + sizeof(bwtint_t)/4*4) +*/ + +// The following two lines are ONLY correct when OCC_INTERVAL==0x80 +#define bwt_bwt(b, k) ((b)->bwt[((k)>>7<<4) + sizeof(bwtint_t) + (((k)&0x7f)>>4)]) +#define bwt_occ_intv(b, k) ((b)->bwt + ((k)>>7<<4)) /* retrieve a character from the $-removed BWT string. Note that * bwt_t::bwt is not exactly the BWT string and therefore this macro is * called bwt_B0 instead of bwt_B */ #define bwt_B0(b, k) (bwt_bwt(b, k)>>((~(k)&0xf)<<1)&3) -#define bwt_occ_intv(b, k) ((b)->bwt + (k)/OCC_INTERVAL*12) - // inverse Psi function #define bwt_invPsi(bwt, k) \ (((k) == (bwt)->primary)? 0 : \ diff --git a/bwt_gen/bwt_gen.c b/bwt_gen/bwt_gen.c index 0dcc7f4..5caaf5f 100644 --- a/bwt_gen/bwt_gen.c +++ b/bwt_gen/bwt_gen.c @@ -1545,7 +1545,7 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) { BWTInc *bwtInc; - bwtInc = BWTIncConstructFromPacked(fn_pac, 4.4, 10000000, 10000000); + bwtInc = BWTIncConstructFromPacked(fn_pac, 3.7, 10000000, 10000000); printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone); BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0); BWTIncFree(bwtInc); diff --git a/bwtaln.h b/bwtaln.h index c2faa98..0dc7363 100644 --- a/bwtaln.h +++ b/bwtaln.h @@ -51,8 +51,8 @@ typedef uint16_t bwa_cigar_t; #define __cigar_create(__op, __len) ((__op)<primary, sizeof(bwtint_t), 1, fp); fwrite(bwt->L2+1, sizeof(bwtint_t), 4, fp); - fwrite(bwt->bwt, sizeof(bwtint_t), bwt->bwt_size, fp); + fwrite(bwt->bwt, 4, bwt->bwt_size, fp); fclose(fp); } diff --git a/bwtmisc.c b/bwtmisc.c index 1082065..8d20287 100644 --- a/bwtmisc.c +++ b/bwtmisc.c @@ -125,20 +125,20 @@ void bwt_bwtupdate_core(bwt_t *bwt) uint32_t *buf; n_occ = (bwt->seq_len + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; - bwt->bwt_size += n_occ * 4; // the new size + bwt->bwt_size += n_occ * sizeof(bwtint_t); // the new size buf = (uint32_t*)calloc(bwt->bwt_size, 4); // will be the new bwt c[0] = c[1] = c[2] = c[3] = 0; for (i = k = 0; i < bwt->seq_len; ++i) { if (i % OCC_INTERVAL == 0) { memcpy(buf + k, c, sizeof(bwtint_t) * 4); - k += 4; + k += sizeof(bwtint_t); // in fact: sizeof(bwtint_t)=4*(sizeof(bwtint_t)/4) } - if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; + if (i % 16 == 0) buf[k++] = bwt->bwt[i/16]; // 16 == sizeof(uint32_t)/2 ++c[bwt_B00(bwt, i)]; } // the last element memcpy(buf + k, c, sizeof(bwtint_t) * 4); - xassert(k + 4 == bwt->bwt_size, "inconsistent bwt_size"); + xassert(k + sizeof(bwtint_t) == bwt->bwt_size, "inconsistent bwt_size"); // update bwt free(bwt->bwt); bwt->bwt = buf; } diff --git a/bwtsw2_core.c b/bwtsw2_core.c index 03360a3..4d5984c 100644 --- a/bwtsw2_core.c +++ b/bwtsw2_core.c @@ -475,7 +475,7 @@ bwtsw2_t **bsw2_core(const bsw2opt_t *opt, const bwtl_t *target, const bwt_t *qu // get Occ for the DAG bwtl_2occ4(target, v->tk - 1, v->tl, tcntk, tcntl); for (tj = 0; tj != 4; ++tj) { // descend to the children - uint32_t qcntk[4], qcntl[4]; + bwtint_t qcntk[4], qcntl[4]; int qj, *curr_score_mat = score_mat + tj * 4; khiter_t iter; bsw2entry_t *u;