first attempt to make bwt_gen work for >4GB seq

This commit is contained in:
Heng Li 2011-10-18 23:41:26 -04:00
parent 3114edcb7c
commit 95b1ab7e96
4 changed files with 227 additions and 316 deletions

View File

@ -36,59 +36,29 @@
#include "QSufSort.h"
// Static functions
static void QSufSortSortSplit(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar);
static int QSufSortChoosePivot(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar);
static void QSufSortInsertSortSplit(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar);
static void QSufSortBucketSort(int* __restrict V, int* __restrict I, const int numChar, const int alphabetSize);
static int QSufSortTransform(int* __restrict V, int* __restrict I, const int numChar, const int largestInputSymbol,
const int smallestInputSymbol, const int maxNewAlphabetSize, int *numSymbolAggregated);
// from MiscUtilities.c
static unsigned int leadingZero(const unsigned int input) {
unsigned int l;
const static unsigned int leadingZero8bit[256] = {8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
if (input & 0xFFFF0000) {
if (input & 0xFF000000) {
l = leadingZero8bit[input >> 24];
} else {
l = 8 + leadingZero8bit[input >> 16];
}
} else {
if (input & 0x0000FF00) {
l = 16 + leadingZero8bit[input >> 8];
} else {
l = 24 + leadingZero8bit[input];
}
}
return l;
}
static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar);
static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar);
static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar);
static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize);
static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated);
/* Makes suffix array p of x. x becomes inverse of p. p and x are both of size
n+1. Contents of x[0...n-1] are integers in the range l...k-1. Original
contents of x[n] is disregarded, the n-th symbol being regarded as
end-of-string smaller than all other symbols.*/
void QSufSortSuffixSort(int* __restrict V, int* __restrict I, const int numChar, const int largestInputSymbol,
const int smallestInputSymbol, const int skipTransform) {
void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
const qsint_t smallestInputSymbol, const int skipTransform) {
int i, j;
int s, negatedSortedGroupLength;
int numSymbolAggregated;
int maxNumInputSymbol;
int numSortedPos = 1;
int newAlphabetSize;
qsint_t i, j;
qsint_t s, negatedSortedGroupLength;
qsint_t numSymbolAggregated;
qsint_t maxNumInputSymbol;
qsint_t numSortedPos = 1;
qsint_t newAlphabetSize;
maxNumInputSymbol = largestInputSymbol - smallestInputSymbol + 1;
@ -102,7 +72,7 @@ void QSufSortSuffixSort(int* __restrict V, int* __restrict I, const int numChar,
numSortedPos = numSymbolAggregated;
}
while ((int)(I[0]) >= -(int)numChar) {
while ((qsint_t)(I[0]) >= -(qsint_t)numChar) {
i = 0;
negatedSortedGroupLength = 0;
do {
@ -129,9 +99,9 @@ void QSufSortSuffixSort(int* __restrict V, int* __restrict I, const int numChar,
}
void QSufSortGenerateSaFromInverse(const int* V, int* __restrict I, const int numChar) {
void QSufSortGenerateSaFromInverse(const qsint_t* V, qsint_t* __restrict I, const qsint_t numChar) {
int i;
qsint_t i;
for (i=0; i<=numChar; i++) {
I[V[i]] = i + 1;
}
@ -143,21 +113,14 @@ void QSufSortGenerateSaFromInverse(const int* V, int* __restrict I, const int nu
quicksort taken from Bentley & McIlroy, "Engineering a Sort Function",
Software -- Practice and Experience 23(11), 1249-1265 (November 1993). This
function is based on Program 7.*/
static void QSufSortSortSplit(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar) {
static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar) {
int a, b, c, d;
int l, m;
int f, v, s, t;
int tmp;
int numItem;
#ifdef DEBUG
if (lowestPos > highestPos) {
fprintf(stderr, "QSufSortSortSplit(): lowestPos > highestPos!\n");
exit(1);
}
#endif
qsint_t a, b, c, d;
qsint_t l, m;
qsint_t f, v, s, t;
qsint_t tmp;
qsint_t numItem;
numItem = highestPos - lowestPos + 1;
@ -171,7 +134,7 @@ static void QSufSortSortSplit(int* __restrict V, int* __restrict I, const int lo
a = b = lowestPos;
c = d = highestPos;
while (TRUE) {
while (1) {
while (c >= b && (f = KEY(V, I, b, numSortedChar)) <= v) {
if (f == v) {
swap(I[a], I[b], tmp);
@ -235,31 +198,17 @@ static void QSufSortSortSplit(int* __restrict V, int* __restrict I, const int lo
}
/* Algorithm by Bentley & McIlroy.*/
static int QSufSortChoosePivot(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar) {
static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar) {
int m;
int keyl, keym, keyn;
int key1, key2, key3;
int s;
int numItem;
#ifdef DEBUG
if (lowestPos > highestPos) {
fprintf(stderr, "QSufSortChoosePivot(): lowestPos > highestPos!\n");
exit(1);
}
#endif
qsint_t m;
qsint_t keyl, keym, keyn;
qsint_t key1, key2, key3;
qsint_t s;
qsint_t numItem;
numItem = highestPos - lowestPos + 1;
#ifdef DEBUG
if (numItem <= INSERT_SORT_NUM_ITEM) {
fprintf(stderr, "QSufSortChoosePivot(): number of items <= INSERT_SORT_NUM_ITEM!\n");
exit(1);
}
#endif
m = lowestPos + numItem / 2;
s = numItem / 8;
@ -282,39 +231,19 @@ static int QSufSortChoosePivot(int* __restrict V, int* __restrict I, const int l
}
/* Quadratic sorting method to use for small subarrays. */
static void QSufSortInsertSortSplit(int* __restrict V, int* __restrict I, const int lowestPos,
const int highestPos, const int numSortedChar) {
static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos,
const qsint_t highestPos, const qsint_t numSortedChar) {
int i, j;
int tmpKey, tmpPos;
int numItem;
int key[INSERT_SORT_NUM_ITEM], pos[INSERT_SORT_NUM_ITEM];
int negativeSortedLength;
int groupNum;
#ifdef DEBUG
if (lowestPos > highestPos) {
fprintf(stderr, "QSufSortInsertSortSplit(): lowestPos > highestPos!\n");
exit(1);
}
#endif
qsint_t i, j;
qsint_t tmpKey, tmpPos;
qsint_t numItem;
qsint_t key[INSERT_SORT_NUM_ITEM], pos[INSERT_SORT_NUM_ITEM];
qsint_t negativeSortedLength;
qsint_t groupNum;
numItem = highestPos - lowestPos + 1;
#ifdef DEBUG
if (numItem > INSERT_SORT_NUM_ITEM) {
fprintf(stderr, "QSufSortInsertSortSplit(): number of items > INSERT_SORT_NUM_ITEM!\n");
exit(1);
}
#endif
for (i=0; i<numItem; i++) {
#ifdef DEBUG
if (I[lowestPos + i] < 0) {
fprintf(stderr, "QSufSortInsertSortSplit(): I < 0 in unsorted region!\n");
exit(1);
}
#endif
pos[i] = I[lowestPos + i];
key[i] = V[pos[i] + numSortedChar];
}
@ -366,12 +295,12 @@ static void QSufSortInsertSortSplit(int* __restrict V, int* __restrict I, const
Output: x is V and p is I after the initial sorting stage of the refined
suffix sorting algorithm.*/
static void QSufSortBucketSort(int* __restrict V, int* __restrict I, const int numChar, const int alphabetSize) {
static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize) {
int i, c;
int d;
int groupNum;
int currentIndex;
qsint_t i, c;
qsint_t d;
qsint_t groupNum;
qsint_t currentIndex;
// mark linked list empty
for (i=0; i<alphabetSize; i++) {
@ -381,14 +310,14 @@ static void QSufSortBucketSort(int* __restrict V, int* __restrict I, const int n
// insert to linked list
for (i=0; i<=numChar; i++) {
c = V[i];
V[i] = (int)(I[c]);
V[i] = (qsint_t)(I[c]);
I[c] = i;
}
currentIndex = numChar;
for (i=alphabetSize; i>0; i--) {
c = I[i-1];
d = (int)(V[c]);
d = (qsint_t)(V[c]);
groupNum = currentIndex;
V[c] = groupNum;
if (d >= 0) {
@ -424,20 +353,20 @@ static void QSufSortBucketSort(int* __restrict V, int* __restrict I, const int n
Output: Returns an integer j in the range 1...q representing the size of the
new alphabet. If j<=n+1, the alphabet is compacted. The global variable r is
set to the number of old symbols grouped into one. Only x[n] is 0.*/
static int QSufSortTransform(int* __restrict V, int* __restrict I, const int numChar, const int largestInputSymbol,
const int smallestInputSymbol, const int maxNewAlphabetSize, int *numSymbolAggregated) {
static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
const qsint_t smallestInputSymbol, const qsint_t maxNewAlphabetSize, qsint_t *numSymbolAggregated) {
int c, i, j;
int a; // numSymbolAggregated
int mask;
int minSymbolInChunk = 0, maxSymbolInChunk = 0;
int newAlphabetSize;
int maxNumInputSymbol, maxNumBit, maxSymbol;
qsint_t c, i, j;
qsint_t a; // numSymbolAggregated
qsint_t mask;
qsint_t minSymbolInChunk = 0, maxSymbolInChunk = 0;
qsint_t newAlphabetSize;
qsint_t maxNumInputSymbol, maxNumBit, maxSymbol;
maxNumInputSymbol = largestInputSymbol - smallestInputSymbol + 1;
maxNumBit = BITS_IN_WORD - leadingZero(maxNumInputSymbol);
maxSymbol = INT_MAX >> maxNumBit;
for (maxNumBit = 0, i = maxNumInputSymbol; i; i >>= 1) ++maxNumBit;
maxSymbol = QSINT_MAX >> maxNumBit;
c = maxNumInputSymbol;
for (a = 0; a < numChar && maxSymbolInChunk <= maxSymbol && c <= maxNewAlphabetSize; a++) {
@ -449,14 +378,6 @@ static int QSufSortTransform(int* __restrict V, int* __restrict I, const int num
mask = (1 << (a-1) * maxNumBit) - 1; /* mask masks off top old symbol from chunk.*/
V[numChar] = smallestInputSymbol - 1; /* emulate zero terminator.*/
#ifdef DEBUG
// Section of code for maxSymbolInChunk > numChar removed!
if (maxSymbolInChunk > numChar) {
fprintf(stderr, "QSufSortTransform(): maxSymbolInChunk > numChar!\n");
exit(1);
}
#endif
/* bucketing possible, compact alphabet.*/
for (i=0; i<=maxSymbolInChunk; i++) {
I[i] = 0; /* zero transformation table.*/

View File

@ -29,12 +29,17 @@
#ifndef __QSUFSORT_H__
#define __QSUFSORT_H__
#include <stdint.h>
#define KEY(V, I, p, h) ( V[ I[p] + h ] )
#define INSERT_SORT_NUM_ITEM 16
void QSufSortSuffixSort(int* __restrict V, int* __restrict I, const int numChar, const int largestInputSymbol,
const int smallestInputSymbol, const int skipTransform);
void QSufSortGenerateSaFromInverse(const int *V, int* __restrict I, const int numChar);
typedef int64_t qsint_t;
#define QSINT_MAX INT64_MAX
void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol,
const qsint_t smallestInputSymbol, const int skipTransform);
void QSufSortGenerateSaFromInverse(const qsint_t *V, qsint_t* __restrict I, const qsint_t numChar);
#endif

View File

@ -25,22 +25,27 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "bwt_gen.h"
#include "QSufSort.h"
static unsigned int TextLengthFromBytePacked(unsigned int bytePackedLength, unsigned int bitPerChar,
#define MIN_AVAILABLE_WORD 0x10000
static bgint_t TextLengthFromBytePacked(bgint_t bytePackedLength, unsigned int bitPerChar,
unsigned int lastByteLength)
{
if (bytePackedLength > ALL_ONE_MASK / (BITS_IN_BYTE / bitPerChar)) {
fprintf(stderr, "TextLengthFromBytePacked(): text length > 2^32!\n");
exit(1);
}
return (bytePackedLength - 1) * (BITS_IN_BYTE / bitPerChar) + lastByteLength;
}
static void initializeVAL(unsigned int *startAddr, const unsigned int length, const unsigned int initValue)
static void initializeVAL(unsigned int *startAddr, const bgint_t length, const unsigned int initValue)
{
unsigned int i;
bgint_t i;
for (i=0; i<length; i++) startAddr[i] = initValue;
}
static void initializeVAL_bg(bgint_t *startAddr, const bgint_t length, const bgint_t initValue)
{
bgint_t i;
for (i=0; i<length; i++) startAddr[i] = initValue;
}
@ -60,25 +65,25 @@ static void GenerateDNAOccCountTable(unsigned int *dnaDecodeTable)
}
// for BWTIncCreate()
static unsigned int BWTOccValueMajorSizeInWord(const unsigned int numChar)
static bgint_t BWTOccValueMajorSizeInWord(const bgint_t numChar)
{
unsigned int numOfOccValue;
unsigned int numOfOccIntervalPerMajor;
bgint_t numOfOccValue;
unsigned numOfOccIntervalPerMajor;
numOfOccValue = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // Value at both end for bi-directional encoding
numOfOccIntervalPerMajor = OCC_INTERVAL_MAJOR / OCC_INTERVAL;
return (numOfOccValue + numOfOccIntervalPerMajor - 1) / numOfOccIntervalPerMajor * ALPHABET_SIZE;
}
// for BWTIncCreate()
static unsigned int BWTOccValueMinorSizeInWord(const unsigned int numChar)
static bgint_t BWTOccValueMinorSizeInWord(const bgint_t numChar)
{
unsigned int numOfOccValue;
bgint_t numOfOccValue;
numOfOccValue = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL + 1; // Value at both end for bi-directional encoding
return (numOfOccValue + OCC_VALUE_PER_WORD - 1) / OCC_VALUE_PER_WORD * ALPHABET_SIZE;
}
// for BWTIncCreate()
static unsigned int BWTResidentSizeInWord(const unsigned int numChar) {
static bgint_t BWTResidentSizeInWord(const bgint_t numChar) {
unsigned int numCharRoundUpToOccInterval;
bgint_t numCharRoundUpToOccInterval;
// The $ in BWT at the position of inverseSa0 is not encoded
numCharRoundUpToOccInterval = (numChar + OCC_INTERVAL - 1) / OCC_INTERVAL * OCC_INTERVAL;
@ -89,13 +94,13 @@ static unsigned int BWTResidentSizeInWord(const unsigned int numChar) {
static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
{
unsigned int maxBuildSize;
bgint_t maxBuildSize;
if (bwtInc->bwt->textLength == 0) {
// initial build
// Minus 2 because n+1 entries of seq and rank needed for n char
maxBuildSize = (bwtInc->availableWord - 2 - OCC_INTERVAL / CHAR_PER_WORD)
/ (2 * CHAR_PER_WORD + 1) * CHAR_PER_WORD;
maxBuildSize = (bwtInc->availableWord - 2 * (sizeof(bgint_t) / 4) - OCC_INTERVAL / CHAR_PER_WORD)
/ (2 * CHAR_PER_WORD + 1) * CHAR_PER_WORD / (sizeof(bgint_t) / 4);
if (bwtInc->initialMaxBuildSize > 0) {
bwtInc->buildSize = min(bwtInc->initialMaxBuildSize, maxBuildSize);
} else {
@ -104,9 +109,9 @@ static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
} else {
// Minus 3 because n+1 entries of sorted rank, seq and rank needed for n char
// Minus numberOfIterationDone because bwt slightly shift to left in each iteration
maxBuildSize = (bwtInc->availableWord - bwtInc->bwt->bwtSizeInWord - bwtInc->bwt->occSizeInWord - 3
maxBuildSize = (bwtInc->availableWord - bwtInc->bwt->bwtSizeInWord - bwtInc->bwt->occSizeInWord - 3 * (sizeof(bgint_t) / 4)
- bwtInc->numberOfIterationDone * OCC_INTERVAL / BIT_PER_CHAR)
/ 3;
/ 3 / (sizeof(bgint_t) / 4);
if (maxBuildSize < CHAR_PER_WORD) {
fprintf(stderr, "BWTIncSetBuildSizeAndTextAddr(): Not enough space allocated to continue construction!\n");
exit(1);
@ -116,9 +121,8 @@ static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
} else {
bwtInc->buildSize = maxBuildSize;
}
if (bwtInc->buildSize < CHAR_PER_WORD) {
if (bwtInc->buildSize < CHAR_PER_WORD)
bwtInc->buildSize = CHAR_PER_WORD;
}
}
if (bwtInc->buildSize < CHAR_PER_WORD) {
@ -128,9 +132,8 @@ static void BWTIncSetBuildSizeAndTextAddr(BWTInc *bwtInc)
bwtInc->buildSize = bwtInc->buildSize / CHAR_PER_WORD * CHAR_PER_WORD;
bwtInc->packedText = bwtInc->workingMemory + 2 * (bwtInc->buildSize + 1);
bwtInc->textBuffer = (unsigned char*)(bwtInc->workingMemory + bwtInc->buildSize + 1);
bwtInc->packedText = bwtInc->workingMemory + 2 * (bwtInc->buildSize + 1) * (sizeof(bgint_t) / 4);
bwtInc->textBuffer = (unsigned char*)(bwtInc->workingMemory + (bwtInc->buildSize + 1) * (sizeof(bgint_t) / 4));
}
// for ceilLog2()
@ -186,17 +189,17 @@ static unsigned int BitPerWordPackedChar(const unsigned int alphabetSize)
}
static void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned int *output, const unsigned int alphabetSize,
const unsigned int textLength)
const bgint_t textLength)
{
unsigned int i, j, k;
unsigned int c;
bgint_t i;
unsigned int j, k, c;
unsigned int bitPerBytePackedChar;
unsigned int bitPerWordPackedChar;
unsigned int charPerWord;
unsigned int charPerByte;
unsigned int bytePerIteration;
unsigned int byteProcessed = 0;
unsigned int wordProcessed = 0;
bgint_t byteProcessed = 0;
bgint_t wordProcessed = 0;
unsigned int mask, shift;
unsigned int buffer[BITS_IN_WORD];
@ -250,7 +253,7 @@ static void ConvertBytePackedToWordPacked(const unsigned char *input, unsigned i
output[wordProcessed] = c;
}
BWT *BWTCreate(const unsigned int textLength, unsigned int *decodeTable)
BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
{
BWT *bwt;
@ -258,8 +261,8 @@ BWT *BWTCreate(const unsigned int textLength, unsigned int *decodeTable)
bwt->textLength = 0;
bwt->cumulativeFreq = (unsigned*)calloc((ALPHABET_SIZE + 1), sizeof(unsigned int*));
initializeVAL(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0);
bwt->cumulativeFreq = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
initializeVAL_bg(bwt->cumulativeFreq, ALPHABET_SIZE + 1, 0);
bwt->bwtSizeInWord = 0;
@ -272,7 +275,7 @@ BWT *BWTCreate(const unsigned int textLength, unsigned int *decodeTable)
}
bwt->occMajorSizeInWord = BWTOccValueMajorSizeInWord(textLength);
bwt->occValueMajor = (unsigned*)calloc(bwt->occMajorSizeInWord, sizeof(unsigned int));
bwt->occValueMajor = (bgint_t*)calloc(bwt->occMajorSizeInWord, sizeof(bgint_t));
bwt->occSizeInWord = 0;
bwt->occValue = NULL;
@ -297,8 +300,8 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit,
bwtInc->initialMaxBuildSize = initialMaxBuildSize;
bwtInc->incMaxBuildSize = incMaxBuildSize;
bwtInc->targetNBit = targetNBit;
bwtInc->cumulativeCountInCurrentBuild = (unsigned*)calloc((ALPHABET_SIZE + 1), sizeof(unsigned int));
initializeVAL(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
bwtInc->cumulativeCountInCurrentBuild = (bgint_t*)calloc((ALPHABET_SIZE + 1), sizeof(bgint_t));
initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
// Build frequently accessed data
bwtInc->packedShift = (unsigned*)calloc(CHAR_PER_WORD, sizeof(unsigned int));
@ -306,7 +309,8 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit,
bwtInc->packedShift[i] = BITS_IN_WORD - (i+1) * BIT_PER_CHAR;
}
bwtInc->availableWord = (unsigned int)((textLength + OCC_INTERVAL - 1) / OCC_INTERVAL * OCC_INTERVAL / BITS_IN_WORD * bwtInc->targetNBit);
bwtInc->availableWord = (bgint_t)((textLength + OCC_INTERVAL - 1) / OCC_INTERVAL * OCC_INTERVAL / BITS_IN_WORD * bwtInc->targetNBit);
if (bwtInc->availableWord < MIN_AVAILABLE_WORD) bwtInc->availableWord = MIN_AVAILABLE_WORD;
if (bwtInc->availableWord < BWTResidentSizeInWord(textLength) + BWTOccValueMinorSizeInWord(textLength)) {
fprintf(stderr, "BWTIncCreate() : targetNBit is too low!\n");
exit(1);
@ -317,14 +321,16 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit,
}
// for BWTIncConstruct()
static void BWTIncPutPackedTextToRank(const unsigned int *packedText, unsigned int* __restrict rank,
unsigned int* __restrict cumulativeCount, const unsigned int numChar)
static void BWTIncPutPackedTextToRank(const unsigned int *packedText, bgint_t* __restrict rank,
bgint_t* __restrict cumulativeCount, const bgint_t numChar)
{
unsigned int i, j;
bgint_t i;
unsigned int j;
unsigned int c, t;
unsigned int packedMask;
unsigned int rankIndex;
unsigned int lastWord, numCharInLastWord;
bgint_t rankIndex;
bgint_t lastWord;
unsigned int numCharInLastWord;
lastWord = (numChar - 1) / CHAR_PER_WORD;
numCharInLastWord = numChar - lastWord * CHAR_PER_WORD;
@ -359,17 +365,17 @@ static void BWTIncPutPackedTextToRank(const unsigned int *packedText, unsigned i
}
static void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigned int index,
unsigned int* __restrict occCount, const unsigned int* dnaDecodeTable)
static void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const bgint_t index,
bgint_t* __restrict occCount, const unsigned int* dnaDecodeTable)
{
static const unsigned int truncateRightMask[16] = { 0x00000000, 0xC0000000, 0xF0000000, 0xFC000000,
0xFF000000, 0xFFC00000, 0xFFF00000, 0xFFFC0000,
0xFFFF0000, 0xFFFFC000, 0xFFFFF000, 0xFFFFFC00,
0xFFFFFF00, 0xFFFFFFC0, 0xFFFFFFF0, 0xFFFFFFFC };
unsigned int iteration, wordToCount, charToCount;
unsigned int i, j, c;
unsigned int sum;
bgint_t iteration, i;
unsigned int wordToCount, charToCount;
unsigned int j, c, sum;
occCount[0] = 0;
occCount[1] = 0;
@ -432,13 +438,14 @@ static void ForwardDNAAllOccCountNoLimit(const unsigned int* dna, const unsigne
occCount[3] += sum;
}
static void BWTIncBuildPackedBwt(const unsigned int *relativeRank, unsigned int* __restrict bwt, const unsigned int numChar,
const unsigned int *cumulativeCount, const unsigned int *packedShift) {
static void BWTIncBuildPackedBwt(const bgint_t *relativeRank, unsigned int* __restrict bwt, const bgint_t numChar,
const bgint_t *cumulativeCount, const unsigned int *packedShift) {
unsigned int i, c, r;
unsigned int previousRank, currentRank;
unsigned int wordIndex, charIndex;
unsigned int inverseSa0;
bgint_t i, r;
unsigned int c;
bgint_t previousRank, currentRank;
bgint_t wordIndex, charIndex;
bgint_t inverseSa0;
inverseSa0 = previousRank = relativeRank[0];
@ -463,10 +470,10 @@ static void BWTIncBuildPackedBwt(const unsigned int *relativeRank, unsigned int*
}
}
static inline unsigned int BWTOccValueExplicit(const BWT *bwt, const unsigned int occIndexExplicit,
static inline bgint_t BWTOccValueExplicit(const BWT *bwt, const bgint_t occIndexExplicit,
const unsigned int character)
{
unsigned int occIndexMajor;
bgint_t occIndexMajor;
occIndexMajor = occIndexExplicit * OCC_INTERVAL / OCC_INTERVAL_MAJOR;
@ -546,46 +553,43 @@ static unsigned int BackwardDNAOccCount(const unsigned int* dna, const unsigned
}
unsigned int BWTOccValue(const BWT *bwt, unsigned int index, const unsigned int character) {
unsigned int occValue;
unsigned int occExplicitIndex, occIndex;
bgint_t BWTOccValue(const BWT *bwt, bgint_t index, const unsigned int character)
{
bgint_t occValue;
bgint_t occExplicitIndex, occIndex;
// $ is supposed to be positioned at inverseSa0 but it is not encoded
// therefore index is subtracted by 1 for adjustment
if (index > bwt->inverseSa0) {
if (index > bwt->inverseSa0)
index--;
}
occExplicitIndex = (index + OCC_INTERVAL / 2 - 1) / OCC_INTERVAL; // Bidirectional encoding
occIndex = occExplicitIndex * OCC_INTERVAL;
occValue = BWTOccValueExplicit(bwt, occExplicitIndex, character);
if (occIndex == index) {
if (occIndex == index)
return occValue;
}
if (occIndex < index) {
return occValue + ForwardDNAOccCount(bwt->bwtCode + occIndex / CHAR_PER_WORD, index - occIndex, character, bwt->decodeTable);
} else {
return occValue - BackwardDNAOccCount(bwt->bwtCode + occIndex / CHAR_PER_WORD, occIndex - index, character, bwt->decodeTable);
}
}
static unsigned int BWTIncGetAbsoluteRank(BWT *bwt, unsigned int* __restrict absoluteRank, unsigned int* __restrict seq,
const unsigned int *packedText, const unsigned int numChar,
const unsigned int* cumulativeCount, const unsigned int firstCharInLastIteration)
static bgint_t BWTIncGetAbsoluteRank(BWT *bwt, bgint_t* __restrict absoluteRank, bgint_t* __restrict seq,
const unsigned int *packedText, const bgint_t numChar,
const bgint_t* cumulativeCount, const unsigned int firstCharInLastIteration)
{
unsigned int saIndex;
unsigned int lastWord;
bgint_t saIndex;
bgint_t lastWord;
unsigned int packedMask;
unsigned int i, j;
unsigned int c, t;
unsigned int rankIndex;
bgint_t i;
unsigned int c, t, j;
bgint_t rankIndex;
unsigned int shift;
unsigned int seqIndexFromStart[ALPHABET_SIZE];
unsigned int seqIndexFromEnd[ALPHABET_SIZE];
bgint_t seqIndexFromStart[ALPHABET_SIZE];
bgint_t seqIndexFromEnd[ALPHABET_SIZE];
for (i=0; i<ALPHABET_SIZE; i++) {
seqIndexFromStart[i] = cumulativeCount[i];
@ -625,16 +629,16 @@ static unsigned int BWTIncGetAbsoluteRank(BWT *bwt, unsigned int* __restrict abs
return seqIndexFromStart[firstCharInLastIteration];
}
static void BWTIncSortKey(unsigned int* __restrict key, unsigned int* __restrict seq, const unsigned int numItem)
static void BWTIncSortKey(bgint_t* __restrict key, bgint_t* __restrict seq, const bgint_t numItem)
{
#define EQUAL_KEY_THRESHOLD 4 // Partition for equal key if data array size / the number of data with equal value with pivot < EQUAL_KEY_THRESHOLD
int lowIndex, highIndex, midIndex;
int lowPartitionIndex, highPartitionIndex;
int lowStack[32], highStack[32];
int64_t lowIndex, highIndex, midIndex;
int64_t lowPartitionIndex, highPartitionIndex;
int64_t lowStack[32], highStack[32];
int stackDepth;
int i, j;
unsigned int tempSeq, tempKey;
int64_t i, j;
bgint_t tempSeq, tempKey;
int numberOfEqualKey;
if (numItem < 2) return;
@ -803,15 +807,15 @@ static void BWTIncSortKey(unsigned int* __restrict key, unsigned int* __restrict
}
static void BWTIncBuildRelativeRank(unsigned int* __restrict sortedRank, unsigned int* __restrict seq,
unsigned int* __restrict relativeRank, const unsigned int numItem,
unsigned int oldInverseSa0, const unsigned int *cumulativeCount)
static void BWTIncBuildRelativeRank(bgint_t* __restrict sortedRank, bgint_t* __restrict seq,
bgint_t* __restrict relativeRank, const bgint_t numItem,
bgint_t oldInverseSa0, const bgint_t *cumulativeCount)
{
unsigned int i, c;
unsigned int s, r;
unsigned int lastRank, lastIndex;
unsigned int oldInverseSa0RelativeRank = 0;
unsigned int freq;
bgint_t i, c;
bgint_t s, r;
bgint_t lastRank, lastIndex;
bgint_t oldInverseSa0RelativeRank = 0;
bgint_t freq;
lastIndex = numItem;
lastRank = sortedRank[numItem];
@ -831,14 +835,12 @@ static void BWTIncBuildRelativeRank(unsigned int* __restrict sortedRank, unsigne
for (i=numItem; i--;) { // from numItem - 1 to 0
r = sortedRank[i];
if (r > oldInverseSa0) {
if (r > oldInverseSa0)
sortedRank[i]--; // to prepare for merging; $ is not encoded in bwt
}
s = seq[i];
if (i < freq) {
if (lastIndex >= freq) {
if (lastIndex >= freq)
lastRank++; // to trigger the group across alphabet boundary to be split
}
c--;
freq = cumulativeCount[c];
}
@ -846,10 +848,10 @@ static void BWTIncBuildRelativeRank(unsigned int* __restrict sortedRank, unsigne
relativeRank[s] = lastIndex;
} else {
if (i == lastIndex - 1) {
if (lastIndex < numItem && (int)seq[lastIndex + 1] < 0) {
if (lastIndex < numItem && (sbgint_t)seq[lastIndex + 1] < 0) {
seq[lastIndex] = seq[lastIndex + 1] - 1;
} else {
seq[lastIndex] = (unsigned int)-1;
seq[lastIndex] = (bgint_t)-1;
}
}
lastIndex = i;
@ -865,11 +867,12 @@ static void BWTIncBuildRelativeRank(unsigned int* __restrict sortedRank, unsigne
}
static void BWTIncBuildBwt(unsigned int* seq, const unsigned int *relativeRank, const unsigned int numChar,
const unsigned int *cumulativeCount)
static void BWTIncBuildBwt(unsigned int* insertBwt, const bgint_t *relativeRank, const bgint_t numChar,
const bgint_t *cumulativeCount)
{
unsigned int i, c;
unsigned int previousRank, currentRank;
unsigned int c;
bgint_t i;
bgint_t previousRank, currentRank;
previousRank = relativeRank[0];
@ -877,20 +880,20 @@ static void BWTIncBuildBwt(unsigned int* seq, const unsigned int *relativeRank,
currentRank = relativeRank[i];
c = (previousRank >= cumulativeCount[1]) + (previousRank >= cumulativeCount[2])
+ (previousRank >= cumulativeCount[3]);
seq[currentRank] = c;
insertBwt[currentRank] = c;
previousRank = currentRank;
}
}
static void BWTIncMergeBwt(const unsigned int *sortedRank, const unsigned int* oldBwt, const unsigned int *insertBwt,
unsigned int* __restrict mergedBwt, const unsigned int numOldBwt, const unsigned int numInsertBwt)
static void BWTIncMergeBwt(const bgint_t *sortedRank, const unsigned int* oldBwt, const unsigned int *insertBwt,
unsigned int* __restrict mergedBwt, const bgint_t numOldBwt, const bgint_t numInsertBwt)
{
unsigned int bitsInWordMinusBitPerChar;
unsigned int leftShift, rightShift;
unsigned int o;
unsigned int oIndex, iIndex, mIndex;
unsigned int mWord, mChar, oWord, oChar;
unsigned int numInsert;
bgint_t leftShift, rightShift;
bgint_t o;
bgint_t oIndex, iIndex, mIndex;
bgint_t mWord, mChar, oWord, oChar;
bgint_t numInsert;
bitsInWordMinusBitPerChar = BITS_IN_WORD - BIT_PER_CHAR;
@ -997,9 +1000,9 @@ static void BWTIncMergeBwt(const unsigned int *sortedRank, const unsigned int* o
void BWTClearTrailingBwtCode(BWT *bwt)
{
unsigned int bwtResidentSizeInWord;
unsigned int wordIndex, offset;
unsigned int i;
bgint_t bwtResidentSizeInWord;
bgint_t wordIndex, offset;
bgint_t i;
bwtResidentSizeInWord = BWTResidentSizeInWord(bwt->textLength);
@ -1020,18 +1023,18 @@ void BWTClearTrailingBwtCode(BWT *bwt)
void BWTGenerateOccValueFromBwt(const unsigned int* bwt, unsigned int* __restrict occValue,
unsigned int* __restrict occValueMajor,
const unsigned int textLength, const unsigned int* decodeTable)
bgint_t* __restrict occValueMajor,
const bgint_t textLength, const unsigned int* decodeTable)
{
unsigned int numberOfOccValueMajor, numberOfOccValue;
bgint_t numberOfOccValueMajor, numberOfOccValue;
unsigned int wordBetweenOccValue;
unsigned int numberOfOccIntervalPerMajor;
bgint_t numberOfOccIntervalPerMajor;
unsigned int c;
unsigned int i, j;
unsigned int occMajorIndex;
unsigned int occIndex, bwtIndex;
unsigned int sum;
unsigned int tempOccValue0[ALPHABET_SIZE], tempOccValue1[ALPHABET_SIZE];
bgint_t i, j;
bgint_t occMajorIndex;
bgint_t occIndex, bwtIndex;
bgint_t sum; // perhaps unsigned is big enough
bgint_t tempOccValue0[ALPHABET_SIZE], tempOccValue1[ALPHABET_SIZE];
wordBetweenOccValue = OCC_INTERVAL / CHAR_PER_WORD;
@ -1231,31 +1234,25 @@ void BWTGenerateOccValueFromBwt(const unsigned int* bwt, unsigned int* __restri
}
static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
static void BWTIncConstruct(BWTInc *bwtInc, const bgint_t numChar)
{
unsigned int i;
unsigned int mergedBwtSizeInWord, mergedOccSizeInWord;
bgint_t mergedBwtSizeInWord, mergedOccSizeInWord;
unsigned int firstCharInThisIteration;
unsigned int *relativeRank, *seq, *sortedRank, *insertBwt, *mergedBwt;
unsigned int newInverseSa0RelativeRank, oldInverseSa0RelativeRank, newInverseSa0;
#ifdef DEBUG
if (numChar > bwtInc->buildSize) {
fprintf(stderr, "BWTIncConstruct(): numChar > buildSize!\n");
exit(1);
}
#endif
bgint_t *relativeRank, *seq, *sortedRank;
unsigned int *insertBwt, *mergedBwt;
bgint_t newInverseSa0RelativeRank, oldInverseSa0RelativeRank, newInverseSa0;
mergedBwtSizeInWord = BWTResidentSizeInWord(bwtInc->bwt->textLength + numChar);
mergedOccSizeInWord = BWTOccValueMinorSizeInWord(bwtInc->bwt->textLength + numChar);
initializeVAL(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
initializeVAL_bg(bwtInc->cumulativeCountInCurrentBuild, ALPHABET_SIZE + 1, 0);
if (bwtInc->bwt->textLength == 0) { // Initial build
// Set address
seq = bwtInc->workingMemory;
seq = (bgint_t*)bwtInc->workingMemory;
relativeRank = seq + bwtInc->buildSize + 1;
mergedBwt = insertBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord; // build in place
@ -1265,7 +1262,7 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
relativeRank[numChar] = 0;
// Sort suffix
QSufSortSuffixSort((int*)relativeRank, (int*)seq, (int)numChar, (int)ALPHABET_SIZE - 1, 0, FALSE);
QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)ALPHABET_SIZE - 1, 0, FALSE);
newInverseSa0 = relativeRank[0];
// Clear BWT area
@ -1279,9 +1276,9 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
} else { // Incremental build
// Set address
sortedRank = bwtInc->workingMemory;
sortedRank = (bgint_t*)bwtInc->workingMemory;
seq = sortedRank + bwtInc->buildSize + 1;
insertBwt = seq;
insertBwt = (unsigned*)seq; // insertBwt and seq share memory
relativeRank = seq + bwtInc->buildSize + 1;
// Store the first character of this iteration
@ -1318,15 +1315,10 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
// build relative rank; sortedRank is updated for merging to cater for the fact that $ is not encoded in bwt
// the cumulative freq information is used to make sure that inverseSa0 and suffix beginning with different characters are kept in different unsorted groups)
BWTIncBuildRelativeRank(sortedRank, seq, relativeRank, numChar, bwtInc->bwt->inverseSa0, bwtInc->cumulativeCountInCurrentBuild);
#ifdef DEBUG
if (relativeRank[numChar] != oldInverseSa0RelativeRank) {
fprintf(stderr, "BWTIncConstruct(): relativeRank[numChar] != oldInverseSa0RelativeRank!\n");
exit(1);
}
#endif
assert(relativeRank[numChar] == oldInverseSa0RelativeRank);
// Sort suffix
QSufSortSuffixSort((int*)relativeRank, (int*)seq, (int)numChar, (int)numChar, 1, TRUE);
QSufSortSuffixSort((qsint_t*)relativeRank, (qsint_t*)seq, (qsint_t)numChar, (qsint_t)numChar, 1, TRUE);
newInverseSa0RelativeRank = relativeRank[0];
newInverseSa0 = sortedRank[newInverseSa0RelativeRank] + newInverseSa0RelativeRank;
@ -1334,7 +1326,7 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
sortedRank[newInverseSa0RelativeRank] = 0; // a special value so that this is skipped in the merged bwt
// Build BWT
BWTIncBuildBwt(seq, relativeRank, numChar, bwtInc->cumulativeCountInCurrentBuild);
BWTIncBuildBwt(insertBwt, relativeRank, numChar, bwtInc->cumulativeCountInCurrentBuild);
// Merge BWT
mergedBwt = bwtInc->workingMemory + bwtInc->availableWord - mergedBwtSizeInWord
@ -1349,10 +1341,7 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
bwtInc->bwt->bwtCode = mergedBwt;
bwtInc->bwt->bwtSizeInWord = mergedBwtSizeInWord;
bwtInc->bwt->occSizeInWord = mergedOccSizeInWord;
if (mergedBwt < bwtInc->workingMemory + mergedOccSizeInWord) {
fprintf(stderr, "BWTIncConstruct() : Not enough memory allocated!\n");
exit(1);
}
assert(mergedBwt >= bwtInc->workingMemory + mergedOccSizeInWord);
bwtInc->bwt->occValue = mergedBwt - mergedOccSizeInWord;
@ -1376,14 +1365,14 @@ static void BWTIncConstruct(BWTInc *bwtInc, const unsigned int numChar)
}
BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetNBit,
const unsigned int initialMaxBuildSize, const unsigned int incMaxBuildSize)
bgint_t initialMaxBuildSize, bgint_t incMaxBuildSize)
{
FILE *packedFile;
unsigned int packedFileLen;
unsigned int totalTextLength;
unsigned int textToLoad, textSizeInByte;
unsigned int processedTextLength;
bgint_t packedFileLen;
bgint_t totalTextLength;
bgint_t textToLoad, textSizeInByte;
bgint_t processedTextLength;
unsigned char lastByteLength;
BWTInc *bwtInc;
@ -1397,10 +1386,6 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN
fseek(packedFile, -1, SEEK_END);
packedFileLen = ftell(packedFile);
if ((int)packedFileLen < 0) {
fprintf(stderr, "BWTIncConstructFromPacked: Cannot determine file length!\n");
exit(1);
}
fread(&lastByteLength, sizeof(unsigned char), 1, packedFile);
totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
@ -1416,9 +1401,9 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN
textSizeInByte = textToLoad / CHAR_PER_BYTE; // excluded the odd byte
fseek(packedFile, -2, SEEK_CUR);
fseek(packedFile, -((int)textSizeInByte), SEEK_CUR);
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte + 1, packedFile);
fseek(packedFile, -((int)textSizeInByte + 1), SEEK_CUR);
fseek(packedFile, -((long)textSizeInByte + 1), SEEK_CUR);
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
BWTIncConstruct(bwtInc, textToLoad);
@ -1431,15 +1416,15 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN
textToLoad = totalTextLength - processedTextLength;
}
textSizeInByte = textToLoad / CHAR_PER_BYTE;
fseek(packedFile, -((int)textSizeInByte), SEEK_CUR);
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
fread(bwtInc->textBuffer, sizeof(unsigned char), textSizeInByte, packedFile);
fseek(packedFile, -((int)textSizeInByte), SEEK_CUR);
fseek(packedFile, -((long)textSizeInByte), SEEK_CUR);
ConvertBytePackedToWordPacked(bwtInc->textBuffer, bwtInc->packedText, ALPHABET_SIZE, textToLoad);
BWTIncConstruct(bwtInc, textToLoad);
processedTextLength += textToLoad;
if (bwtInc->numberOfIterationDone % 10 == 0) {
printf("[BWTIncConstructFromPacked] %u iterations done. %u characters processed.\n",
bwtInc->numberOfIterationDone, processedTextLength);
printf("[BWTIncConstructFromPacked] %lu iterations done. %lu characters processed.\n",
(long)bwtInc->numberOfIterationDone, (long)processedTextLength);
}
}
return bwtInc;
@ -1464,7 +1449,7 @@ void BWTIncFree(BWTInc *bwtInc)
free(bwtInc);
}
static unsigned int BWTFileSizeInWord(const unsigned int numChar)
static bgint_t BWTFileSizeInWord(const bgint_t numChar)
{
// The $ in BWT at the position of inverseSa0 is not encoded
return (numChar + CHAR_PER_WORD - 1) / CHAR_PER_WORD;
@ -1474,7 +1459,7 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
{
FILE *bwtFile;
/* FILE *occValueFile; */
unsigned int bwtLength;
bgint_t bwtLength;
bwtFile = (FILE*)fopen(bwtFileName, "wb");
if (bwtFile == NULL) {
@ -1482,8 +1467,8 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
exit(1);
}
fwrite(&bwt->inverseSa0, sizeof(unsigned int), 1, bwtFile);
fwrite(bwt->cumulativeFreq + 1, sizeof(unsigned int), ALPHABET_SIZE, bwtFile);
fwrite(&bwt->inverseSa0, sizeof(bgint_t), 1, bwtFile);
fwrite(bwt->cumulativeFreq + 1, sizeof(bgint_t), ALPHABET_SIZE, bwtFile);
bwtLength = BWTFileSizeInWord(bwt->textLength);
fwrite(bwt->bwtCode, sizeof(unsigned int), bwtLength, bwtFile);
fclose(bwtFile);

View File

@ -27,8 +27,8 @@
#include <stdint.h>
//typedef int64_t bgint_t;
typedef unsigned bgint_t;
typedef uint64_t bgint_t;
typedef int64_t sbgint_t;
#define ALPHABET_SIZE 4
#define BIT_PER_CHAR 2
@ -69,20 +69,20 @@ typedef struct BWT {
unsigned int *occValue; // Occurrence values stored explicitly
bgint_t *occValueMajor; // Occurrence values stored explicitly
unsigned int *decodeTable; // For decoding BWT by table lookup
unsigned int bwtSizeInWord; // Temporary variable to hold the memory allocated
unsigned int occSizeInWord; // Temporary variable to hold the memory allocated
unsigned int occMajorSizeInWord; // Temporary variable to hold the memory allocated
bgint_t bwtSizeInWord; // Temporary variable to hold the memory allocated
bgint_t occSizeInWord; // Temporary variable to hold the memory allocated
bgint_t occMajorSizeInWord; // Temporary variable to hold the memory allocated
} BWT;
typedef struct BWTInc {
BWT *bwt;
unsigned int numberOfIterationDone;
unsigned int *cumulativeCountInCurrentBuild;
unsigned int availableWord;
bgint_t *cumulativeCountInCurrentBuild;
bgint_t availableWord;
float targetNBit;
unsigned int buildSize;
unsigned int initialMaxBuildSize;
unsigned int incMaxBuildSize;
bgint_t buildSize;
bgint_t initialMaxBuildSize;
bgint_t incMaxBuildSize;
unsigned int firstCharInLastIteration;
unsigned int *workingMemory;
unsigned int *packedText;