From 573ad0c98f9c20bcf5308f861a691c521cc7c563 Mon Sep 17 00:00:00 2001 From: Heng Li Date: Tue, 18 Oct 2011 23:48:46 -0400 Subject: [PATCH] merge bwt_gen.h to bwt_gen.c --- bwt_gen/QSufSort.c | 58 ++++++++++++----------------- bwt_gen/bwt_gen.c | 67 ++++++++++++++++++++++++++++++++- bwt_gen/bwt_gen.h | 93 ---------------------------------------------- 3 files changed, 88 insertions(+), 130 deletions(-) delete mode 100644 bwt_gen/bwt_gen.h diff --git a/bwt_gen/QSufSort.c b/bwt_gen/QSufSort.c index 92a8594..e437ac3 100644 --- a/bwt_gen/QSufSort.c +++ b/bwt_gen/QSufSort.c @@ -32,9 +32,12 @@ #include #include #include -#include "bwt_gen.h" #include "QSufSort.h" +#define min(value1, value2) ( ((value1) < (value2)) ? (value1) : (value2) ) +#define med3(a, b, c) ( ac ? b : a>c ? c : a)) +#define swap(a, b, t); t = a; a = b; b = t; + // Static functions static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, const qsint_t highestPos, const qsint_t numSortedChar); @@ -51,8 +54,8 @@ static qsint_t QSufSortTransform(qsint_t* __restrict V, qsint_t* __restrict I, c contents of x[n] is disregarded, the n-th symbol being regarded as end-of-string smaller than all other symbols.*/ void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t largestInputSymbol, - const qsint_t smallestInputSymbol, const int skipTransform) { - + const qsint_t smallestInputSymbol, const int skipTransform) +{ qsint_t i, j; qsint_t s, negatedSortedGroupLength; qsint_t numSymbolAggregated; @@ -96,16 +99,13 @@ void QSufSortSuffixSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsin } numSortedPos *= 2; /* double sorted-depth.*/ } - } -void QSufSortGenerateSaFromInverse(const qsint_t* V, qsint_t* __restrict I, const qsint_t numChar) { - +void QSufSortGenerateSaFromInverse(const qsint_t* V, qsint_t* __restrict I, const qsint_t numChar) +{ qsint_t i; - for (i=0; i<=numChar; i++) { + for (i=0; i<=numChar; i++) I[V[i]] = i + 1; - } - } /* Sorting routine called for each unsorted group. Sorts the array of integers @@ -149,9 +149,8 @@ static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, cons } c--; } - if (b > c) { + if (b > c) break; - } swap(I[b], I[c], tmp); b++; c--; @@ -173,9 +172,8 @@ static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, cons s = b - a; t = d - c; - if (s > 0) { + if (s > 0) QSufSortSortSplit(V, I, lowestPos, lowestPos + s - 1, numSortedChar); - } // Update group number for equal portion a = lowestPos + s; @@ -186,14 +184,12 @@ static void QSufSortSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, cons I[a] = -1; } else { // Unsorted group - for (c=a; c<=b; c++) { + for (c=a; c<=b; c++) V[I[c]] = b; - } } - if (t > 0) { + if (t > 0) QSufSortSortSplit(V, I, highestPos - t + 1, highestPos, numSortedChar); - } } @@ -232,8 +228,8 @@ static qsint_t QSufSortChoosePivot(qsint_t* __restrict V, qsint_t* __restrict I, /* Quadratic sorting method to use for small subarrays. */ static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t lowestPos, - const qsint_t highestPos, const qsint_t numSortedChar) { - + const qsint_t highestPos, const qsint_t numSortedChar) +{ qsint_t i, j; qsint_t tmpKey, tmpPos; qsint_t numItem; @@ -269,9 +265,8 @@ static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I if (key[i-1] == key[i]) { negativeSortedLength = 0; } else { - if (negativeSortedLength < 0) { + if (negativeSortedLength < 0) I[i+lowestPos] = negativeSortedLength; - } groupNum = i + lowestPos - 1; negativeSortedLength--; } @@ -280,10 +275,8 @@ static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I I[lowestPos] = pos[0]; V[I[lowestPos]] = groupNum; - if (negativeSortedLength < 0) { + if (negativeSortedLength < 0) I[lowestPos] = negativeSortedLength; - } - } /* Bucketsort for first iteration. @@ -295,17 +288,16 @@ static void QSufSortInsertSortSplit(qsint_t* __restrict V, qsint_t* __restrict I Output: x is V and p is I after the initial sorting stage of the refined suffix sorting algorithm.*/ -static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize) { - +static void QSufSortBucketSort(qsint_t* __restrict V, qsint_t* __restrict I, const qsint_t numChar, const qsint_t alphabetSize) +{ qsint_t i, c; qsint_t d; qsint_t groupNum; qsint_t currentIndex; // mark linked list empty - for (i=0; i #include #include -#include "bwt_gen.h" +#include #include "QSufSort.h" +typedef uint64_t bgint_t; +typedef int64_t sbgint_t; + +#define ALPHABET_SIZE 4 +#define BIT_PER_CHAR 2 +#define CHAR_PER_WORD 16 +#define CHAR_PER_BYTE 4 + +#define BITS_IN_WORD 32 +#define BITS_IN_BYTE 8 +#define BYTES_IN_WORD 4 + +#define ALL_ONE_MASK 0xFFFFFFFF +#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD 65536 + +#define BITS_PER_OCC_VALUE 16 +#define OCC_VALUE_PER_WORD 2 +#define OCC_INTERVAL 256 +#define OCC_INTERVAL_MAJOR 65536 + +#define TRUE 1 +#define FALSE 0 + +#define BWTINC_INSERT_SORT_NUM_ITEM 7 + #define MIN_AVAILABLE_WORD 0x10000 +#define average(value1, value2) ( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 ) +#define min(value1, value2) ( ((value1) < (value2)) ? (value1) : (value2) ) +#define max(value1, value2) ( ((value1) > (value2)) ? (value1) : (value2) ) +#define med3(a, b, c) ( ac ? b : a>c ? c : a)) +#define swap(a, b, t); t = a; a = b; b = t; +#define truncateLeft(value, offset) ( (value) << (offset) >> (offset) ) +#define truncateRight(value, offset) ( (value) >> (offset) << (offset) ) +#define DNA_OCC_SUM_EXCEPTION(sum) ((sum & 0xfefefeff) == 0) + +typedef struct BWT { + bgint_t textLength; // length of the text + bgint_t inverseSa0; // SA-1[0] + bgint_t *cumulativeFreq; // cumulative frequency + unsigned int *bwtCode; // BWT code + unsigned int *occValue; // Occurrence values stored explicitly + bgint_t *occValueMajor; // Occurrence values stored explicitly + unsigned int *decodeTable; // For decoding BWT by table lookup + bgint_t bwtSizeInWord; // Temporary variable to hold the memory allocated + bgint_t occSizeInWord; // Temporary variable to hold the memory allocated + bgint_t occMajorSizeInWord; // Temporary variable to hold the memory allocated +} BWT; + +typedef struct BWTInc { + BWT *bwt; + unsigned int numberOfIterationDone; + bgint_t *cumulativeCountInCurrentBuild; + bgint_t availableWord; + float targetNBit; + bgint_t buildSize; + bgint_t initialMaxBuildSize; + bgint_t incMaxBuildSize; + unsigned int firstCharInLastIteration; + unsigned int *workingMemory; + unsigned int *packedText; + unsigned char *textBuffer; + unsigned int *packedShift; +} BWTInc; + static bgint_t TextLengthFromBytePacked(bgint_t bytePackedLength, unsigned int bitPerChar, unsigned int lastByteLength) { @@ -1477,7 +1540,7 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o void bwt_bwtgen(const char *fn_pac, const char *fn_bwt) { BWTInc *bwtInc; - bwtInc = BWTIncConstructFromPacked(fn_pac, 2.5, 10000000, 10000000); + bwtInc = BWTIncConstructFromPacked(fn_pac, 5.0, 10000000, 10000000); printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone); BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0); BWTIncFree(bwtInc); diff --git a/bwt_gen/bwt_gen.h b/bwt_gen/bwt_gen.h deleted file mode 100644 index 954c1c0..0000000 --- a/bwt_gen/bwt_gen.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - - BWTConstruct.h BWT-Index Construction - - This module constructs BWT and auxiliary data structures. - - Copyright (C) 2004, Wong Chi Kwong. - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License - as published by the Free Software Foundation; either version 2 - of the License, or (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - -*/ - -#ifndef BWT_GEN_H -#define BWT_GEN_H - -#include - -typedef uint64_t bgint_t; -typedef int64_t sbgint_t; - -#define ALPHABET_SIZE 4 -#define BIT_PER_CHAR 2 -#define CHAR_PER_WORD 16 -#define CHAR_PER_BYTE 4 - -#define BITS_IN_WORD 32 -#define BITS_IN_BYTE 8 -#define BYTES_IN_WORD 4 - -#define ALL_ONE_MASK 0xFFFFFFFF -#define DNA_OCC_CNT_TABLE_SIZE_IN_WORD 65536 - -#define BITS_PER_OCC_VALUE 16 -#define OCC_VALUE_PER_WORD 2 -#define OCC_INTERVAL 256 -#define OCC_INTERVAL_MAJOR 65536 - -#define TRUE 1 -#define FALSE 0 - -#define BWTINC_INSERT_SORT_NUM_ITEM 7 - -#define average(value1, value2) ( ((value1) & (value2)) + ((value1) ^ (value2)) / 2 ) -#define min(value1, value2) ( ((value1) < (value2)) ? (value1) : (value2) ) -#define max(value1, value2) ( ((value1) > (value2)) ? (value1) : (value2) ) -#define med3(a, b, c) ( ac ? b : a>c ? c : a)) -#define swap(a, b, t); t = a; a = b; b = t; -#define truncateLeft(value, offset) ( (value) << (offset) >> (offset) ) -#define truncateRight(value, offset) ( (value) >> (offset) << (offset) ) -#define DNA_OCC_SUM_EXCEPTION(sum) ((sum & 0xfefefeff) == 0) - -typedef struct BWT { - bgint_t textLength; // length of the text - bgint_t inverseSa0; // SA-1[0] - bgint_t *cumulativeFreq; // cumulative frequency - unsigned int *bwtCode; // BWT code - unsigned int *occValue; // Occurrence values stored explicitly - bgint_t *occValueMajor; // Occurrence values stored explicitly - unsigned int *decodeTable; // For decoding BWT by table lookup - bgint_t bwtSizeInWord; // Temporary variable to hold the memory allocated - bgint_t occSizeInWord; // Temporary variable to hold the memory allocated - bgint_t occMajorSizeInWord; // Temporary variable to hold the memory allocated -} BWT; - -typedef struct BWTInc { - BWT *bwt; - unsigned int numberOfIterationDone; - bgint_t *cumulativeCountInCurrentBuild; - bgint_t availableWord; - float targetNBit; - bgint_t buildSize; - bgint_t initialMaxBuildSize; - bgint_t incMaxBuildSize; - unsigned int firstCharInLastIteration; - unsigned int *workingMemory; - unsigned int *packedText; - unsigned char *textBuffer; - unsigned int *packedShift; -} BWTInc; - -#endif