more carefully computed the availableWords

This commit is contained in:
Heng Li 2011-10-20 23:49:03 -04:00
parent ec307a10e6
commit 66629512d9
2 changed files with 14 additions and 23 deletions

View File

@ -345,17 +345,14 @@ BWT *BWTCreate(const bgint_t textLength, unsigned int *decodeTable)
return bwt;
}
BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit,
const unsigned int initialMaxBuildSize, const unsigned int incMaxBuildSize)
BWTInc *BWTIncCreate(const bgint_t textLength, unsigned int initialMaxBuildSize, unsigned int incMaxBuildSize)
{
BWTInc *bwtInc;
unsigned int i;
unsigned int i, n_iter;
if (textLength < incMaxBuildSize) incMaxBuildSize = textLength;
if (textLength < initialMaxBuildSize) initialMaxBuildSize = textLength;
if (targetNBit == 0) {
fprintf(stderr, "BWTIncCreate() : targetNBit = 0!\n");
exit(1);
}
bwtInc = (BWTInc*)calloc(1, sizeof(BWTInc));
bwtInc->numberOfIterationDone = 0;
bwtInc->bwt = BWTCreate(textLength, NULL);
@ -369,16 +366,15 @@ BWTInc *BWTIncCreate(const unsigned int textLength, const float targetNBit,
for (i=0; i<CHAR_PER_WORD; i++)
bwtInc->packedShift[i] = BITS_IN_WORD - (i+1) * BIT_PER_CHAR;
bwtInc->availableWord = (bgint_t)((textLength + OCC_INTERVAL - 1) / OCC_INTERVAL * OCC_INTERVAL / BITS_IN_WORD * targetNBit);
n_iter = (textLength - initialMaxBuildSize) / incMaxBuildSize + 1;
bwtInc->availableWord = BWTResidentSizeInWord(textLength) + BWTOccValueMinorSizeInWord(textLength) // minimal memory requirement
+ OCC_INTERVAL / BIT_PER_CHAR * n_iter * 2 * (sizeof(bgint_t) / 4) // buffer at the end of occ array
+ incMaxBuildSize/5 * 3 * (sizeof(bgint_t) / 4); // space for the 3 temporary arrays in each iteration
if (bwtInc->availableWord < MIN_AVAILABLE_WORD) bwtInc->availableWord = MIN_AVAILABLE_WORD; // lh3: otherwise segfaul when availableWord is too small
if (bwtInc->availableWord < BWTResidentSizeInWord(textLength) + BWTOccValueMinorSizeInWord(textLength)) {
fprintf(stderr, "BWTIncCreate() : targetNBit is too low!\n");
exit(1);
}
fprintf(stderr, "[%s] textLength=%ld, availableWord=%ld\n", __func__, (long)textLength, (long)bwtInc->availableWord);
bwtInc->workingMemory = (unsigned*)calloc(bwtInc->availableWord, BYTES_IN_WORD);
return bwtInc;
}
// for BWTIncConstruct()
static void BWTIncPutPackedTextToRank(const unsigned int *packedText, bgint_t* __restrict rank,
@ -699,7 +695,7 @@ static void BWTIncSortKey(bgint_t* __restrict key, bgint_t* __restrict seq, cons
int stackDepth;
int64_t i, j;
bgint_t tempSeq, tempKey;
int numberOfEqualKey;
int64_t numberOfEqualKey;
if (numItem < 2) return;
@ -1432,8 +1428,7 @@ static void BWTIncConstruct(BWTInc *bwtInc, const bgint_t numChar)
}
BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetNBit,
bgint_t initialMaxBuildSize, bgint_t incMaxBuildSize)
BWTInc *BWTIncConstructFromPacked(const char *inputFileName, bgint_t initialMaxBuildSize, bgint_t incMaxBuildSize)
{
FILE *packedFile;
@ -1457,7 +1452,7 @@ BWTInc *BWTIncConstructFromPacked(const char *inputFileName, const float targetN
fread(&lastByteLength, sizeof(unsigned char), 1, packedFile);
totalTextLength = TextLengthFromBytePacked(packedFileLen, BIT_PER_CHAR, lastByteLength);
bwtInc = BWTIncCreate(totalTextLength, targetNBit, initialMaxBuildSize, incMaxBuildSize);
bwtInc = BWTIncCreate(totalTextLength, initialMaxBuildSize, incMaxBuildSize);
BWTIncSetBuildSizeAndTextAddr(bwtInc);
@ -1545,7 +1540,7 @@ void BWTSaveBwtCodeAndOcc(const BWT *bwt, const char *bwtFileName, const char *o
void bwt_bwtgen(const char *fn_pac, const char *fn_bwt)
{
BWTInc *bwtInc;
bwtInc = BWTIncConstructFromPacked(fn_pac, 3.7, 10000000, 10000000);
bwtInc = BWTIncConstructFromPacked(fn_pac, 10000000, 10000000);
printf("[bwt_gen] Finished constructing BWT in %u iterations.\n", bwtInc->numberOfIterationDone);
BWTSaveBwtCodeAndOcc(bwtInc->bwt, fn_bwt, 0);
BWTIncFree(bwtInc);

View File

@ -100,10 +100,6 @@ int bwa_index(int argc, char *argv[])
fprintf(stderr, "%.2f sec\n", (float)(clock() - t) / CLOCKS_PER_SEC);
}
}
if (l_pac > 0xffffffffu) {
fprintf(stderr, "[%s] BWA only works with reference sequences shorter than 4GB in total. Abort!\n", __func__);
return 1;
}
if (algo_type == 0) algo_type = l_pac > 50000000? 2 : 3; // set the algorithm for generating BWT
{
strcpy(str, prefix); strcat(str, ".pac");