From bcb8b420baaec126da0d423f93d166db7da8491b Mon Sep 17 00:00:00 2001 From: zzh Date: Thu, 17 Apr 2025 18:29:12 +0800 Subject: [PATCH] =?UTF-8?q?=E6=AD=A3=E5=B8=B8=E8=AF=BB=E5=AE=8C=E6=96=87?= =?UTF-8?q?=E4=BB=B6=EF=BC=8C=E7=84=B6=E5=90=8E=E9=80=80=E5=87=BA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/sort/sam_io.cpp | 2 +- src/sort/sort.cpp | 16 +++++++++++++--- src/sort/sort_impl.h | 2 +- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/src/sort/sam_io.cpp b/src/sort/sam_io.cpp index a9d5ac0..775ca72 100644 --- a/src/sort/sam_io.cpp +++ b/src/sort/sam_io.cpp @@ -59,7 +59,7 @@ void parseSamHeader(FILE *fp, HeaderBuf &hdrBuf) { blockLen = unpackInt16(&fBuf[16]) + 1; readState = fread(&fBuf[BLOCK_HEADER_LENGTH], 1, blockLen - BLOCK_HEADER_LENGTH, fp); header = sam_hdr_init(); - + spdlog::info("Header file size: {}", blockLen); uint32_t crc = le_to_u32(fBuf + blockLen - 8); int ret = bgzfUncompress(uData.data, &dlen, (Bytef *)fBuf + 18, blockLen - 18, crc); diff --git a/src/sort/sort.cpp b/src/sort/sort.cpp index acd66e2..fa5432b 100644 --- a/src/sort/sort.cpp +++ b/src/sort/sort.cpp @@ -269,6 +269,7 @@ static void *nonBlockingUncompress(void *data) { else ((SortData *)data)->round++; + // spdlog::info("block arr size: {}", ((SortData *)data)->blockAddrArr->size()); kt_for(nsgv::gSortArg.NUM_THREADS, mtUncompressBlock, data, ((SortData *)data)->blockAddrArr->size()); // kt_for(nsgv::gSortArg.NUM_THREADS, mtUncompressBlock, data, nsgv::gSortArg.NUM_THREADS); PROF_G_END(uncompress); @@ -343,13 +344,16 @@ int doSort() { // 测试读取bam的速度 + int max_bam_len = 0; bam1_t *bamp = bam_init1(); while (sam_read1(inBamFp, inBamHdr, bamp) >= 0) { + if (max_bam_len < bamp->l_data) max_bam_len = bamp->l_data; if (bamp->l_data > 1000) { spdlog::info("large record len: {}", bamp->l_data); } } sam_close(inBamFp); + spdlog::info("max record len: {}", max_bam_len); return 0; #endif @@ -365,6 +369,8 @@ int doSort() { // 打开文件 FILE *fpr = fopen(nsgv::gSortArg.INPUT_FILE.c_str(), "rb"); + // threadRead(fpr); exit(0); + parseSamHeader(fpr, nsgv::gInHdr); // FILE *fpw = fopen(nsgv::gSortArg.OUTPUT_FILE.c_str(), "rb"); @@ -449,6 +455,7 @@ int doSort() { PROF_G_END(read); while (readState > 0) { + //spdlog::info("readState-0: {}", readState); // while (readState > 0) { PROF_G_BEG(parse_block); curStartAddrArr->clear(); @@ -477,6 +484,7 @@ int doSort() { // exit(0); READ_BLOCKS; + // spdlog::info("read block len: {}", blockLen); // spdlog::info("cur block size: {}", curStartAddrArr->size()); @@ -513,12 +521,14 @@ int doSort() { SWITCH_POINTER; PROF_G_BEG(read); - // readState = fread(curBuf, 1, READ_BUFSIZE, fpr); + readState = fread(curBuf, 1, READ_BUFSIZE, fpr); + //spdlog::info("readState: {}", readState); PROF_G_BEG(mem_copy); - memcpy(curBuf, fbuf[4], READ_BUFSIZE); readState = READ_BUFSIZE; + // memcpy(curBuf, fbuf[4], readState); readState = READ_BUFSIZE; + memcpy(curBuf, fbuf[4], readState); PROF_G_END(mem_copy); PROF_G_END(read); - if (fsize >= 6245369164) break; + // if (fsize >= 6245369164) break; } pthread_join(uncompressTid, NULL); PROF_G_END(mid_all); diff --git a/src/sort/sort_impl.h b/src/sort/sort_impl.h index ebe7e11..6948de2 100644 --- a/src/sort/sort_impl.h +++ b/src/sort/sort_impl.h @@ -20,7 +20,7 @@ struct TemplateCoordinateKey { // Struct which contains the a record, and the pointer to the sort tag (if any) or // a combined ref / position / strand. // Used to speed up sorts (coordinate, by-tag, and template-coordinate). -typedef struct BamSortTag { +struct BamSortTag { bam1_t *bam_record; union { const uint8_t *tag;