/* Description: 读入sam/bam时,开辟一个大的buf,存放这些数据 Copyright : All right reserved by ICT Author : Zhang Zhonghai Date : 2019/11/27 */ #include "bam_buf.h" /* * BamBuf类 */ // 读取数据直到读完,或者缓冲区满 int BamBuf::ReadBam() { int read_num = 0; if (handle_last) { // 处理上次读入的最后一个bam if (has_enough_space()) { // 必须调用,在边界处调整memffset ++read_num; append_one_bam(); } else { return read_num; // 还是没空间 } } while (read_stat_ >= 0 && (read_stat_ = sam_read1(fp, hdr, bw->b)) >= 0) { bw->end_pos_ = BamWrap::BamEndPos(bw->b); if (has_enough_space()) { // 还有空间 append_one_bam(); ++read_num; // 放进缓存才算读取到 } else { break; } } if (read_stat_ >= 0) { handle_last = true; } else { handle_last = false; } return read_num; } // 初始化缓存 void BamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) { this->fp = fp; this->hdr = hdr; this->mem_size = mem_size; this->mem = (uint8_t *)malloc(mem_size); this->bw = (BamWrap *)malloc(sizeof(BamWrap)); this->bw->b = bam_init1(); if (bw == NULL || this->mem == NULL || this->bw->b == NULL) { fprintf(stderr, "allocate memory failed! Abort\n"); exit(-1); } } void BamBuf::ClearBeforeIdx(size_t idxInBv) { if (idxInBv < 1) return; int i = 0, j = idxInBv; for (; j < bv.size(); ++i, ++j) { bv[i] = bv[j]; } bv.resize(i); prepare_read(); } void BamBuf::ClearAll() { bv.clear(); prepare_read(); } // 为下一次读取做准备, 计算一些边界条件 inline void BamBuf::prepare_read() { // 计算余留的下次计算可能用到的bam所占的位置 if (bv.size() > 0) { BamWrap *bw = bv[0]; legacy_start = (int64_t)bw - (int64_t)mem; bw = bv.back(); legacy_end = (int64_t)bw + bw->length() - (int64_t)mem; } else { legacy_start = legacy_end = 0; mem_offset = 0; // 上次没剩下,那就从头存储 } } // 检查缓存是否还有空间 inline bool BamBuf::has_enough_space() { const uint32_t bam_len = bw->length(); int64_t potential_end = mem_offset + bam_len; if (legacy_start <= legacy_end) legacy_start += mem_size; if (potential_end >= legacy_start) { return false; } if (potential_end >= mem_size) { mem_offset = 0; } int64_t virtual_offset = mem_offset; if (virtual_offset < legacy_end) virtual_offset += mem_size; potential_end = virtual_offset + bam_len; return potential_end < legacy_start; } // 处理一个读取后的bam inline void BamBuf::append_one_bam() { BamWrap *bwp = (BamWrap *)(mem + mem_offset); *bwp = *bw; bwp->b = (bam1_t *)((char *)bwp + sizeof(*bwp)); bam1_t *bp = bwp->b; *bp = *bw->b; bp->data = (uint8_t *)((char *)bwp->b + sizeof(bam1_t)); memcpy(bp->data, bw->b->data, bw->b->l_data); // 更新下次存储的位置 mem_offset = (mem_offset + bw->length() + 8 - 1) & ~((size_t)(8 - 1)); // cout << "size: " << bv.size() << " " << buf_name << endl; bv.push_back(bwp); } // 处理上次读入的最后一个read inline bool BamBuf::handle_last_read() { if (handle_last) { // 处理上次读入的最后一个bam if (has_enough_space()) { // 必须调用,在边界处调整memffset append_one_bam(); handle_last = false; return true; } } return false; } /* * AsyncIoBamBuf 类 */ // 初始化缓存 void AsyncIoBamBuf::Init(samFile *fp, sam_hdr_t *hdr, int64_t mem_size) { if (use_async_io_) { buf1_.Init(fp, hdr, mem_size >> 1); buf2_.Init(fp, hdr, mem_size >> 1); pi_ = &buf1_; po_ = &buf2_; tid_ = (pthread_t *)malloc(sizeof(pthread_t)); } else { buf1_.Init(fp, hdr, mem_size); pi_ = &buf1_; } } // 读取数据 int AsyncIoBamBuf::ReadBam() { if (use_async_io_) { return async_read_bam(); } else { return sync_read_bam(); } } int AsyncIoBamBuf::sync_read_bam() { int read_num = 0; if (clear_all_) { clear_all_ = false; pi_->ClearAll(); } else if (clear_before_idx_ > 0) { pi_->ClearBeforeIdx(clear_before_idx_); clear_before_idx_ = 0; } read_num = pi_->ReadBam(); refresh_bam_arr(); return read_num; } int AsyncIoBamBuf::async_read_bam() { int read_num = 0; if (first_read_) { read_num = pi_->ReadBam(); first_read_ = false; refresh_bam_arr(); } else { // join, 交换缓冲区指针 pthread_join(*tid_, 0); resize_buf(); if (need_read_) { // 需要交换指针 BamBuf *tmp = pi_; pi_ = po_; po_ = tmp; } read_num = last_read_num_; refresh_bam_arr(); } // 异步读 pthread_create(tid_, 0, async_read, this); return read_num; } void *AsyncIoBamBuf::async_read(void *data) { AsyncIoBamBuf *ab = (AsyncIoBamBuf *)data; if (ab->need_read_ && ab->ReadStat() >= 0) { // 需要读取 ab->last_read_num_ = ab->po_->ReadBam(); } else { ab->last_read_num_ = 0; } pthread_exit(0); } // 为下一次读取做准备, 计算一些边界条件,延迟操作,因为此时可能po_对应的buf正在读取 void AsyncIoBamBuf::ClearBeforeIdx(size_t idxInBv) { clear_before_idx_ = idxInBv; } // 清空上一次所有读入的数据,延迟操作,因为此时可能po_对应的buf正在读取 void AsyncIoBamBuf::ClearAll() { clear_all_ = true; } inline void AsyncIoBamBuf::resize_buf() { if (clear_all_) { // 清理上一轮的数据 clear_all_ = false; po_->ClearBeforeIdx(legacy_size_); pi_->ClearAll(); if (pi_->handle_last_read()) // 上次读取有一个read没放入缓存 { last_read_num_ += 1; legacy_size_ = pi_->Size(); // 应该只有一个read need_read_ = true; } else // 没空间存放,则不交换指针,或者文件已经读取完毕 { legacy_size_ = 0; need_read_ = false; } } else if (clear_before_idx_ > 0) { if (clear_before_idx_ < legacy_size_) { po_->ClearBeforeIdx(clear_before_idx_); legacy_size_ -= clear_before_idx_; // 不需要交换指针,不需要读取 need_read_ = false; } else { po_->ClearBeforeIdx(legacy_size_); pi_->ClearBeforeIdx(clear_before_idx_ - legacy_size_); if (pi_->handle_last_read()) // 上次读取有一个read没放入缓存 { last_read_num_ += 1; legacy_size_ = pi_->Size(); // 应该只有一个read need_read_ = true; } else // 没空间存放,则不交换指针,或者文件已经读取完毕 { legacy_size_ = 0; need_read_ = false; } } clear_before_idx_ = 0; } }