FastBQSR/src/util/vcf_parser.h

53 lines
1.4 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

/*
Description: 解析knownsites vcf文件的类
Copyright : All right reserved by ICT
Author : Zhang Zhonghai
Date : 2025/12/24
*/
#pragma once
#include <htslib/sam.h>
#include <deque>
#include <fstream>
#include <string>
#include "interval.h"
#include "linear_index.h"
using std::deque;
using std::ifstream;
using std::string;
// 需要支持vcf idxtbicsi三种索引方式
// vcf和idx是一对
// vcf.gz和tbi或csi是一对
// 解析knownSites
struct VCFParser {
deque<Interval> knownSites; // 已知的变异位点
char* buf = nullptr; // // 数据buffer
uint32_t bufLen = 4 * 1024; // 数据buffer长度
LinearIndex index; // vcf文件索引
ifstream inStm; // vcf文件流
VCFParser() { Init(); }
VCFParser(const string& vcfFileName) { Init(vcfFileName); }
VCFParser(const string& vcfFileName, sam_hdr_t* samHeader) { Init(vcfFileName, samHeader); }
void Init() { buf = (char*)malloc(bufLen); }
void Init(const string& vcfFileName) {
Init();
inStm.open(vcfFileName, ifstream::in);
string idxFileName = vcfFileName + ".idx";
if (!index.ReadIndex(idxFileName))
error("[%s] fail to load the %s index file\n", __func__, idxFileName.c_str());
}
void Init(const string& vcfFileName, sam_hdr_t* samHeader) {
index.SetHeader(samHeader);
Init(vcfFileName);
}
};