调整了计算read group的方式,当read group只有一个时,不需要计算
This commit is contained in:
parent
745963831d
commit
985875ebac
|
|
@ -5,5 +5,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|||
# set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread")
|
||||
# set(CMAKE_BUILD_TYPE Debug)
|
||||
# set(CMAKE_BUILD_TYPE Release)
|
||||
# add_definitions(-DSHOW_PERF=1)
|
||||
add_definitions(-DSHOW_PERF)
|
||||
#add_definitions(-DSHOW_PERF=1)
|
||||
add_subdirectory(src)
|
||||
|
|
|
|||
|
|
@ -28,7 +28,8 @@ struct BQSRArg {
|
|||
|
||||
int NUM_THREADS = 1;
|
||||
|
||||
size_t MAX_MEM = ((size_t)1) << 30; // 1G
|
||||
size_t MAX_MEM = ((size_t)1 << 30);
|
||||
// + ((size_t)1 << 29); // 1G 应该按照每个线程多少内存来设置
|
||||
|
||||
bool DUPLEX_IO = true; //
|
||||
|
||||
|
|
|
|||
|
|
@ -121,18 +121,21 @@ void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCo
|
|||
|
||||
// ReadGroupCovariate 协变量的方法
|
||||
void ReadGroupCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues) {
|
||||
uint8_t *rgStr = bam_aux_get(sd.bw->b, "RG");
|
||||
char* rgVal = nullptr;
|
||||
if (rgStr) rgVal = bam_aux2Z(rgStr);
|
||||
int key = 0;
|
||||
if (rgVal == nullptr || RgToId.find(rgVal) == RgToId.end()) {
|
||||
spdlog::error("The RG tag value for read can not be found in header!");
|
||||
} else {
|
||||
key = RgToId[rgVal];
|
||||
}
|
||||
for (int i = 0; i < sd.read_len; ++i) {
|
||||
CovariateUtils::SetReadGroup(key, key, key, i, values);
|
||||
}
|
||||
if (RgToId.size() > 1) { // 只有当bam文件有多个read group时,再读取每个read的RG tag
|
||||
uint8_t* rgStr = bam_aux_get(sd.bw->b, "RG");
|
||||
char* rgVal = nullptr;
|
||||
if (rgStr)
|
||||
rgVal = bam_aux2Z(rgStr);
|
||||
if (rgVal == nullptr || RgToId.find(rgVal) == RgToId.end()) {
|
||||
spdlog::error("The RG tag value for read can not be found in header!");
|
||||
} else {
|
||||
key = RgToId[rgVal];
|
||||
for (int i = 0; i < sd.read_len; ++i) {
|
||||
CovariateUtils::SetReadGroup(key, key, key, i, values);
|
||||
}
|
||||
}
|
||||
} // 因为read group默认就是0,所以只有一个read group时,不需要设置
|
||||
}
|
||||
|
||||
// BaseQualityCovariate 协变量的方法
|
||||
|
|
@ -332,11 +335,9 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar
|
|||
const int originalReadLength = sd.read_len;
|
||||
|
||||
// store the original bases and then write Ns over low quality ones
|
||||
// string strandedClippedBases(sd.bases.arr.data(), sd.read_len);
|
||||
auto &strandedClippedBases = sd.strandedClippedBases;
|
||||
strandedClippedBases.copy(sd.bases);
|
||||
// GetStrandedClippedBytes(bw, sd, strandedClippedBases, 30); // 注意这里的lowQualTail数值
|
||||
GetStrandedClippedBytes(sd, strandedClippedBases, lowQualTail); // 命名我之前看到过这个30的?
|
||||
GetStrandedClippedBytes(sd, strandedClippedBases, lowQualTail); // 明明我之前看到过这个30的?
|
||||
// spdlog::info("bases: {}", strandedClippedBases);
|
||||
auto& nBasePairContextAtEachCycle = sd.nBasePairContextAtEachCycle;
|
||||
GetReadContextAtEachPosition(strandedClippedBases, mismatchesContextSize, mismatchesKeyMask, nBasePairContextAtEachCycle);
|
||||
|
|
@ -346,6 +347,7 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar
|
|||
// this is necessary to ensure that we don't keep historical data in the ReadCovariates values
|
||||
// since the context covariate may not span the entire set of values in read covariates
|
||||
// due to the clipping of the low quality bases
|
||||
// 这段代码应该不会执行,因为clip with N不会改变read长度
|
||||
if (readLengthAfterClipping != originalReadLength) {
|
||||
// don't bother zeroing out if we are going to overwrite the whole array
|
||||
for (int i = 0; i < originalReadLength; i++) {
|
||||
|
|
|
|||
|
|
@ -116,10 +116,6 @@ struct RecalFuncs {
|
|||
// int idx = 0;
|
||||
PROF_START(TP_read_vcf);
|
||||
for (auto& vcf : vcfs) {
|
||||
// 为啥多线程环境会出现,deque的front和[0]不一样?好像是调试的时候的问题,实际运行时没再出现
|
||||
// if (vcf.knownSites.front().left != vcf.knownSites[0].left || vcf.knownSites.front().right != vcf.knownSites[0].right)
|
||||
// spdlog::error("front is different with [0]: {} - {}", vcf.knownSites.front().left, vcf.knownSites[0].left);
|
||||
|
||||
if (!vcf.knownSites.empty() && vcf.knownSites.back().left > endPos) {// 此时vcf的区域包含bam,不需要读取
|
||||
// 清理旧的interval,只有此时才有清理的必要
|
||||
while (!vcf.knownSites.empty()) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue