diff --git a/src/bqsr/aux_arg.cpp b/src/bqsr/aux_arg.cpp new file mode 100644 index 0000000..2a6baf6 --- /dev/null +++ b/src/bqsr/aux_arg.cpp @@ -0,0 +1,3 @@ +#include "aux_arg.h" + +int64_t AuxVar::processedReads = 0; \ No newline at end of file diff --git a/src/bqsr/aux_arg.h b/src/bqsr/aux_arg.h index 1649443..f89a7c9 100644 --- a/src/bqsr/aux_arg.h +++ b/src/bqsr/aux_arg.h @@ -15,13 +15,21 @@ #include "util/vcf_parser.h" #include "recal_tables.h" +#include "covariate.h" +#include "util/bam_wrap.h" +#include "util/sam_data.h" +#include "util/stable_array.h" +#include "util/bam_buf.h" using std::vector; // 解析后的一些参数,文件,数据等 struct AuxVar { - const static int REF_CONTEXT_PAD = 3; // 需要做一些填充 - const static int REFERENCE_HALF_WINDOW_LENGTH = 150; // 需要额外多取出一些ref序列,防止边界效应 + //const static int REF_CONTEXT_PAD = 3; // 需要做一些填充 + //const static int REFERENCE_HALF_WINDOW_LENGTH = 150; // 需要额外多取出一些ref序列,防止边界效应 + + static constexpr int BAM_BLOCK_NUM = 1000; // 每个线程每次处理1k个bam记录 + static int64_t processedReads; sam_hdr_t* header = nullptr; // bam header faidx_t* faidx = nullptr; // reference index @@ -29,7 +37,16 @@ struct AuxVar { int ref_len = 0; // reference sequence length int offset = 0; // 在要求的ref序列两边,多余取出的碱基数量 + BamArray *bamArr = nullptr; // bam数据数组 + vector vcfArr; // 从vcf中获取已知位点 - RecalTables recalTables; // 每个线程对应一个recal tables + PerReadCovariateMatrix readCovariates; // 每个read对应的协变量矩阵 + RecalTables recalTables; // 每个线程对应一个recal tables + + SamData sd; + StableArray isSNP, isIns, isDel; // 该位置是否是SNP, indel位置,0不是,1是 + StableArray baqArray; + StableArray snpErrors, insErrors, delErrors; + StableArray skips; // 该位置是否是已知位点 }; \ No newline at end of file diff --git a/src/bqsr/bqsr_entry.cpp b/src/bqsr/bqsr_entry.cpp index c478953..3c4ae7c 100644 --- a/src/bqsr/bqsr_entry.cpp +++ b/src/bqsr/bqsr_entry.cpp @@ -13,6 +13,7 @@ Date : 2023/10/23 #include #include #include +#include #include #include @@ -110,14 +111,21 @@ void roundTableValues(RecalTables& rt) { } // 串行bqsr -int SerialBQSR() { +int SerialBQSR(AuxVar &aux) { BamBufType inBamBuf(nsgv::gBqsrArg.DUPLEX_IO); inBamBuf.Init(nsgv::gInBamFp, nsgv::gInBamHeader, nsgv::gBqsrArg.MAX_MEM, bqsrReadFilterOut); int64_t readNumSum = 0; int round = 0; PerReadCovariateMatrix readCovariates; CovariateUtils::InitPerReadCovMat(readCovariates); - RecalTables& recalTables = nsgv::gAuxVars[0].recalTables; + RecalTables& recalTables = aux.recalTables; + + SamData& sd = aux.sd; + StableArray&isSNP = aux.isSNP, &isIns = aux.isIns, &isDel = aux.isDel; // 该位置是否是SNP, indel位置,0不是,1是 + StableArray &baqArray = aux.baqArray; + StableArray &snpErrors = aux.snpErrors, &insErrors = aux.insErrors, &delErrors = aux.delErrors; + StableArray &skips = aux.skips; // 该位置是否是已知位点 + while (true) { ++round; // 一. 读取bam数据 @@ -131,12 +139,6 @@ int SerialBQSR() { spdlog::info("{} reads processed in {} round", readNum, round); // 二. 遍历每个bam(read)记录,进行处理 - SamData sd; - StableArray isSNP, isIns, isDel; // 该位置是否是SNP, indel位置,0不是,1是 - StableArray baqArray; - StableArray snpErrors, insErrors, delErrors; - StableArray skips; // 该位置是否是已知位点 - for (int i = 0; i < bams.size(); ++i) { // 1. 对每个read,需要检查cigar是否合法,即没有两个连续的相同的cigar,而且需要将首尾的deletion处理掉,目前看好像没啥影响,我们忽略这一步 // 2. 对质量分数长度跟碱基长度不匹配的read,缺少的质量分数用默认值补齐,先忽略,后边有需要再处理 @@ -164,7 +166,7 @@ int SerialBQSR() { PROF_END(gprof[GP_clip_read], clip_read); // 6. 更新每个read的platform信息,好像没啥用,暂时忽略 - const int nErrors = RecalFuncs::calculateIsSNPOrIndel(nsgv::gAuxVars[0], sd, isSNP, isIns, isDel); + const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel); /*fprintf(gf[0], "%d\t", sd.read_len); for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[0], "%d ", isSNP[ii]); @@ -194,7 +196,7 @@ int SerialBQSR() { // 8. 计算这条read对应的协变量 PROF_START(covariate); - CovariateUtils::ComputeCovariates(sd, nsgv::gInBamHeader, readCovariates, true); + CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true); PROF_END(gprof[GP_covariate], covariate); // fprintf(gf[3], "%ld %ld %d %ld\n", sd.rid, readCovariates.size(), sd.read_len, readCovariates[0][0].size()); @@ -220,7 +222,7 @@ int SerialBQSR() { // 9. 计算这条read需要跳过的位置 PROF_START(read_vcf); - RecalFuncs::calculateKnownSites(sd, nsgv::gAuxVars[0].vcfArr, nsgv::gInBamHeader, skips); + RecalFuncs::calculateKnownSites(sd, aux.vcfArr, aux.header, skips); for (int ii = 0; ii < sd.read_len; ++ii) { skips[ii] = skips[ii] || (ContextCovariate::baseIndexMap[sd.bases[ii]] == -1) || sd.base_quals[ii] < nsgv::gBqsrArg.PRESERVE_QSCORES_LESS_THAN; @@ -264,6 +266,109 @@ int SerialBQSR() { return 0; } +static void thread_worker(void* data, long idx, int tid) { + AuxVar& aux = ((AuxVar*)data)[tid]; + auto& readCovariates = aux.readCovariates; + RecalTables& recalTables = aux.recalTables; + SamData& sd = aux.sd; + StableArray&isSNP = aux.isSNP, &isIns = aux.isIns, &isDel = aux.isDel; // 该位置是否是SNP, indel位置,0不是,1是 + StableArray& baqArray = aux.baqArray; + StableArray&snpErrors = aux.snpErrors, &insErrors = aux.insErrors, &delErrors = aux.delErrors; + StableArray& skips = aux.skips; // 该位置是否是已知位点 + auto &bams = *aux.bamArr; + int startIdx = idx * aux.BAM_BLOCK_NUM; + int stopIdx = std::min((size_t)(idx + 1) * aux.BAM_BLOCK_NUM, bams.size()); + for (int i = startIdx; i < stopIdx; ++i) { + BamWrap* bw = bams[i]; + sd.init(); + sd.parseBasic(bw); + sd.rid = i + aux.processedReads; + if (sd.read_len <= 0) continue; + + //PROF_START(clip_read); + ReadTransformer::hardClipAdaptorSequence(bw, sd); + if (sd.read_len <= 0) continue; + ReadTransformer::hardClipSoftClippedBases(bw, sd); + if (sd.read_len <= 0) continue; + sd.applyTransformations(); + // PROF_END(gprof[GP_clip_read], clip_read); + + const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel); + + bool baqCalculated = false; + if (nErrors == 0 || !nsgv::gBqsrArg.enableBAQ) { + baqCalculated = BAQ::flatBAQArray(sd, baqArray); + } else { + // baqCalculated = calculateBAQArray(nsgv::gAuxVars[0], baq, sd, baqArray); + } + if (!baqCalculated) continue; + + // PROF_START(covariate); + CovariateUtils::ComputeCovariates(sd, aux.header, readCovariates, true); + // PROF_END(gprof[GP_covariate], covariate); + + // PROF_START(read_vcf); + RecalFuncs::calculateKnownSites(sd, aux.vcfArr, aux.header, skips); + for (int ii = 0; ii < sd.read_len; ++ii) { + skips[ii] = + skips[ii] || (ContextCovariate::baseIndexMap[sd.bases[ii]] == -1) || sd.base_quals[ii] < nsgv::gBqsrArg.PRESERVE_QSCORES_LESS_THAN; + } + // PROF_GP_END(read_vcf); + + // PROF_START(frac_err); + RecalFuncs::calculateFractionalErrorArray(isSNP, baqArray, snpErrors); + RecalFuncs::calculateFractionalErrorArray(isIns, baqArray, insErrors); + RecalFuncs::calculateFractionalErrorArray(isDel, baqArray, delErrors); + // PROF_GP_END(frac_err); + + ReadRecalInfo info(sd, readCovariates, skips, snpErrors, insErrors, delErrors); + + //PROF_START(update_info); + RecalUtils::updateRecalTablesForRead(info, recalTables); + //PROF_END(gprof[GP_update_info], update_info); + } +} + +// 并行bqsr +int ParallelBQSR(vector& auxArr) { + BamBufType inBamBuf(nsgv::gBqsrArg.DUPLEX_IO); + inBamBuf.Init(nsgv::gInBamFp, nsgv::gInBamHeader, nsgv::gBqsrArg.MAX_MEM, bqsrReadFilterOut); + int64_t readNumSum = 0; + int round = 0; + + while (true) { + ++round; + // 一. 读取bam数据 + size_t readNum = 0; + if (inBamBuf.ReadStat() >= 0) readNum = inBamBuf.ReadBam(); + if (readNum < 1) { break; } + auto bams = inBamBuf.GetBamArr(); + for_each(auxArr.begin(), auxArr.end(), [&](AuxVar& aux) { + aux.bamArr = &bams; + }); + spdlog::info("{} reads processed in {} round", readNum, round); + + kt_for(nsgv::gBqsrArg.NUM_THREADS, thread_worker, auxArr.data(), (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM); + + readNumSum += readNum; + AuxVar::processedReads += readNum; + inBamBuf.ClearAll(); // + } + spdlog::info("read count: {}", readNumSum); + +// // 12. 创建总结数据 +// collapseQualityScoreTableToReadGroupTable(recalTables.readGroupTable, recalTables.qualityScoreTable); +// roundTableValues(recalTables); +// +// // 13. 量化质量分数 +// QuantizationInfo quantInfo(recalTables, nsgv::gBqsrArg.QUANTIZING_LEVELS); +// +// // 14. 输出结果 +// RecalUtils::outputRecalibrationReport(nsgv::gBqsrArg, quantInfo, recalTables); +// + return 0; +} + // 在进行数据处理之前,初始化一些全局数据 static void globalInit() { open_debug_files(); @@ -277,7 +382,8 @@ static void globalInit() { nsgv::gInBamHeader = sam_hdr_read(nsgv::gInBamFp); // header /* 并行读取bam数据 */ - htsThreadPool htsPoolRead = {NULL, 0}; // , + htsThreadPool htsPoolRead = {NULL, 0}; // + int readThreadNum = min(nsgv::gBqsrArg.NUM_THREADS, 8); htsPoolRead.pool = hts_tpool_init(nsgv::gBqsrArg.NUM_THREADS); if (!htsPoolRead.pool ) { spdlog::error("[{}] failed to set up thread pool", __LINE__); @@ -313,6 +419,7 @@ static void globalInit() { } // 注意初始化顺序,这个必须在协变量初始化之后,因为需要用到MaximumKeyValue nsgv::gAuxVars[i].recalTables.init(nsgv::gInBamHeader->hrecs->nrg); + CovariateUtils::InitPerReadCovMat(nsgv::gAuxVars[i].readCovariates); } // 0. 初始化一些全局数据 @@ -348,7 +455,8 @@ int BaseRecalibrator() { PROF_START(whole_process); globalInit(); - ret = SerialBQSR(); // 串行处理数据,生成recal table + // ret = SerialBQSR(nsgv::gAuxVars[0]); // 串行处理数据,生成recal table + ret = ParallelBQSR(nsgv::gAuxVars); // 串行处理数据,生成recal table globalDestroy(); sam_close(nsgv::gInBamFp); PROF_END(gprof[GP_whole_process], whole_process); diff --git a/src/bqsr/covariate.cpp b/src/bqsr/covariate.cpp index 0f02b62..2cba189 100644 --- a/src/bqsr/covariate.cpp +++ b/src/bqsr/covariate.cpp @@ -31,6 +31,85 @@ void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCo CycleCovariate::RecordValues(sd, header, values, recordIndelValues); } +/* +// kernel fusion +void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues) { +#define __bq_set_all_cov_indel(ins, del) \ + do { \ + for (int i = 0; i < sd.read_len; ++i) { \ + const int vi = negativeStrand ? sd.read_len - 1 - i : i; \ + const int cycleM = CycleCovariate::CycleKey(sd, i, false, CycleCovariate::MAXIMUM_CYCLE_VALUE); \ + const int cycleIndel = CycleCovariate::CycleKey(sd, i, true, CycleCovariate::MAXIMUM_CYCLE_VALUE); \ + CovariateUtils::SetAllCovs(crg, quals[i], nBasePairContextAtEachCycle[vi], cycleM, crg, (ins), indelKeys[vi], cycleIndel, crg, (del), \ + indelKeys[vi], cycleIndel, i, values); \ + } \ + } while (0) + +#define __bq_set_all_cov(ins, del) \ + do { \ + for (int i = 0; i < sd.read_len; ++i) { \ + const int vi = negativeStrand ? sd.read_len - 1 - i : i; \ + const int cycleM = CycleCovariate::CycleKey(sd, i, false, CycleCovariate::MAXIMUM_CYCLE_VALUE); \ + CovariateUtils::SetAllCovs(crg, quals[i], nBasePairContextAtEachCycle[vi], cycleM, crg, (ins), 0, 0, crg, (del), 0, 0, i, values); \ + } \ + } while (0) + + // 1. read group + uint8_t* rgStr = bam_aux_get(sd.bw->b, "RG"); + char* rgVal = nullptr; + if (rgStr) rgVal = bam_aux2Z(rgStr); + int crg = 0; + if (rgVal == nullptr || ReadGroupCovariate::RgToId.find(rgVal) == ReadGroupCovariate::RgToId.end()) { + spdlog::error("The RG tag value for read can not be found in header!"); + } else { + crg = ReadGroupCovariate::RgToId[rgVal]; + } + + // 2. base quality + // 在前面的处理过后,quals应该和base长度一致了 +#define __bq_set_cov(ins, del) \ + do { \ + for (int i = 0; i < sd.read_len; ++i) { \ + CovariateUtils::SetBaseQual(quals[i], (ins), (del), i, values); \ + } \ + } while (0) + + const int readLength = sd.read_len; + const bool negativeStrand = sd.bw->GetReadNegativeStrandFlag(); + const int INDEL_QUAL = 45; + auto& quals = sd.base_quals; + auto& strandedClippedBases = sd.strandedClippedBases; + strandedClippedBases.copy(sd.bases); + ContextCovariate::GetStrandedClippedBytes(sd, strandedClippedBases, ContextCovariate::lowQualTail); + auto& nBasePairContextAtEachCycle = sd.nBasePairContextAtEachCycle; + ContextCovariate::GetReadContextAtEachPosition(strandedClippedBases, ContextCovariate::mismatchesContextSize, ContextCovariate::mismatchesKeyMask, + nBasePairContextAtEachCycle); + if (recordIndelValues) { + // context + auto& indelKeys = sd.indelKeys; + ContextCovariate::GetReadContextAtEachPosition(strandedClippedBases, ContextCovariate::indelsContextSize, ContextCovariate::indelsKeyMask, + indelKeys); + // cycle cov is in the loop + + // quality + uint8_t* insQualPtr = bam_aux_get(sd.bw->b, "BI"); // base qualities for insertions + uint8_t* delQualPtr = bam_aux_get(sd.bw->b, "BD"); // base qualities for deletions + if (insQualPtr == nullptr && delQualPtr == nullptr) { + __bq_set_all_cov_indel(INDEL_QUAL, INDEL_QUAL); + } else if (insQualPtr == nullptr) { + uint8_t* delQuals = (uint8_t*)bam_aux2Z(delQualPtr) + sd.left_clip; + __bq_set_all_cov_indel(INDEL_QUAL, delQuals[i]); + } else { + uint8_t* insQuals = (uint8_t*)bam_aux2Z(insQualPtr) + sd.left_clip; + __bq_set_all_cov_indel(insQuals[i], INDEL_QUAL); + } + } else { + __bq_set_all_cov(0, 0); + } + +} +*/ + // ReadGroupCovariate 协变量的方法 void ReadGroupCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues) { uint8_t *rgStr = bam_aux_get(sd.bw->b, "RG"); @@ -99,7 +178,7 @@ static char SimpleComplement(const char base) { } } -static void ClipLowQualEndsWithN(string& bases, const StableArray &quals, uint8_t lowQTail, bool isNegativeStrand) { +static void ClipLowQualEndsWithN(StableArray& bases, const StableArray& quals, uint8_t lowQTail, bool isNegativeStrand) { // 处理左边 int left = 0; int readLen = bases.size(); @@ -118,10 +197,10 @@ static void ClipLowQualEndsWithN(string& bases, const StableArray &qual break; } } - if (left == readLen) { - bases.clear(); - return; - } + //if (left == readLen) { + // bases.clear(); + // return; + //} // 处理右边 int right = (int)bases.size() - 1; if (isNegativeStrand) { @@ -139,13 +218,12 @@ static void ClipLowQualEndsWithN(string& bases, const StableArray &qual break; } } - if (right < left) - bases.clear(); - + // if (right < left) + // bases.clear(); } // 获取去除低质量分数碱基之后的read碱基序列(将低质量分数的碱基变成N) -void ContextCovariate::GetStrandedClippedBytes(SamData& sd, string& clippedBases, uint8_t lowQTail) { +void ContextCovariate::GetStrandedClippedBytes(SamData& sd, StableArray& clippedBases, uint8_t lowQTail) { if (sd.bw->GetReadNegativeStrandFlag()) { // 反向互补 for (int i = 0; i < sd.read_len; ++i) clippedBases[i] = SimpleComplement(sd.bases[sd.read_len - 1 - i]); } @@ -160,7 +238,7 @@ void ContextCovariate::GetStrandedClippedBytes(SamData& sd, string& clippedBases * @param end the end position in the array (exclusive) * @return the key representing the dna sequence */ -int ContextCovariate::KeyFromContext(const string& dna, const int start, const int end) { +int ContextCovariate::KeyFromContext(const StableArray& dna, const int start, const int end) { int key = end - start; int bitOffset = LENGTH_BITS; for (int i = start; i < end; i++) { @@ -188,7 +266,7 @@ int ContextCovariate::KeyFromContext(const string& dna, const int start, const i * @return a list that has the same length as the read and contains the (preceding) n-base context at each position. * */ -void ContextCovariate::GetReadContextAtEachPosition(const string& bases, const int contextSize, const int mask, vector& keys) { +void ContextCovariate::GetReadContextAtEachPosition(const StableArray& bases, const int contextSize, const int mask, StableArray& keys) { int readLength = bases.size(); keys.resize(readLength); int keyIdx = 0; @@ -245,11 +323,13 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar const int originalReadLength = sd.read_len; // store the original bases and then write Ns over low quality ones - string strandedClippedBases(sd.bases.arr.data(), sd.read_len); + // string strandedClippedBases(sd.bases.arr.data(), sd.read_len); + auto &strandedClippedBases = sd.strandedClippedBases; + strandedClippedBases.copy(sd.bases); // GetStrandedClippedBytes(bw, sd, strandedClippedBases, 30); // 注意这里的lowQualTail数值 GetStrandedClippedBytes(sd, strandedClippedBases, lowQualTail); // 命名我之前看到过这个30的? // spdlog::info("bases: {}", strandedClippedBases); - vector nBasePairContextAtEachCycle; + auto& nBasePairContextAtEachCycle = sd.nBasePairContextAtEachCycle; GetReadContextAtEachPosition(strandedClippedBases, mismatchesContextSize, mismatchesKeyMask, nBasePairContextAtEachCycle); const int readLengthAfterClipping = strandedClippedBases.size(); @@ -268,7 +348,7 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar const bool negativeStrand = sd.bw->GetReadNegativeStrandFlag(); // Note: duplicated the loop to avoid checking recordIndelValues on each iteration if (recordIndelValues) { - vector indelKeys; + auto& indelKeys = sd.indelKeys; GetReadContextAtEachPosition(strandedClippedBases, indelsContextSize, indelsKeyMask, indelKeys); for (int i = 0; i < readLengthAfterClipping; i++) { const int readOffset = GetStrandedOffset(negativeStrand, i, readLengthAfterClipping); diff --git a/src/bqsr/covariate.h b/src/bqsr/covariate.h index 408436a..f0f227a 100644 --- a/src/bqsr/covariate.h +++ b/src/bqsr/covariate.h @@ -199,11 +199,11 @@ struct ContextCovariate { } // 获取去除低质量分数碱基之后的read碱基序列(将低质量分数的碱基变成N) - static void GetStrandedClippedBytes(SamData& ad, string& clippedBases, uint8_t lowQTail); + static void GetStrandedClippedBytes(SamData& ad, StableArray& clippedBases, uint8_t lowQTail); // Creates a int representation of a given dna string. - static int KeyFromContext(const string& dna, const int start, const int end); + static int KeyFromContext(const StableArray& dna, const int start, const int end); // For each position of the read, calculate the n-base-pair *read* base context (as opposed to the reference context). - static void GetReadContextAtEachPosition(const string& bases, const int contextSize, const int mask, vector& keys); + static void GetReadContextAtEachPosition(const StableArray& bases, const int contextSize, const int mask, StableArray& keys); // 设置协变量的值 static void RecordValues(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues); @@ -296,6 +296,25 @@ struct CovariateUtils { matrix[EventType::BASE_DELETION.index][readOffset].cycle = deletion; } + static inline void SetAllCovs(int m1, int i1, int d1, int m2, int i2, int d2, int m3, int i3, int d3, int m4, int i4, int d4, int readOffset, + PerReadCovariateMatrix& matrix) { + CovariateValues *cvp = &matrix[EventType::BASE_SUBSTITUTION.index][readOffset]; + cvp->readGroup = m1; + cvp->baseQuality = m2; + cvp->context = m3; + cvp->cycle = m4; + cvp = &matrix[EventType::BASE_INSERTION.index][readOffset]; + cvp->readGroup = i1; + cvp->baseQuality = i2; + cvp->context = i3; + cvp->cycle = i4; + cvp = &matrix[EventType::BASE_DELETION.index][readOffset]; + cvp->readGroup = d1; + cvp->baseQuality = d2; + cvp->context = d3; + cvp->cycle = d4; + } + // 对一条read计算协变量(该协变量被上一个read用过) static void ComputeCovariates(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues); }; \ No newline at end of file diff --git a/src/util/sam_data.h b/src/util/sam_data.h index a260ee1..eb10ae8 100644 --- a/src/util/sam_data.h +++ b/src/util/sam_data.h @@ -56,6 +56,11 @@ struct SamData { StableArray del_quals; // delete质量分数, BD (大部分应该都没有) StableArray cigars; + // 用作临时buffer + StableArray strandedClippedBases; // for context covariate + StableArray nBasePairContextAtEachCycle; // for context covariate + StableArray indelKeys; // for context covariate + int64_t& softStart() { return start_pos; } int64_t& softEnd() { return end_pos; } diff --git a/src/util/stable_array.h b/src/util/stable_array.h index 0d0aeb1..2c8bc75 100644 --- a/src/util/stable_array.h +++ b/src/util/stable_array.h @@ -19,7 +19,7 @@ struct StableArray { vector arr; size_t idx = 0; void clear() { idx = 0; } - size_t size() { return idx; } + size_t size() const { return idx; } bool empty() { return idx == 0; } void reserve(size_t _size) { if (arr.size() < _size) @@ -49,6 +49,12 @@ struct StableArray { idx++; } } + void copy(const StableArray& other) { + resize(other.size()); + for (size_t i = 0; i < other.size(); ++i) { + arr[i] = other.arr[i]; + } + } inline T& operator[](size_t pos) { return arr[pos]; } //inline const T& operator[](size_t pos) { return arr[pos]; } inline const T& operator[](size_t pos) const { return arr[pos]; }