对5.1G的数据,串行结果正确了,并行结果还没测试
This commit is contained in:
parent
81cbd6831c
commit
65878bbf96
|
|
@ -189,22 +189,24 @@ int SerialBQSR(AuxVar &aux) {
|
|||
sd.applyTransformations();
|
||||
PROF_END(gprof[GP_clip_read], clip_read);
|
||||
|
||||
const char* qname = bam_get_qname(sd.bw->b);
|
||||
//const char* qname = bam_get_qname(sd.bw->b);
|
||||
// fprintf(gf[4], "%ld %d %d %d\n", sd.rid, sd.read_len, 1 + BamWrap::bam_pos(sd.start_pos), 1 + BamWrap::bam_pos(sd.end_pos));
|
||||
// fprintf(gf[4], "%s %d %d %d %d\n", qname, sd.bw->b->core.flag, sd.read_len, 1 + BamWrap::bam_pos(sd.start_pos), 1 + BamWrap::bam_pos(sd.end_pos));
|
||||
//fprintf(gf[4], "%s %d %d %d %d\n", qname, sd.bw->b->core.flag, sd.read_len, 1 + BamWrap::bam_pos(sd.start_pos), 1 + BamWrap::bam_pos(sd.end_pos));
|
||||
|
||||
// 6. 更新每个read的platform信息,好像没啥用,暂时忽略
|
||||
// 这里计算snp和indel有点问题,snp和del结果不对,白天调试一下
|
||||
const int nErrors = RecalFuncs::calculateIsSNPOrIndel(aux, sd, isSNP, isIns, isDel);
|
||||
|
||||
// fprintf(gf[0], "%d\t", sd.read_len);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[0], "%d ", isSNP[ii]);
|
||||
// fprintf(gf[0], "\n");
|
||||
// fprintf(gf[1], "%d\t", sd.read_len);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[1], "%d ", isIns[ii]);
|
||||
// fprintf(gf[1], "\n");
|
||||
// fprintf(gf[2], "%d\t", sd.read_len);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[2], "%d ", isDel[ii]);
|
||||
// fprintf(gf[2], "\n");
|
||||
// fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[4], "%d ", skips[ii] ? 1 : 0);
|
||||
// fprintf(gf[4], "\n");
|
||||
|
||||
// fprintf(gf[0], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[0], "%d ", isSNP[ii]); fprintf(gf[0], "\n");
|
||||
// fprintf(gf[1], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[1], "%d ", isIns[ii]); fprintf(gf[1], "\n");
|
||||
// fprintf(gf[2], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[2], "%d ", isDel[ii]); fprintf(gf[2], "\n");
|
||||
|
||||
// 7. 计算baqArray
|
||||
// BAQ = base alignment quality
|
||||
|
|
@ -257,9 +259,9 @@ int SerialBQSR(AuxVar &aux) {
|
|||
//spdlog::info("{}", ss.str());
|
||||
PROF_GP_END(read_vcf);
|
||||
|
||||
// fprintf(gf[4], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[4], "%d ", skips[ii] ? 1 : 0);
|
||||
// fprintf(gf[4], "\n");
|
||||
// fprintf(gf[3], "%s %d %ld ", bam_get_qname(sd.bw->b), sd.bw->b->core.flag, sd.rid);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[3], "%d ", skips[ii] ? 1 : 0);
|
||||
// fprintf(gf[3], "\n");
|
||||
|
||||
// fprintf(gf[0], "%ld %d\t", sd.rid, sd.read_len);
|
||||
// for (int ii = 0; ii < sd.read_len; ++ii) fprintf(gf[0], "%d ", skips[ii] ? 1 : 0);
|
||||
|
|
@ -290,7 +292,7 @@ int SerialBQSR(AuxVar &aux) {
|
|||
collapseQualityScoreTableToReadGroupTable(recalTables.readGroupTable, recalTables.qualityScoreTable);
|
||||
roundTableValues(recalTables);
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
printRecalTables(recalTables);
|
||||
#endif
|
||||
|
||||
|
|
@ -304,8 +306,8 @@ int SerialBQSR(AuxVar &aux) {
|
|||
}
|
||||
|
||||
// 多线程处理bam数据, tmd是乱序的?
|
||||
// static void thread_worker(void* data, long idx, int tid, int steal) {
|
||||
static void thread_worker(void* data, long idx, int tid) {
|
||||
static void thread_worker(void* data, long idx, int tid, int steal) {
|
||||
// static void thread_worker(void* data, long idx, int tid) {
|
||||
AuxVar& aux = (*(vector<AuxVar>*)data)[tid];
|
||||
auto& readCovariates = aux.readCovariates;
|
||||
RecalTables& recalTables = aux.recalTables;
|
||||
|
|
@ -315,7 +317,7 @@ static void thread_worker(void* data, long idx, int tid) {
|
|||
StableArray<double>&snpErrors = aux.snpErrors, &insErrors = aux.insErrors, &delErrors = aux.delErrors;
|
||||
StableArray<uint8_t>& skips = aux.skips; // 该位置是否是已知位点
|
||||
auto &bams = *aux.bamArr;
|
||||
// if (steal) for (auto& vcf : aux.vcfArr) vcf.knownSites.clear();
|
||||
if (steal) for (auto& vcf : aux.vcfArr) vcf.knownSites.clear();
|
||||
#if 1
|
||||
int startIdx = idx * aux.BAM_BLOCK_NUM;
|
||||
int stopIdx = std::min((size_t)(idx + 1) * aux.BAM_BLOCK_NUM, bams.size());
|
||||
|
|
@ -405,7 +407,7 @@ int ParallelBQSR(vector<AuxVar>& auxArr) {
|
|||
spdlog::info("{} reads processed in {} round", readNum, round);
|
||||
|
||||
#if 1
|
||||
kt_for_no_steal(auxArr.size(), thread_worker, &auxArr, (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM);
|
||||
kt_for_steal(auxArr.size(), thread_worker, &auxArr, (readNum + AuxVar::BAM_BLOCK_NUM - 1) / AuxVar::BAM_BLOCK_NUM);
|
||||
#else
|
||||
kt_for_steal(auxArr.size(), thread_worker, &auxArr, auxArr.size());
|
||||
#endif
|
||||
|
|
@ -514,6 +516,7 @@ static void globalInit() {
|
|||
RecalDatum::StaticInit();
|
||||
QualityUtils::StaticInit();
|
||||
MathUtils::StaticInit();
|
||||
BaseUtils::StaticInit();
|
||||
|
||||
// 初始化需要计算的event types
|
||||
RecalUtils::initEventTypes(nsgv::gBqsrArg.computeIndelBQSRTables);
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ struct RecalFuncs {
|
|||
PROF_START(read_ref);
|
||||
read_ref_base(aux, interval.left, interval);
|
||||
PROF_GP_END(read_ref);
|
||||
string refBases(aux.ref_seq);
|
||||
const char *refBases = aux.ref_seq;
|
||||
|
||||
// 2. 遍历cigar,计算每个碱基是否是SNP或Indel
|
||||
PROF_START(calc_snp);
|
||||
|
|
@ -48,7 +48,7 @@ struct RecalFuncs {
|
|||
if (c == 'M' || c == '=' || c == 'X') {
|
||||
for (int j = 0; j < len; ++j) {
|
||||
// 按位置将read和ref碱基进行比较,不同则是snp,注意read起始位置要加上left_clip
|
||||
int snpInt = sd.bases[readPos] == refBases[refPos] ? 0 : 1;
|
||||
int snpInt = BaseUtils::basesAreEqual(sd.bases[readPos], refBases[refPos]) ? 0 : 1;
|
||||
isSNP[readPos] = snpInt;
|
||||
nEvents += snpInt;
|
||||
readPos++;
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
#include "base_utils.h"
|
||||
|
||||
const char BaseUtils::cBaseToChar[16] = {'N', 'A', 'C', 'N', 'G', 'N', 'N', 'N', 'T', 'N', 'N', 'N', 'N', 'N', 'N', 'N'};
|
||||
int BaseUtils::baseIndexMap[256];
|
||||
|
|
@ -8,7 +8,28 @@
|
|||
*/
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
|
||||
struct BaseUtils {
|
||||
|
||||
static void StaticInit() {
|
||||
// init baseIndexMap
|
||||
for (int i = 0; i < 256; ++i) {
|
||||
baseIndexMap[i] = -1;
|
||||
}
|
||||
baseIndexMap['A'] = 0;
|
||||
baseIndexMap['a'] = 0;
|
||||
baseIndexMap['*'] = 0;
|
||||
baseIndexMap['C'] = 1;
|
||||
baseIndexMap['c'] = 1;
|
||||
baseIndexMap['G'] = 2;
|
||||
baseIndexMap['g'] = 2;
|
||||
baseIndexMap['T'] = 3;
|
||||
baseIndexMap['t'] = 3;
|
||||
}
|
||||
|
||||
static int baseIndexMap[256];
|
||||
|
||||
// uint8_t转碱基字符
|
||||
static const char cBaseToChar[16];
|
||||
|
||||
|
|
@ -17,4 +38,19 @@ struct BaseUtils {
|
|||
|
||||
// 该操作符是否消耗参考基因组的碱基
|
||||
static bool consumeRefBases(char cigar) { return cigar == 'M' || cigar == '=' || cigar == 'X' || cigar == 'D' || cigar == 'N'; }
|
||||
|
||||
/**
|
||||
* Converts a simple base to a base index
|
||||
*
|
||||
* @param base [AaCcGgTt]
|
||||
* @return 0, 1, 2, 3, or -1 if the base can't be understood
|
||||
*/
|
||||
static inline int simpleBaseToBaseIndex(const char base) {
|
||||
// we want to make sure we treat the base as an unsigned byte....so that we can access this array with it.
|
||||
return baseIndexMap[(uint8_t)base];
|
||||
}
|
||||
|
||||
static bool basesAreEqual(const char base1, const char base2) {
|
||||
return simpleBaseToBaseIndex(base1) == simpleBaseToBaseIndex(base2);
|
||||
}
|
||||
};
|
||||
Loading…
Reference in New Issue