修改多维数组实现方式,更有利于连续访存,修改recaltable的索引次序
This commit is contained in:
parent
b526306e87
commit
3815a67618
|
|
@ -85,9 +85,7 @@ void collapseQualityScoreTableToReadGroupTable(Array2D<RecalDatum> &byReadGroupT
|
||||||
// 遍历quality table
|
// 遍历quality table
|
||||||
_Foreach3DK(byQualTable, qualDatum, {
|
_Foreach3DK(byQualTable, qualDatum, {
|
||||||
if (qualDatum.numObservations > 0) {
|
if (qualDatum.numObservations > 0) {
|
||||||
int rgKey = k1;
|
byReadGroupTable(k1, k2).combine(qualDatum);
|
||||||
int eventIndex = k3;
|
|
||||||
byReadGroupTable[rgKey][eventIndex].combine(qualDatum);
|
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ int CycleCovariate::MAXIMUM_CYCLE_VALUE;
|
||||||
// 对一条read计算协变量(该协变量被上一个read用过)
|
// 对一条read计算协变量(该协变量被上一个read用过)
|
||||||
void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values,
|
void CovariateUtils::ComputeCovariates(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values,
|
||||||
bool recordIndelValues) {
|
bool recordIndelValues) {
|
||||||
// ReadGroupCovariate::RecordValues(sd, header, values, recordIndelValues);
|
ReadGroupCovariate::RecordValues(sd, header, values, recordIndelValues);
|
||||||
BaseQualityCovariate::RecordValues(sd, header, values, recordIndelValues);
|
BaseQualityCovariate::RecordValues(sd, header, values, recordIndelValues);
|
||||||
ContextCovariate::RecordValues(sd, header, values, recordIndelValues);
|
ContextCovariate::RecordValues(sd, header, values, recordIndelValues);
|
||||||
CycleCovariate::RecordValues(sd, header, values, recordIndelValues);
|
CycleCovariate::RecordValues(sd, header, values, recordIndelValues);
|
||||||
|
|
@ -43,7 +43,7 @@ void ReadGroupCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCov
|
||||||
key = RgToId[rgVal];
|
key = RgToId[rgVal];
|
||||||
}
|
}
|
||||||
for (int i = 0; i < sd.read_len; ++i) {
|
for (int i = 0; i < sd.read_len; ++i) {
|
||||||
CovariateUtils::SetCovariate(key, key, key, i, ReadGroupCovariate::index, values);
|
CovariateUtils::SetReadGroup(key, key, key, i, values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -51,11 +51,11 @@ void ReadGroupCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCov
|
||||||
void BaseQualityCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values,
|
void BaseQualityCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovariateMatrix& values,
|
||||||
bool recordIndelValues) {
|
bool recordIndelValues) {
|
||||||
// 在前面的处理过后,quals应该和base长度一致了
|
// 在前面的处理过后,quals应该和base长度一致了
|
||||||
#define __bq_set_cov(ins, del) \
|
#define __bq_set_cov(ins, del) \
|
||||||
do { \
|
do { \
|
||||||
for (int i = 0; i < sd.read_len; ++i) { \
|
for (int i = 0; i < sd.read_len; ++i) { \
|
||||||
CovariateUtils::SetCovariate(quals[i], (ins), (del), i, BaseQualityCovariate::index, values); \
|
CovariateUtils::SetBaseQual(quals[i], (ins), (del), i, values); \
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
const int INDEL_QUAL = 45;
|
const int INDEL_QUAL = 45;
|
||||||
|
|
@ -261,7 +261,7 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar
|
||||||
// don't bother zeroing out if we are going to overwrite the whole array
|
// don't bother zeroing out if we are going to overwrite the whole array
|
||||||
for (int i = 0; i < originalReadLength; i++) {
|
for (int i = 0; i < originalReadLength; i++) {
|
||||||
// this base has been clipped off, so zero out the covariate values here
|
// this base has been clipped off, so zero out the covariate values here
|
||||||
CovariateUtils::SetCovariate(0, 0, 0, i, ContextCovariate::index, values);
|
CovariateUtils::SetContext(0, 0, 0, i, values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -273,12 +273,12 @@ void ContextCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovar
|
||||||
for (int i = 0; i < readLengthAfterClipping; i++) {
|
for (int i = 0; i < readLengthAfterClipping; i++) {
|
||||||
const int readOffset = GetStrandedOffset(negativeStrand, i, readLengthAfterClipping);
|
const int readOffset = GetStrandedOffset(negativeStrand, i, readLengthAfterClipping);
|
||||||
const int indelKey = indelKeys[i];
|
const int indelKey = indelKeys[i];
|
||||||
CovariateUtils::SetCovariate(nBasePairContextAtEachCycle[i], indelKey, indelKey, readOffset, ContextCovariate::index, values);
|
CovariateUtils::SetContext(nBasePairContextAtEachCycle[i], indelKey, indelKey, readOffset, values);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < readLengthAfterClipping; i++) {
|
for (int i = 0; i < readLengthAfterClipping; i++) {
|
||||||
const int readOffset = GetStrandedOffset(negativeStrand, i, readLengthAfterClipping);
|
const int readOffset = GetStrandedOffset(negativeStrand, i, readLengthAfterClipping);
|
||||||
CovariateUtils::SetCovariate(nBasePairContextAtEachCycle[i], 0, 0, readOffset, ContextCovariate::index, values);
|
CovariateUtils::SetContext(nBasePairContextAtEachCycle[i], 0, 0, readOffset, values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -330,12 +330,12 @@ void CycleCovariate::RecordValues(SamData& sd, sam_hdr_t* header, PerReadCovaria
|
||||||
for (int i = 0; i < readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
const int substitutionKey = CycleKey(sd, i, false, MAXIMUM_CYCLE_VALUE);
|
const int substitutionKey = CycleKey(sd, i, false, MAXIMUM_CYCLE_VALUE);
|
||||||
const int indelKey = CycleKey(sd, i, true, MAXIMUM_CYCLE_VALUE);
|
const int indelKey = CycleKey(sd, i, true, MAXIMUM_CYCLE_VALUE);
|
||||||
CovariateUtils::SetCovariate(substitutionKey, indelKey, indelKey, i, CycleCovariate::index, values);
|
CovariateUtils::SetCycle(substitutionKey, indelKey, indelKey, i, values);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
for (int i = 0; i < readLength; i++) {
|
for (int i = 0; i < readLength; i++) {
|
||||||
const int substitutionKey = CycleKey(sd, i, false, MAXIMUM_CYCLE_VALUE);
|
const int substitutionKey = CycleKey(sd, i, false, MAXIMUM_CYCLE_VALUE);
|
||||||
CovariateUtils::SetCovariate(substitutionKey, 0, 0, i, CycleCovariate::index, values);
|
CovariateUtils::SetCycle(substitutionKey, 0, 0, i, values);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -25,12 +25,27 @@ using std::map;
|
||||||
using std::string;
|
using std::string;
|
||||||
using std::vector;
|
using std::vector;
|
||||||
|
|
||||||
|
// 协变量的值, 4个协变量
|
||||||
|
struct CovariateValues {
|
||||||
|
int readGroup = 0;
|
||||||
|
int baseQuality = 0;
|
||||||
|
int context = -1;
|
||||||
|
int cycle = -1;
|
||||||
|
void clear() {
|
||||||
|
readGroup = 0;
|
||||||
|
baseQuality = 0;
|
||||||
|
context = -1;
|
||||||
|
cycle = -1;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This is where we store the per-read covariates, also indexed by (event type) and (read position).
|
* This is where we store the per-read covariates, also indexed by (event type) and (read position).
|
||||||
* Thus the array has shape { event type } x { read position (aka cycle) } x { covariate }.
|
* Thus the array has shape { event type } x { read position (aka cycle) } x { covariate }.
|
||||||
* For instance, { covariate } is by default 4-dimensional (read group, base quality, context, cycle).
|
* For instance, { covariate } is by default 4-dimensional (read group, base quality, context, cycle).
|
||||||
*/
|
*/
|
||||||
typedef vector<vector<vector<int>>> PerReadCovariateMatrix;
|
// Indexed as [event type][read position]; each element is a CovariateValues struct holding the four covariates (read group, base quality, context, cycle)
|
||||||
|
typedef vector<vector<CovariateValues>> PerReadCovariateMatrix;
|
||||||
|
|
||||||
// 变异类型(snp, insert, deletion)
|
// 变异类型(snp, insert, deletion)
|
||||||
struct EventTypeValue {
|
struct EventTypeValue {
|
||||||
|
|
@ -48,33 +63,6 @@ struct EventType {
|
||||||
static vector<EventTypeValue> EVENTS;
|
static vector<EventTypeValue> EVENTS;
|
||||||
};
|
};
|
||||||
|
|
||||||
// 协变量相关的工具类
|
|
||||||
struct CovariateUtils {
|
|
||||||
static constexpr int MAX_READ_LENGTH = 300; // 最大read长度
|
|
||||||
static constexpr int NUM_COVARIATES = 4;
|
|
||||||
|
|
||||||
// 初始化PerReadCovariateMatrix
|
|
||||||
static void InitPerReadCovMat(PerReadCovariateMatrix& matrix) {
|
|
||||||
matrix.resize(EventType::EVENT_SIZE);
|
|
||||||
for (int event_type = 0; event_type < EventType::EVENT_SIZE; ++event_type) {
|
|
||||||
matrix[event_type].resize(MAX_READ_LENGTH);
|
|
||||||
for (int pos = 0; pos < MAX_READ_LENGTH; ++pos) {
|
|
||||||
matrix[event_type][pos].resize(NUM_COVARIATES, 0);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 设置协变量
|
|
||||||
static void SetCovariate(int mismatch, int insertion, int deletion, int readOffset, int covIndex, PerReadCovariateMatrix& matrix) {
|
|
||||||
matrix[EventType::BASE_SUBSTITUTION.index][readOffset][covIndex] = mismatch;
|
|
||||||
matrix[EventType::BASE_INSERTION.index][readOffset][covIndex] = insertion;
|
|
||||||
matrix[EventType::BASE_DELETION.index][readOffset][covIndex] = deletion;
|
|
||||||
}
|
|
||||||
|
|
||||||
// 对一条read计算协变量(该协变量被上一个read用过)
|
|
||||||
static void ComputeCovariates(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues);
|
|
||||||
};
|
|
||||||
|
|
||||||
// Read group协变量
|
// Read group协变量
|
||||||
struct ReadGroupCovariate {
|
struct ReadGroupCovariate {
|
||||||
static constexpr int index = 0; // 在协变量数组中的索引位置
|
static constexpr int index = 0; // 在协变量数组中的索引位置
|
||||||
|
|
@ -270,8 +258,44 @@ struct CycleCovariate {
|
||||||
static void RecordValues(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues);
|
static void RecordValues(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues);
|
||||||
};
|
};
|
||||||
|
|
||||||
// 好像不需要
|
// 协变量相关的工具类
|
||||||
struct StandardCovariateList {
|
struct CovariateUtils {
|
||||||
ReadGroupCovariate readGroupCovariate;
|
static constexpr int MAX_READ_LENGTH = 300; // 最大read长度
|
||||||
BaseQualityCovariate qualityScoreCovariate;
|
static constexpr int NUM_COVARIATES = 4;
|
||||||
|
|
||||||
|
// 初始化PerReadCovariateMatrix
|
||||||
|
static void InitPerReadCovMat(PerReadCovariateMatrix& matrix) {
|
||||||
|
matrix.resize(EventType::EVENT_SIZE);
|
||||||
|
for (int event_type = 0; event_type < EventType::EVENT_SIZE; ++event_type) {
|
||||||
|
matrix[event_type].resize(MAX_READ_LENGTH);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 设置协变量
|
||||||
|
static inline void SetReadGroup(int mismatch, int insertion, int deletion, int readOffset, PerReadCovariateMatrix& matrix) {
|
||||||
|
matrix[EventType::BASE_SUBSTITUTION.index][readOffset].readGroup = mismatch;
|
||||||
|
matrix[EventType::BASE_INSERTION.index][readOffset].readGroup = insertion;
|
||||||
|
matrix[EventType::BASE_DELETION.index][readOffset].readGroup = deletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void SetBaseQual(int mismatch, int insertion, int deletion, int readOffset, PerReadCovariateMatrix& matrix) {
|
||||||
|
matrix[EventType::BASE_SUBSTITUTION.index][readOffset].baseQuality = mismatch;
|
||||||
|
matrix[EventType::BASE_INSERTION.index][readOffset].baseQuality = insertion;
|
||||||
|
matrix[EventType::BASE_DELETION.index][readOffset].baseQuality = deletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void SetContext(int mismatch, int insertion, int deletion, int readOffset, PerReadCovariateMatrix& matrix) {
|
||||||
|
matrix[EventType::BASE_SUBSTITUTION.index][readOffset].context = mismatch;
|
||||||
|
matrix[EventType::BASE_INSERTION.index][readOffset].context = insertion;
|
||||||
|
matrix[EventType::BASE_DELETION.index][readOffset].context = deletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void SetCycle(int mismatch, int insertion, int deletion, int readOffset, PerReadCovariateMatrix& matrix) {
|
||||||
|
matrix[EventType::BASE_SUBSTITUTION.index][readOffset].cycle = mismatch;
|
||||||
|
matrix[EventType::BASE_INSERTION.index][readOffset].cycle = insertion;
|
||||||
|
matrix[EventType::BASE_DELETION.index][readOffset].cycle = deletion;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 对一条read计算协变量(该协变量被上一个read用过)
|
||||||
|
static void ComputeCovariates(SamData& ad, sam_hdr_t* header, PerReadCovariateMatrix& values, bool recordIndelValues);
|
||||||
};
|
};
|
||||||
|
|
@ -16,129 +16,102 @@ using std::vector;
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
struct Array2D {
|
struct Array2D {
|
||||||
vector<vector<T>> data;
|
vector<T> data;
|
||||||
Array2D() { }
|
int d1 = 0, d2 = 0;
|
||||||
|
int s1 = 0, cap = 0; // strides for each dimension
|
||||||
|
Array2D() {}
|
||||||
Array2D(int dim1, int dim2) { init(dim1, dim2); }
|
Array2D(int dim1, int dim2) { init(dim1, dim2); }
|
||||||
void init(int dim1, int dim2) { data.resize(dim1); for (auto& v : data) v.resize(dim2); }
|
void init(int dim1, int dim2) { d1 = dim1; d2 = dim2; s1 = dim2; cap = d1 * d2; data.resize(cap); }
|
||||||
inline T& get(int k1, int k2) { return data[k1][k2]; }
|
inline T& operator()(int k1, int k2) { return data[k1 * s1 + k2]; }
|
||||||
// 根据关键字,在对应位置插入数据
|
|
||||||
inline void put(const T& value, int k1, int k2) { data[k1][k2] = value; }
|
|
||||||
inline vector<T>& operator[](size_t idx) { return data[idx]; }
|
|
||||||
inline const vector<T>& operator[](size_t idx) const { return data[idx]; }
|
|
||||||
#define _Foreach2D(array, valName, codes) \
|
#define _Foreach2D(array, valName, codes) \
|
||||||
for (auto& arr1 : array.data) { \
|
for (auto& valName : array.data) { \
|
||||||
for (auto& valName : arr1) { \
|
codes; \
|
||||||
codes; \
|
|
||||||
} \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _Foreach2DK(array, valName, codes) \
|
#define _Foreach2DK(array, valName, codes) \
|
||||||
do { \
|
do { \
|
||||||
int k1 = 0; \
|
int k1 = 0, k2 = 0, kg = 0; \
|
||||||
for (auto& arr1 : array.data) { \
|
for (auto& valName : array.data) { \
|
||||||
int k2 = 0; \
|
codes; \
|
||||||
for (auto& valName : arr1) { \
|
++kg; \
|
||||||
codes; \
|
k1 = kg / (array.s1); \
|
||||||
++k2; \
|
k2 = kg % (array.s1); \
|
||||||
} \
|
|
||||||
++k1; \
|
|
||||||
} \
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// 不能是bool
|
||||||
template <class T>
|
template <class T>
|
||||||
struct Array3D {
|
struct Array3D {
|
||||||
vector<vector<vector<T>>> data;
|
vector<T> data;
|
||||||
|
int d1 = 0, d2 = 0, d3 = 0;
|
||||||
|
int s1 = 0, s2 = 0, cap = 0; // strides for each dimension
|
||||||
Array3D() {}
|
Array3D() {}
|
||||||
Array3D(int dim1, int dim2, int dim3) { init(dim1, dim2, dim3); }
|
Array3D(int dim1, int dim2, int dim3) { init(dim1, dim2, dim3); }
|
||||||
void init(int dim1, int dim2, int dim3) {
|
void init(int dim1, int dim2, int dim3) {
|
||||||
data.resize(dim1);
|
d1 = dim1; d2 = dim2; d3 = dim3;
|
||||||
for (auto& v : data) v.resize(dim2);
|
s1 = dim2 * dim3; s2 = dim3; cap = d1 * d2 * d3;
|
||||||
for (auto& v1 : data)
|
data.resize(cap);
|
||||||
for (auto& v2 : v1) v2.resize(dim3);
|
|
||||||
}
|
}
|
||||||
inline T& get(int k1, int k2, int k3) { return data[k1][k2][k3]; }
|
inline T& operator()(int k1, int k2, int k3) {
|
||||||
// 根据关键字,在对应位置插入数据
|
return data[k1 * s1 + k2 * s2 + k3];
|
||||||
inline void put(const T& value, int k1, int k2, int k3) { data[k1][k2][k3] = value; }
|
|
||||||
inline vector<vector<T>>& operator[](size_t idx) { return data[idx]; }
|
|
||||||
#define _Foreach3D(array, valName, codes) \
|
|
||||||
for (auto& arr1 : array.data) { \
|
|
||||||
for (auto& arr2 : arr1) { \
|
|
||||||
for (auto& valName : arr2) { \
|
|
||||||
codes; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _Foreach3DK(array, valName, codes) \
|
#define _Foreach3D(array, valName, codes) \
|
||||||
do { \
|
for (auto& valName : array.data) { \
|
||||||
int k1 = 0; \
|
codes; \
|
||||||
for (auto& arr1 : array.data) { \
|
}
|
||||||
int k2 = 0; \
|
|
||||||
for (auto& arr2 : arr1) { \
|
#define _Foreach3DK(array, valName, codes) \
|
||||||
int k3 = 0; \
|
do { \
|
||||||
for (auto& valName : arr2) { \
|
int k1 = 0, k2 = 0, k3 = 0, kg = 0; \
|
||||||
codes; \
|
for (auto& valName : array.data) { \
|
||||||
++k3; \
|
codes; \
|
||||||
} \
|
++kg; \
|
||||||
++k2; \
|
const int mod1 = kg % (array.s1); \
|
||||||
} \
|
k1 = kg / (array.s1); \
|
||||||
++k1; \
|
k2 = mod1 / (array.s2); \
|
||||||
} \
|
k3 = mod1 % (array.s2); \
|
||||||
|
} \
|
||||||
} while (0)
|
} while (0)
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class T>
|
template <class T>
|
||||||
struct Array4D {
|
struct Array4D {
|
||||||
vector<vector<vector<vector<T>>>> data;
|
vector<T> data;
|
||||||
|
// Extents of the four dimensions. d4 is zero-initialized like the others so that
// an Array4D built with the empty default constructor never holds an indeterminate extent.
int d1 = 0, d2 = 0, d3 = 0, d4 = 0;
|
||||||
|
int s1 = 0, s2 = 0, s3 = 0, cap = 0; // strides for each dimension
|
||||||
Array4D() {}
|
Array4D() {}
|
||||||
Array4D(int dim1, int dim2, int dim3, int dim4) { init(dim1, dim2, dim3, dim4); }
|
Array4D(int dim1, int dim2, int dim3, int dim4) { init(dim1, dim2, dim3, dim4); }
|
||||||
void init(int dim1, int dim2, int dim3, int dim4) {
|
void init(int dim1, int dim2, int dim3, int dim4) {
|
||||||
data.resize(dim1);
|
d1 = dim1; d2 = dim2; d3 = dim3; d4 = dim4;
|
||||||
for (auto& v : data) v.resize(dim2);
|
s1 = d2 * d3 * d4; s2 = d3 * d4; s3 = d4; cap = d1 * d2 * d3 * d4;
|
||||||
for (auto& v1 : data)
|
data.resize(cap);
|
||||||
for (auto& v2 : v1) v2.resize(dim3);
|
|
||||||
for (auto& v1 : data)
|
|
||||||
for (auto& v2 : v1)
|
|
||||||
for (auto& v3 : v2) v3.resize(dim4);
|
|
||||||
}
|
}
|
||||||
inline T& get(int k1, int k2, int k3, int k4) { return data[k1][k2][k3][k4]; }
|
inline T& operator()(int k1, int k2, int k3, int k4) { return data[k1 * s1 + k2 * s2 + k3 * s3 + k4]; }
|
||||||
// 根据关键字,在对应位置插入数据
|
|
||||||
inline void put(const T& value, int k1, int k2, int k3, int k4) { data[k1][k2][k3][k4] = value; }
|
#define _Foreach4D(array, valName, codes) \
|
||||||
inline vector<vector<vector<T>>>& operator[](size_t idx) { return data[idx]; }
|
for (auto& valName : array.data) { \
|
||||||
#define _Foreach4D(array, valName, codes) \
|
codes; \
|
||||||
for (auto& arr1 : array.data) { \
|
|
||||||
for (auto& arr2 : arr1) { \
|
|
||||||
for (auto& arr3 : arr2) { \
|
|
||||||
for (auto& valName : arr3) { \
|
|
||||||
codes; \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
} \
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#define _Foreach4DK(array, valName, codes) \
|
#define _Foreach4DK(array, valName, codes) \
|
||||||
do { \
|
do { \
|
||||||
int k1 = 0; \
|
int k1 = 0, k2 = 0, k3 = 0, k4 = 0, kg = 0; \
|
||||||
for (auto& arr1 : array.data) { \
|
for (auto& valName : array.data) { \
|
||||||
int k2 = 0; \
|
codes; \
|
||||||
for (auto& arr2 : arr1) { \
|
++kg; \
|
||||||
int k3 = 0; \
|
const int mod1 = kg % (array.s1); \
|
||||||
for (auto& arr3 : arr2) { \
|
const int mod2 = mod1 % (array.s2); \
|
||||||
int k4 = 0; \
|
k1 = kg / (array.s1); \
|
||||||
for (auto& valName : arr3) { \
|
k2 = mod1 / (array.s2); \
|
||||||
codes; \
|
k3 = mod2 / (array.s3); \
|
||||||
++k4; \
|
k4 = mod2 % (array.s3); \
|
||||||
} \
|
} \
|
||||||
++k3; \
|
|
||||||
} \
|
|
||||||
++k2; \
|
|
||||||
} \
|
|
||||||
++k1; \
|
|
||||||
} \
|
|
||||||
} while (0)
|
} while (0)
|
||||||
};
|
};
|
||||||
|
|
||||||
// 类似一个tensor
|
// 类似一个tensor,性能太低,不用了
|
||||||
template <class T>
|
template <class T>
|
||||||
struct NestedArray {
|
struct NestedArray {
|
||||||
vector<T> data;
|
vector<T> data;
|
||||||
|
|
|
||||||
|
|
@ -97,13 +97,13 @@ struct RecalDatum {
|
||||||
empiricalQuality = UNINITIALIZED_EMPIRICAL_QUALITY;
|
empiricalQuality = UNINITIALIZED_EMPIRICAL_QUALITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
void increment(const uint64_t incObservations, const double incMismatches) {
|
inline void increment(const uint64_t incObservations, const double incMismatches) {
|
||||||
numObservations += incObservations;
|
numObservations += incObservations;
|
||||||
numMismatches += (incMismatches * MULTIPLIER); // the multiplier used to avoid underflow, or something like that.
|
numMismatches += (incMismatches * MULTIPLIER); // the multiplier used to avoid underflow, or something like that.
|
||||||
empiricalQuality = UNINITIALIZED_EMPIRICAL_QUALITY;
|
empiricalQuality = UNINITIALIZED_EMPIRICAL_QUALITY;
|
||||||
}
|
}
|
||||||
|
|
||||||
void increment(const uint64_t incObservations, const double incMismatches, int baseQuality) {
|
inline void increment(const uint64_t incObservations, const double incMismatches, int baseQuality) {
|
||||||
numObservations += incObservations;
|
numObservations += incObservations;
|
||||||
numMismatches += (incMismatches * MULTIPLIER); // the multiplier used to avoid underflow, or something like that.
|
numMismatches += (incMismatches * MULTIPLIER); // the multiplier used to avoid underflow, or something like that.
|
||||||
reportedQuality = baseQuality;
|
reportedQuality = baseQuality;
|
||||||
|
|
|
||||||
|
|
@ -34,11 +34,11 @@ struct RecalTables {
|
||||||
void init(int _numReadGroups) {
|
void init(int _numReadGroups) {
|
||||||
numReadGroups = _numReadGroups;
|
numReadGroups = _numReadGroups;
|
||||||
// 初始化readgroup和quality两个table
|
// 初始化readgroup和quality两个table
|
||||||
readGroupTable.init(numReadGroups, eventDimension);
|
readGroupTable.init(eventDimension, numReadGroups);
|
||||||
qualityScoreTable.init(numReadGroups, qualDimension, eventDimension);
|
qualityScoreTable.init(eventDimension, numReadGroups, qualDimension);
|
||||||
|
|
||||||
// 初始化context和cycle两个table
|
// 初始化context和cycle两个table
|
||||||
contextTable.init(numReadGroups, qualDimension, ContextCovariate::MaximumKeyValue() + 1, eventDimension);
|
contextTable.init(eventDimension, numReadGroups, qualDimension, ContextCovariate::MaximumKeyValue() + 1);
|
||||||
cycleTable.init(numReadGroups, qualDimension, CycleCovariate::MaximumKeyValue() + 1, eventDimension);
|
cycleTable.init(eventDimension, numReadGroups, qualDimension, CycleCovariate::MaximumKeyValue() + 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
@ -55,44 +55,25 @@ struct RecalUtils {
|
||||||
Array4D<RecalDatum>& cycleTable = recalTables.cycleTable;
|
Array4D<RecalDatum>& cycleTable = recalTables.cycleTable;
|
||||||
|
|
||||||
int readLength = read.read_len;
|
int readLength = read.read_len;
|
||||||
for (int offset = 0; offset < readLength; ++offset) {
|
for (int idx = 0; idx < validEventTypes.size(); ++idx) {
|
||||||
// if (read.rid == 46114) {
|
for (int offset = 0; offset < readLength; ++offset) {
|
||||||
// fprintf(gf[3], "%d %d\n", offset, info.skips[offset] ? 1 : 0);
|
if (!info.skips[offset]) {
|
||||||
// }
|
|
||||||
if (!info.skips[offset]) {
|
|
||||||
// if (true){ // 不跳过当前位置
|
|
||||||
for (int idx = 0; idx < validEventTypes.size(); ++idx) {
|
|
||||||
// 获取四个值,readgroup / qualityscore / context / cycle
|
// 获取四个值,readgroup / qualityscore / context / cycle
|
||||||
EventTypeValue& event = validEventTypes[idx];
|
EventTypeValue& event = validEventTypes[idx];
|
||||||
vector<int>& covariatesAtOffset = readCovars[event.index][offset];
|
CovariateValues& cv = readCovars[event.index][offset];
|
||||||
uint8_t qual = info.getQual(event, offset);
|
uint8_t qual = info.getQual(event, offset);
|
||||||
double isError = info.getErrorFraction(event, offset);
|
double isError = info.getErrorFraction(event, offset);
|
||||||
|
|
||||||
int readGroup = covariatesAtOffset[ReadGroupCovariate::index];
|
// 处理quality score covariate
|
||||||
int baseQuality = covariatesAtOffset[BaseQualityCovariate::index];
|
qualityScoreTable(event.index, cv.readGroup, cv.baseQuality).increment(1, isError, cv.baseQuality);
|
||||||
|
|
||||||
// 处理base quality score协变量
|
|
||||||
// RecalUtils::IncrementDatum3keys(qualityScoreTable, qual, isError, readGroup, baseQuality, event.index);
|
|
||||||
//if (read.rid == 46114) {
|
|
||||||
// fprintf(gf[3], "%d %d %f\n", offset, baseQuality, isError);
|
|
||||||
//}
|
|
||||||
qualityScoreTable[readGroup][baseQuality][event.index].increment(1, isError, baseQuality);
|
|
||||||
|
|
||||||
auto& d = qualityScoreTable[readGroup][baseQuality][event.index];
|
|
||||||
// spdlog::info("isError {} : {}, mis {}, obs {}", isError, info.snp_errs[offset], d.numMismatches, d.numObservations);
|
|
||||||
|
|
||||||
// 处理context covariate
|
// 处理context covariate
|
||||||
int contextCovariate = covariatesAtOffset[ContextCovariate::index];
|
if (cv.context >= 0)
|
||||||
if (contextCovariate >= 0)
|
contextTable(event.index, cv.readGroup, cv.baseQuality, cv.context).increment(1, isError, cv.baseQuality);
|
||||||
contextTable[readGroup][baseQuality][contextCovariate][event.index].increment(1, isError, baseQuality);
|
|
||||||
// RecalUtils::IncrementDatum4keys(nsgv::gRecalTables.contextTable, qual, isError, readGroup, baseQuality, contextCovariate,
|
|
||||||
// event.index);
|
|
||||||
// 处理cycle covariate
|
// 处理cycle covariate
|
||||||
int cycleCovariate = covariatesAtOffset[CycleCovariate::index];
|
if (cv.cycle >= 0)
|
||||||
if (cycleCovariate >= 0)
|
cycleTable(event.index, cv.readGroup, cv.baseQuality, cv.cycle).increment(1, isError, cv.baseQuality);
|
||||||
cycleTable[readGroup][baseQuality][cycleCovariate][event.index].increment(1, isError, baseQuality);
|
|
||||||
// RecalUtils::IncrementDatum4keys(nsgv::gRecalTables.cycleTable, qual, isError, readGroup, baseQuality, cycleCovariate,
|
|
||||||
// event.index);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -207,8 +188,8 @@ struct RecalUtils {
|
||||||
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
||||||
if (dat.getNumObservations() > 0) {
|
if (dat.getNumObservations() > 0) {
|
||||||
table.addRowData({
|
table.addRowData({
|
||||||
ReadGroupCovariate::IdToRg[k1],
|
ReadGroupCovariate::IdToRg[k2],
|
||||||
ReportUtil::ToString(EventType::EVENTS[k2].representation),
|
ReportUtil::ToString(EventType::EVENTS[k1].representation),
|
||||||
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
||||||
ReportUtil::ToString(dat.getReportedQuality(), 4),
|
ReportUtil::ToString(dat.getReportedQuality(), 4),
|
||||||
ReportUtil::ToString(dat.getNumObservations()),
|
ReportUtil::ToString(dat.getNumObservations()),
|
||||||
|
|
@ -233,9 +214,9 @@ struct RecalUtils {
|
||||||
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
||||||
if (dat.getNumObservations() > 0) {
|
if (dat.getNumObservations() > 0) {
|
||||||
table.addRowData({
|
table.addRowData({
|
||||||
ReadGroupCovariate::IdToRg[k1],
|
ReadGroupCovariate::IdToRg[k2],
|
||||||
ReportUtil::ToString(k2),
|
ReportUtil::ToString(k3),
|
||||||
ReportUtil::ToString(EventType::EVENTS[k3].representation),
|
ReportUtil::ToString(EventType::EVENTS[k1].representation),
|
||||||
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
||||||
ReportUtil::ToString(dat.getNumObservations()),
|
ReportUtil::ToString(dat.getNumObservations()),
|
||||||
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
||||||
|
|
@ -260,11 +241,11 @@ struct RecalUtils {
|
||||||
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
||||||
if (dat.getNumObservations() > 0) {
|
if (dat.getNumObservations() > 0) {
|
||||||
table.addRowData({
|
table.addRowData({
|
||||||
ReadGroupCovariate::IdToRg[k1],
|
ReadGroupCovariate::IdToRg[k2],
|
||||||
ReportUtil::ToString(k2),
|
ReportUtil::ToString(k3),
|
||||||
ReportUtil::ToString(ContextCovariate::ContextFromKey(k3)),
|
ReportUtil::ToString(ContextCovariate::ContextFromKey(k4)),
|
||||||
"Context",
|
"Context",
|
||||||
ReportUtil::ToString(EventType::EVENTS[k4].representation),
|
ReportUtil::ToString(EventType::EVENTS[k1].representation),
|
||||||
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
||||||
ReportUtil::ToString(dat.getNumObservations()),
|
ReportUtil::ToString(dat.getNumObservations()),
|
||||||
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
||||||
|
|
@ -276,11 +257,11 @@ struct RecalUtils {
|
||||||
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
RecalDatum &dat = const_cast<RecalDatum&>(datum);
|
||||||
if (dat.getNumObservations() > 0) {
|
if (dat.getNumObservations() > 0) {
|
||||||
table.addRowData({
|
table.addRowData({
|
||||||
ReadGroupCovariate::IdToRg[k1],
|
ReadGroupCovariate::IdToRg[k2],
|
||||||
ReportUtil::ToString(k2),
|
ReportUtil::ToString(k3),
|
||||||
ReportUtil::ToString(CycleCovariate::CycleFromKey(k3)),
|
ReportUtil::ToString(CycleCovariate::CycleFromKey(k4)),
|
||||||
"Cycle",
|
"Cycle",
|
||||||
ReportUtil::ToString(EventType::EVENTS[k4].representation),
|
ReportUtil::ToString(EventType::EVENTS[k1].representation),
|
||||||
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
ReportUtil::ToString(dat.getEmpiricalQuality(), 4),
|
||||||
ReportUtil::ToString(dat.getNumObservations()),
|
ReportUtil::ToString(dat.getNumObservations()),
|
||||||
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
ReportUtil::ToString(dat.getNumMismatches(), 2)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue