diff --git a/CommonLib/CommonLib.vcxproj b/CommonLib/CommonLib.vcxproj
index 8836c8c..cdba565 100644
--- a/CommonLib/CommonLib.vcxproj
+++ b/CommonLib/CommonLib.vcxproj
@@ -107,9 +107,7 @@
-
- false
-
+
diff --git a/CommonLib/CommonLib.vcxproj.filters b/CommonLib/CommonLib.vcxproj.filters
index 7d4df5e..1380e3e 100644
--- a/CommonLib/CommonLib.vcxproj.filters
+++ b/CommonLib/CommonLib.vcxproj.filters
@@ -17,9 +17,7 @@
-
-
-
+
diff --git a/CommonLib/matlab_io.cpp b/CommonLib/matlab_io.cpp
index 4831c3b..77e8513 100644
--- a/CommonLib/matlab_io.cpp
+++ b/CommonLib/matlab_io.cpp
@@ -14,6 +14,92 @@
#include "matlab_io.h"
using namespace std;
+// 将matlab存储方式转换成c存储方式
+#define TRANS_ROW_COL(dst, src, rowNum, colNum) \
+ for (int rowI = 0; rowI < rowNum; ++rowI) { \
+ for (int colJ = 0; colJ < colNum; ++colJ) { \
+ dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
+ } \
+ }
+
+/* 读取结构体中的二维字符串矩阵(一维的cell,每个cell又有一层cell,每个cell是一个字符串)*/
+bool ReadChildString2D(const string& filePath, const string& parentName, const string& selfName, vector >& vvStr) {
+ MATFile* pMatFile = nullptr;
+ mxArray* pMxArray = nullptr;
+ mxArray* pCell = nullptr;
+ int rowNum, colNum;
+ char strBuf[STRING_BUF_SIZE];
+
+ pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
+ if (pMatFile == nullptr) {
+ cout << "filePath is error!" << endl;
+ return false;
+ }
+ mxArray* pMxParent = matGetVariable(pMatFile, parentName.c_str()); //获取G变量
+
+ // 读取字符串
+ pMxArray = mxGetField(pMxParent, 0, selfName.c_str()); // ds
+ rowNum = (int)mxGetM(pMxArray);
+ colNum = (int)mxGetN(pMxArray);
+ for (int i = 0; i < rowNum; ++i) {
+ for (int j = 0; j < colNum; ++j) {
+ pCell = mxGetCell(pMxArray, j * rowNum + i);
+ int childRowNum = (int)mxGetM(pCell);
+ int childColNum = (int)mxGetN(pCell);
+ vvStr.push_back(vector());
+ vvStr.back().resize(childRowNum * childColNum);
+ for (int ii = 0; ii < childRowNum; ii++) {
+ for (int jj = 0; jj < childColNum; jj++) {
+ mxArray* pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
+ if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
+ cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
+ return false;
+ }
+ vvStr.back()[ii * childColNum + jj] = strBuf;
+ // auto& lastStr = vvStr.back()[ii * childColNum + jj];
+ // transform(lastStr.begin(), lastStr.end(), lastStr.begin(), ::toupper); // 转成大写
+ }
+ }
+ }
+ }
+ mxDestroyArray(pMxArray);
+ return true;
+}
+
+/* 读取结构体中的二维double矩阵(一维的cell,每个cell又有一层cell,每个cell是一个字符串)*/
+bool ReadChildDouble2D(const string& filePath, const string& parentName, const string& selfName, vector >& vvDouble) {
+ MATFile* pMatFile = nullptr;
+ mxArray* pMxArray = nullptr;
+ mxArray* pCell = nullptr;
+ int rowNum, colNum;
+
+ pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
+ if (pMatFile == nullptr) {
+ cout << "filePath is error!" << endl;
+ return false;
+ }
+ mxArray* pMxParent = matGetVariable(pMatFile, parentName.c_str()); //获取G变量
+
+ // 读取double数据
+ pMxArray = mxGetField(pMxParent, 0, selfName.c_str()); // ds
+ rowNum = (int)mxGetM(pMxArray);
+ colNum = (int)mxGetN(pMxArray);
+ for (int i = 0; i < rowNum; ++i) {
+ for (int j = 0; j < colNum; ++j) {
+ pCell = mxGetCell(pMxArray, j * rowNum + i);
+ int childRowNum = (int)mxGetM(pCell);
+ int childColNum = (int)mxGetN(pCell);
+
+ vvDouble.push_back(vector());
+ vvDouble.back().resize(childRowNum * childColNum);
+ double* pVal = (double*)mxGetData(pCell); //获取指针
+ TRANS_ROW_COL(vvDouble.back(), pVal, childRowNum, childColNum); // 行列存储方式转换
+ }
+ }
+ mxDestroyArray(pMxArray);
+ return true;
+}
+
/* 读取字符串矩阵 */
bool ReadMtxString(const string& filePath, const string& mtxName,
vector& vStr, int* pRowNum, int* pColNum) {
diff --git a/CommonLib/matlab_io.h b/CommonLib/matlab_io.h
index 37cab9d..e160dff 100644
--- a/CommonLib/matlab_io.h
+++ b/CommonLib/matlab_io.h
@@ -15,7 +15,7 @@
#include
using namespace std;
-#define STRING_BUF_SIZE 10240
+#define STRING_BUF_SIZE 204800
typedef double T;
/*
注意参数的区别,读取时候直接传递文件路径名,写入的时候需要提供文件指针,
@@ -31,4 +31,11 @@ T* ReadMtxDouble(const string& filePath, const string& mtxName, int* pRowNum, in
/* 将数据写入mat文件中,用给定的名称命名 */
bool SaveMtxDouble(T* src, MATFile* pMatFile, string matrixName, int rowNum, int colNum);
+/* 读取结构体中的字符串数据 */
+bool ReadChildString2D(const string& filePath, const string& parentName, const string& selfName, vector >& vvStr);
+
+/* 读取结构体中的二维double矩阵(一维的cell,每个cell又有一层cell,每个cell是一维double数组)*/
+bool ReadChildDouble2D(const string& filePath, const string& parentName, const string& selfName, vector >& vvDouble);
+
+
#endif
\ No newline at end of file
diff --git a/CppRun/CppRun.vcxproj b/CppRun/CppRun.vcxproj
index 63d4327..4ca5721 100644
--- a/CppRun/CppRun.vcxproj
+++ b/CppRun/CppRun.vcxproj
@@ -129,9 +129,7 @@
-
- false
-
+
diff --git a/CppRun/CppRun.vcxproj.filters b/CppRun/CppRun.vcxproj.filters
index e5360c6..d65e643 100644
--- a/CppRun/CppRun.vcxproj.filters
+++ b/CppRun/CppRun.vcxproj.filters
@@ -33,8 +33,6 @@
-
-
-
+
\ No newline at end of file
diff --git a/CppRun/calc_entropy.cpp b/CppRun/calc_entropy.cpp
index ded1596..d00d3cc 100644
--- a/CppRun/calc_entropy.cpp
+++ b/CppRun/calc_entropy.cpp
@@ -92,7 +92,7 @@ bool ReadInfoFromMat(const string & filePath, vector >&vvDs, vect
mxArray* pMxArray = nullptr;
mxArray* pCell = nullptr;
int rowNum, colNum;
- char strBuf[STRING_BUF_SIZE];
+ char *strBuf = new char[STRING_BUF_SIZE];
const string& parrentName = "G";
const string& firstChildName = "ds";
const string& secondChildName = "fr";
@@ -112,6 +112,7 @@ bool ReadInfoFromMat(const string & filePath, vector >&vvDs, vect
INNTER_FOR_BEGIN
if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
+ delete[]strBuf;
return false;
}
vvDs.back()[ii * childColNum + jj] = strBuf;
@@ -127,9 +128,10 @@ bool ReadInfoFromMat(const string & filePath, vector >&vvDs, vect
vvFr.back().resize(childRowNum * childColNum);
double *pVal = (double*)mxGetData(pCell); //获取指针
TRANS_ROW_COL(vvFr.back(), pVal, childRowNum, childColNum); // 行列存储方式转换
- OUTER_FOR_END
+ OUTER_FOR_END
- // 没考虑完全哪些数据需要mxDestroyArray,可能会有内存泄漏
+ // 没考虑完全哪些数据需要mxDestroyArray,可能会有内存泄漏
+ delete[]strBuf;
return true;
}
@@ -233,7 +235,7 @@ void CalcEntropy(int argc, const char** argv) {
string wordMatSuffix(argv[2]); // 高频词汇矩阵对应的mat文件的后缀名(可以是全文件名,可以是文件名后缀,必须保证唯一)
fs::path outFileName(argv[4]);
int numThread = 1;
- if (argc >= 5) numThread = atoi(argv[5]);
+ if (argc > 5) numThread = atoi(argv[5]);
if (numThread < 1) numThread = 1;
/* 读入处理后的pubmed文献信息的mat文件,只读入摘要信息,即变量abs1 */
@@ -281,7 +283,7 @@ void CalcEntropy(int argc, const char** argv) {
cout << "read abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
/* 将分割结果写入mat文件 */
begin = clock();
- if (argc >= 6) {
+ if (argc > 6) {
MATFile* pMatFile = matOpen(argv[6], "w");
mxArray* pCellMtx= mxCreateCellMatrix(1, vvWordMtx.size());
for (int i = 0; i < vvWordMtx.size(); ++i) {
diff --git a/GMM/GMM.vcxproj b/GMM/GMM.vcxproj
index abfb3da..f615ea3 100644
--- a/GMM/GMM.vcxproj
+++ b/GMM/GMM.vcxproj
@@ -127,9 +127,7 @@
-
- false
-
+
diff --git a/GMM/GMM.vcxproj.filters b/GMM/GMM.vcxproj.filters
index c29d2e2..5711398 100644
--- a/GMM/GMM.vcxproj.filters
+++ b/GMM/GMM.vcxproj.filters
@@ -36,8 +36,6 @@
-
-
-
+
\ No newline at end of file
diff --git a/GMM/main.cpp b/GMM/main.cpp
index 4f5b4e2..8f134cf 100644
--- a/GMM/main.cpp
+++ b/GMM/main.cpp
@@ -128,6 +128,11 @@ T SquareAverage(vector& vVal) {
/* 计算向量x和y的相关距离, 向量维度必须相等*/
double CorrelationDistance(vector& vX, vector& vY) {
+ double xMean = Average(vX);
+ transform(vX.cbegin(), vX.cend(), vX.begin(), [&](double val) { return val - xMean; });
+ double yMean = Average(vY);
+ transform(vY.cbegin(), vY.cend(), vY.begin(), [&](double val) { return val - yMean; });
+
vector vXY(vX.size());
for (int i = 0; i < vXY.size(); ++i) {
vXY[i] = vX[i] * vY[i];
diff --git a/MexFunc/CalcEntropy.cpp b/MexFunc/CalcEntropy.cpp
new file mode 100644
index 0000000..eeeefd5
--- /dev/null
+++ b/MexFunc/CalcEntropy.cpp
@@ -0,0 +1,237 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+using std::cout;
+using std::endl;
+using namespace std;
+
+#define STRING_BUF_SIZE 204800
+
+/* 读取二层cell包裹的字符串,和数值,ds,fr */
+#define OUTER_FOR_BEGIN \
+ rowNum = (int)mxGetM(pMxArray); \
+ colNum = (int)mxGetN(pMxArray); \
+ for (int i = 0; i < rowNum; ++i) { \
+ for (int j = 0; j < colNum; ++j) { \
+ mxArray* pCell = mxGetCell(pMxArray, j * rowNum + i); \
+ int childRowNum = (int)mxGetM(pCell); \
+ int childColNum = (int)mxGetN(pCell);
+
+#define OUTER_FOR_END \
+ } \
+ }
+
+#define INNTER_FOR_BEGIN \
+ for (int ii = 0; ii < childRowNum; ii++) { \
+ for (int jj = 0; jj < childColNum; jj++) { \
+ mxArray *pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
+#define INNTER_FOR_END \
+ } \
+ }
+// 将matlab存储方式转换成c存储方式
+#define TRANS_ROW_COL(dst, src, rowNum, colNum) \
+ for (int rowI = 0; rowI < rowNum; ++rowI) { \
+ for (int colJ = 0; colJ < colNum; ++colJ) { \
+ dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
+ } \
+ }
+// 将二维索引转成一维的索引
+inline int Get1DIndex(int colNum, int row, int col) {
+ return row * colNum + col;
+}
+
+// 读取G结构体中的ds和fr
+void GetFrDs(const mxArray* pMxParent, vector >& vvDs, vector >& vvFr) {
+ // 读取ds字符串
+ int rowNum, colNum;
+ char *strBuf = new char[STRING_BUF_SIZE];
+ mxArray* pMxArray = mxGetField(pMxParent, 0, "ds"); // ds
+ OUTER_FOR_BEGIN
+ vvDs.push_back(vector());
+ vvDs.back().resize(childRowNum * childColNum);
+ INNTER_FOR_BEGIN
+ if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
+ cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
+ delete[]strBuf;
+ return;
+ }
+ vvDs.back()[ii * childColNum + jj] = strBuf;
+ auto& lastStr = vvDs.back()[ii * childColNum + jj];
+ transform(lastStr.begin(), lastStr.end(), lastStr.begin(), ::toupper); // 转成大写
+ INNTER_FOR_END
+ OUTER_FOR_END
+
+ // 读取fr数值
+ pMxArray = mxGetField(pMxParent, 0, "fr"); // fr
+ OUTER_FOR_BEGIN
+ vvFr.push_back(vector());
+ vvFr.back().resize(childRowNum * childColNum);
+ double* pVal = (double*)mxGetData(pCell); //获取指针
+ TRANS_ROW_COL(vvFr.back(), pVal, childRowNum, childColNum); // 行列存储方式转换
+ OUTER_FOR_END
+ delete[]strBuf;
+}
+
+/* 读取abs */
+void GetAbstract(const mxArray* pMxAbs, vector& vAbs) {
+ int rowNum = (int)mxGetM(pMxAbs);
+ int colNum = (int)mxGetN(pMxAbs);
+ char *strBuf = new char[STRING_BUF_SIZE];
+
+ vAbs.resize(rowNum * colNum);
+ for (int i = 0; i < rowNum; ++i) {
+ for (int j = 0; j < colNum; ++j) {
+ mxArray* pCell = mxGetCell(pMxAbs, j * rowNum + i);
+ if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
+ cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
+ delete[]strBuf;
+ return;
+ }
+ vAbs[i * colNum + j] = strBuf;
+ }
+ }
+ delete[]strBuf;
+}
+
+/*
+nlhs:输出参数数目(Number Left - hand side),等号左边
+plhs:指向输出参数的指针(Point Left - hand side),等号左边
+nrhs:输入参数数目(Number Right - hand side),等号右边
+prhs:指向输入参数的指针(Point Right - hand side),等号右边。要注意prhs是const的指针数组,即不能改变其指向内容。
+*/
+void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
+ //cout << "MexCalcEntropy" << endl;
+ //cout << nlhs << '\t' << nrhs << endl;
+ if (nrhs != 2) {
+ cout << "2 arguments should be given for this function!" << endl;
+ return;
+ }
+ clock_t begin, finish;
+ begin = clock();
+ vector > vvDs; // 每个知识颗粒的ds矩阵(词汇矩阵)
+ vector > vvFr; // 词汇对应的频率
+ GetFrDs(prhs[1], vvDs, vvFr);
+
+ vector vAbstract; // 读取abs1, 然后分割成一个一个的单词
+ GetAbstract(prhs[0], vAbstract);
+
+ /* 将摘要信息分割成一个一个的词汇 */
+ // begin = clock();
+ unordered_set usWordChars; // 能组成单词的字符,要不要考虑数字?原版matlab是提取了数字的
+ for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
+ for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
+ for (int i = 48; i <= 57; i++) usWordChars.insert(char(i)); // 0 - 9
+ usWordChars.insert('/'); usWordChars.insert('+'); usWordChars.insert('-');
+ vector > vvWordMtx(vAbstract.size()); // 初始大小为文章的个数
+ vector > vusAbsWord(vAbstract.size()); // 将每篇文章摘要的单词放入hash表
+ for (int i = 0; i < vAbstract.size(); i++) {
+ auto& strAbs = vAbstract[i];
+ // 遍历摘要字符串的每一个字符,取出每一个单词
+ vector& vWord = vvWordMtx[i];
+ if (strAbs.size() == 0) continue; // 摘要信息为空,跳过(一般不会出现这个情况)
+ int wordStartPos = 0;
+ while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
+ wordStartPos++;
+ for (int curPos = wordStartPos + 1; curPos < strAbs.size(); ++curPos) {
+ if (usWordChars.find(strAbs[curPos]) == usWordChars.end()) { // 找到了分割符
+ vWord.push_back(strAbs.substr(wordStartPos, curPos - wordStartPos));
+ wordStartPos = curPos + 1; // 找下一个词语起始位置
+ while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
+ wordStartPos++;
+ curPos = wordStartPos; // 循环会自动加1
+ }
+ }
+ // 将处理摘要之后的每个词语放入hash表
+ for (auto& word : vWord) {
+ string upWord(word);
+ transform(upWord.begin(), upWord.end(), upWord.begin(), ::toupper);
+ vusAbsWord[i].insert(upWord);
+ }
+ }
+ // finish = clock();
+ // cout << "Split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+
+ // 存放结果,用一维数组存放二维数据
+ vector hs;
+ vector hr;
+ const int numLiterature = vusAbsWord.size(); // pubmed 文件中包含的文献数量
+ const int numGroup = vvDs.size(); // ds包含的组数
+ hs.resize(numGroup * numLiterature);
+ hr.resize(numLiterature * numGroup);
+
+ for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
+ vector& vDs = vvDs[groupIdx]; // 这一组ds
+ vector& vFr = vvFr[groupIdx]; // frequency
+ const int numWord = vDs.size(); // 这一组数据中包含的单词数量
+ vector > vX(numLiterature, vector(numWord, 0));
+ // 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
+ for (int i = 0; i < numLiterature; ++i) {
+ for (int j = 0; j < numWord; ++j) {
+ if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
+ vX[i][j] = 1;
+ }
+ }
+ }
+
+ // 找词汇的最高频率
+ double maxFr = *max_element(vFr.begin(), vFr.end());
+ // 将fr的数值规范化到(0,0.368)之间
+ const double normalMax = 0.368;
+ for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
+ maxFr = normalMax;
+ // 对每个知识颗粒每一组数据,计算信息熵
+ for (int i = 0; i < numLiterature; ++i) {
+ for (int j = 0; j < numWord; ++j) {
+ if (vX[i][j] == 1) {
+ hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
+ }
+ }
+ }
+
+ // 找最高频词汇所在的索引位置
+ vector vMaxPos;
+ int idx = 0;
+ for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
+ if (val == maxFr) vMaxPos.push_back(idx);
+ idx++;
+ });
+
+ for (int i = 0; i < numLiterature; ++i) {
+ int cumulateX = 0; // 计算在最高频词汇处,x值的累加结果
+ for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
+ if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
+ hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
+ }
+ }
+ }
+
+ /* 将结果写入返回值 */
+ if (nlhs > 0) {
+ int datasize = numGroup * numLiterature;
+ double* mtxData = new double[datasize];//待存储数据转为double格式
+ mxArray* pWriteArray = NULL;//matlab格式矩阵
+ //创建一个rowNum*colNum的矩阵
+ pWriteArray = mxCreateDoubleMatrix(numGroup, numLiterature, mxREAL);
+ for (int i = 0; i < numGroup; i++) {
+ for (int j = 0; j < numLiterature; j++) {
+ mtxData[j * numGroup + i] = hs[i * numLiterature + j];
+ }
+ }
+ //把data的值赋给pWriteArray指针
+ memcpy((void*)(mxGetPr(pWriteArray)), (void*)mtxData, sizeof(double) * datasize);
+ plhs[0] = pWriteArray; // 赋值给返回值
+ delete[]mtxData;
+ }
+ finish = clock();
+ // cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
+
+/* 供main调试调用 */
+void MexCalcEntropy(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
+ mexFunction(nlhs, plhs, nrhs, prhs);
+}
\ No newline at end of file
diff --git a/MexFunc/CorrelationDist.cpp b/MexFunc/CorrelationDist.cpp
new file mode 100644
index 0000000..7746785
--- /dev/null
+++ b/MexFunc/CorrelationDist.cpp
@@ -0,0 +1,224 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "CommonLib/kthread.h"
+#include "CommonLib/thread_pool.h"
+using std::cout;
+using std::endl;
+using namespace std;
+
+// 线程参数
+struct TPCorDist {
+ vector>* pvvX;
+ vector* pvDist;
+ vector* pvSq;
+ int rowIdxStart;
+ int rowIdxEnd;
+ int rowNum;
+ int colNum;
+};
+
+// 线程函数
+void ThreadCalcDist(TPCorDist& param) {
+ vector>& vvX = *param.pvvX;
+ vector& vDist = *param.pvDist;
+ vector& vSq = *param.pvSq;
+ int rowIdxStart = param.rowIdxStart;
+ int rowIdxEnd = param.rowIdxEnd;
+ int rowNum = param.rowNum;
+ int colNum = param.colNum;
+ double uv = 0;
+ clock_t begin = clock(), finish;
+ __m256 vec_zero = _mm256_set1_ps(0);
+ for (int i = rowIdxStart; i < rowIdxEnd; ++i) {
+ const int baseIdx = i * (rowNum * 2 - i - 1) / 2;
+ for (int cur = i + 1, idx = 0; cur < rowNum; ++cur, ++idx) {
+ double uv = 0;
+ //for (int j = 0; j < colNum; ++j) { uv += vvX[i][j] * vvX[cur][j]; }
+ __m256 vec_uv = vec_zero;
+ int colNum8 = colNum / 8 * 8;
+ int remain = colNum - colNum8;
+ for (int j = 0; j < colNum8; j += 8) {
+ __m256 vec_u = _mm256_loadu_ps(vvX[i].data() + j);
+ __m256 vec_v = _mm256_loadu_ps(vvX[cur].data() + j);
+ vec_uv = _mm256_add_ps(_mm256_mul_ps(vec_u, vec_v), vec_uv);
+ }
+ for (int j = colNum8; j < colNum; ++j) {
+ uv += vvX[i][j] * vvX[cur][j];
+ }
+ float* pVec = (float*)&vec_uv;
+ for (int j = 0; j < 8; ++j) uv += pVec[j];
+
+ uv /= colNum;
+ const double dist = abs(1.0 - uv / sqrt(vSq[i] * vSq[cur]));
+ vDist[baseIdx + idx] = dist;
+ }
+ }
+ finish = clock();
+ //cout << rowIdxEnd << " Thread time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
+
+/* 入口函数 */
+void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
+ //cout << "WordSplit" << endl;
+ //cout << nlhs << '\t' << nrhs << endl;
+ //if (nrhs < 1) {
+ // cout << "At least 1 arguments should be given for this function!" << endl;
+ // return;
+ //}
+ clock_t begin = clock(), mid, finish;
+
+ int rowNum = (int)mxGetM(prhs[0]);
+ int colNum = (int)mxGetN(prhs[0]);
+ double* pData = (double*)mxGetData(prhs[0]);
+ //
+ //int numThread = 1;
+ //if (nrhs > 1) {
+ // double* pNumThread = (double*)mxGetData(prhs[1]);
+ // numThread = (int)*pNumThread;
+ // if (numThread < 1) numThread = 1;
+ //}
+
+
+ //for (int i = 0; i < rowNum; ++i) {
+ // for (int j = 0; j < colNum; ++j) {
+ // cout << pData[j * rowNum + i] << '\t';
+ // }
+ // cout << endl;
+ //}
+
+// int rowNum = 2;
+// int colNum = 5;
+// vector pData = { 1,1,2,2,3,9,4,4,5,4 };
+
+ cout << rowNum << '\t' << colNum << endl;
+
+ /* 计算每一行的平均数 */
+ mid = clock();
+ vector vMean(rowNum);
+ for (int j = 0; j < colNum; ++j) {
+ for (int i = 0; i < rowNum; ++i) {
+ vMean[i] += pData[j * rowNum + i];
+ }
+ }
+ for (int i = 0; i < rowNum; ++i) {
+ vMean[i] /= colNum;
+ }
+ finish = clock();
+ cout << "计算平均数: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
+ /* 减去平均值, 计算平方 */
+ mid = clock();
+ vector> vvX(rowNum, vector(colNum));
+ vector vSq(rowNum);
+ for (int i = 0; i < rowNum; ++i) {
+ for (int j = 0; j < colNum; ++j) {
+ vvX[i][j] = pData[j * rowNum + i] - vMean[i];
+ vSq[i] += vvX[i][j] * vvX[i][j];
+ }
+ }
+ for (auto& val : vSq) { val /= colNum; }
+ finish = clock();
+ cout << "计算平方: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
+
+ /* 计算相关距离 */
+// clock_t mid0 = clock();
+// const int distSize = rowNum * (rowNum - 1) / 2;
+// const int row2 = 2 * rowNum;
+// vector vDist(distSize, 0.0);
+// //__m256d vec_zero = _mm256_set1_pd(0);
+// __m256 vec_zero = _mm256_set1_ps(0);
+// for (int i = 0; i < rowNum; ++i) {
+// if (i % 100 == 0) {
+// finish = clock();
+// cout << "time " << i << ": " << (double)(finish - mid) / CLOCKS_PER_SEC << " s; " <<
+// (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+// mid = clock();
+// }
+// const int baseIdx = i * (row2 - i - 1) / 2;
+// for (int cur = i + 1, idx = 0; cur < rowNum; ++cur, ++idx) {
+// double uv = 0;
+// //for (int j = 0; j < colNum; ++j) { uv += vvX[i][j] * vvX[cur][j]; }
+// //__m256d vec_uv = vec_zero;
+// //int colNum4 = colNum / 4 * 4;
+// //int remain = colNum - colNum4;
+// //for (int j = 0; j < colNum4; j += 4) {
+// // __m256d vec_u = _mm256_loadu_pd(vvX[i].data() + j);
+// // __m256d vec_v = _mm256_loadu_pd(vvX[cur].data() + j);
+// // vec_uv = _mm256_add_pd(_mm256_mul_pd(vec_u, vec_v), vec_uv);
+// //}
+// //for (int j = colNum4; j < colNum; ++j) {
+// // uv += vvX[i][j] * vvX[cur][j];
+// //}
+// //double* pVec = (double*)&vec_uv;
+// //for (int j = 0; j < 4; ++j) uv += pVec[j];
+//
+// __m256 vec_uv = vec_zero;
+// int colNum8 = colNum / 8 * 8;
+// int remain = colNum - colNum8;
+// for (int j = 0; j < colNum8; j += 8) {
+// __m256 vec_u = _mm256_loadu_ps(vvX[i].data() + j);
+// __m256 vec_v = _mm256_loadu_ps(vvX[cur].data() + j);
+// vec_uv = _mm256_add_ps(_mm256_mul_ps(vec_u, vec_v), vec_uv);
+// }
+// for (int j = colNum8; j < colNum; ++j) {
+// uv += vvX[i][j] * vvX[cur][j];
+// }
+// float* pVec = (float*)&vec_uv;
+// for (int j = 0; j < 8; ++j) uv += pVec[j];
+//
+// uv /= colNum;
+// const double dist = abs(1.0 - uv / sqrt(vSq[i] * vSq[cur]));
+// vDist[baseIdx + idx] = dist;
+// }
+// }
+// finish = clock();
+// cout << "计算相关距离: " << (double)(finish - mid0) / CLOCKS_PER_SEC << " s" << endl;
+// for (auto& val : vDist) {cout << val << endl;}
+
+ /* 多线程计算相关距离 */
+ const int distSize = rowNum * (rowNum - 1) / 2;
+ const int row2 = 2 * rowNum;
+ vector vDist(distSize, 0.0);
+ mid = clock();
+ vector vTP;
+ ThreadPool thPool(6);
+ int span = 32;
+ int rowNumSpan = rowNum / span * span;
+ int i = 0;
+ for (i = 0; i < rowNumSpan; i += span) {
+ // vTP.push_back({&vvX, &vDist, &vSq, i, rowNum, colNum});
+ TPCorDist tp = { &vvX, &vDist, &vSq, i, i + span, rowNum, colNum };
+ thPool.enqueue(ThreadCalcDist, tp);
+ //vTP.push_back(tp);
+ }
+ if (i < rowNum) {
+ TPCorDist tp = { &vvX, &vDist, &vSq, i, rowNum, rowNum, colNum };
+ thPool.enqueue(ThreadCalcDist, tp);
+ //vTP.push_back(tp);
+ }
+ thPool.~ThreadPool();
+ //kt_for(6, ThreadCalcDist, vTP);
+
+ finish = clock();
+ cout << "多线程计算相关距离: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
+ // for (auto& val : vDist) {cout << val << endl;}
+
+ /* 写入结果 */
+ if (nlhs > 0) { // b
+ mxArray* pWriteArray = NULL;
+ //创建一个rowNum*colNum的矩阵
+ pWriteArray = mxCreateDoubleMatrix(1, distSize, mxREAL);
+ memcpy((void*)(mxGetPr(pWriteArray)), (void*)vDist.data(), sizeof(double) * 6);
+ plhs[0] = pWriteArray; // 赋值给返回值
+ }
+
+ finish = clock();
+ cout << "Correlation Dist Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
\ No newline at end of file
diff --git a/MexFunc/GaussFit.cpp b/MexFunc/GaussFit.cpp
new file mode 100644
index 0000000..7b3d7e5
--- /dev/null
+++ b/MexFunc/GaussFit.cpp
@@ -0,0 +1,1256 @@
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+using std::cout;
+using std::endl;
+using namespace std;
+#define M_PI 3.1415926535897932384626433832795
+
+
+class KMeans
+{
+public:
+ enum InitMode
+ {
+ InitRandom,
+ InitManual,
+ InitUniform,
+ };
+
+ KMeans(int dimNum = 1, int clusterNum = 1);
+ ~KMeans();
+
+ void SetMean(int i, const double* u) { memcpy(m_means[i], u, sizeof(double) * m_dimNum); }
+ void SetInitMode(int i) { m_initMode = i; }
+ void SetMaxIterNum(int i) { m_maxIterNum = i; }
+ void SetEndError(double f) { m_endError = f; }
+
+ double* GetMean(int i) { return m_means[i]; }
+ int GetInitMode() { return m_initMode; }
+ int GetMaxIterNum() { return m_maxIterNum; }
+ double GetEndError() { return m_endError; }
+
+
+ /* SampleFile: ...
+ LabelFile: