#include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifdef _WIN32 #include #include #define F_OK 0 #else #include #endif #include #include "gmm.h" #include "thread_pool.h" using namespace std; using std::cout; using std::vector; namespace fs = std::filesystem; /* 从mat文件中读取给定名称的矩阵数据,并获取矩阵的行列数值 */ template T* ReadMatlabMat(const string &filePath, const string &mtxName, int *pRowNum, int *pColNum) { T* dst = nullptr; MATFile* pMatFile = nullptr; mxArray* pMxArray = nullptr; int rowNum, colNum; double* matData; pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件 if (pMatFile == nullptr) { cerr << "filePath is error!" << endl; return nullptr; } pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵 rowNum = mxGetM(pMxArray); colNum = mxGetN(pMxArray); // cout << rowNum << " " << colNum << endl; matData = (double*)mxGetData(pMxArray); //获取指针 dst = new T[rowNum * colNum]; for (int i = 0; i < rowNum; ++i) { for (int j = 0; j < colNum; ++j) { dst[i * colNum + j] = T(matData[j * rowNum + i]); } } mxDestroyArray(pMxArray); //释放内存 matClose(pMatFile); // 关闭文件 *pRowNum = rowNum; *pColNum = colNum; return dst; } /* 将数据写入mat文件中,用给定的名称命名 */ template bool SaveMatrix(T* src, MATFile* pMatFile, string matrixName, int rowNum, int colNum) { //转置存储 int datasize = colNum * rowNum; double* mtxData = new double[datasize];//待存储数据转为double格式 // memset(mtxData, 0, datasize * sizeof(double)); for (int i = 0; i < rowNum; i++) { for (int j = 0; j < colNum; j++) { mtxData[j * rowNum + i] = double(src[i * colNum + j]); // *(mtxData + j * rowNum + i) = (double)src[i * colNum + j]; 可消除警告 } } mxArray* pWriteArray = NULL;//matlab格式矩阵 if (pMatFile == nullptr) { cerr << "mat file pointer is error!" << endl; return false; } //创建一个rowNum*colNum的矩阵 pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL); //把data的值赋给pWriteArray指针 memcpy((void*)(mxGetPr(pWriteArray)), (void*)mtxData, sizeof(double) * datasize); //给矩阵命名为matrixName matPutVariable(pMatFile, matrixName.c_str(), pWriteArray); mxDestroyArray(pWriteArray);//release resource delete[]mtxData;//release resource return true; } /* 将x向量放到宽度为binWidth大小的桶中,功能类似matlab的hist*/ void PutXtoBin(double* x, int xSize, double binWidth, vector& vXBin, vector& vYBin) { double maxX = 0.0; for (int i = 0; i < xSize; ++i) { if (maxX < x[i]) maxX = x[i]; } int binSize = (int)((maxX + binWidth / 2) / binWidth + 1); double binMaxVal = (binSize - 1) * binWidth; if (binMaxVal > maxX) { // 确保最后一个bin不大于maxX,而且不小于maxX-binWidth binSize -= 1; } vXBin.resize(xSize); vYBin.resize(binSize); for (int i = 0; i < binSize; ++i) vYBin[i] = 0; for (int i = 0; i < xSize; ++i) { int binIdx = (int)((x[i] + binWidth / 2) / binWidth); if (binIdx >= binSize) binIdx = binSize - 1; vYBin[binIdx] += 1; } // 按大小顺序将修改后的x数值存储在vXBin中,点的顺序不同,训练出的高斯混合模型参数会有一些不同。 int xIdx = 0; for (int i = 0; i < binSize; ++i) { for (int j = 0; j < vYBin[i]; ++j) { vXBin[xIdx++] = i * binWidth; } } } /* 将标准高斯模型训练出的参数转换成自定义的系数, 并返回拟合后的Y值向量 */ struct cmpFunc { bool operator()(const pair& a, const pair& b) { return a.first < b.first; } }; void GMMToFactorEY(GMM& gmm, double binWidth, vector &vYBin, vector& vFactor, vector& vEY) { /* 需要调整曲线的权重,来拟合高斯曲线,而不是用概率密度 */ double zoomFactorSum = 0.0; int valNum = 0; vEY.resize(vYBin.size()); int topM = vYBin.size() / 4; if (topM < 1) topM = 1; /* 用堆排序的方式取前topM个最大值, 用来计算缩放参数*/ priority_queue, vector >, cmpFunc> pqTopM; for (int i = 0; i < vYBin.size(); ++i) { double xVal = i * binWidth; double probVal = gmm.GetProbability(&xVal); vEY[i] = probVal; pqTopM.push(make_pair(vYBin[i], probVal)); } for (int i = 0; i < topM; ++i) { pair topEle = pqTopM.top(); pqTopM.pop(); zoomFactorSum += topEle.first / topEle.second; } double zoomFactor = zoomFactorSum / topM; for (int i = 0; i < vEY.size(); ++i) { vEY[i] *= zoomFactor; } vFactor.clear(); vFactor.push_back(zoomFactor * gmm.Prior(0) / sqrt(2 * M_PI * *gmm.Variance(0))); vFactor.push_back(*gmm.Mean(0)); vFactor.push_back(sqrt(2 * *gmm.Variance(0))); vFactor.push_back(zoomFactor * gmm.Prior(1) / sqrt(2 * M_PI * *gmm.Variance(1))); vFactor.push_back(*gmm.Mean(1)); vFactor.push_back(sqrt(2 * *gmm.Variance(1))); } /* 计算平均数 */ template T Average(vector& vVal) { T sumVal = T(0); for (int i = 0; i < vVal.size(); ++i) { sumVal += vVal[i]; } return sumVal / vVal.size(); } /* 计算平方的均值 */ template T SquareAverage(vector& vVal) { vector vSquare(vVal.size()); for (int i = 0; i < vVal.size(); ++i) { vSquare[i] = vVal[i] * vVal[i]; } return Average(vSquare); } /* 计算向量x和y的相关距离, 向量维度必须相等*/ double CorrelationDistance(vector& vX, vector& vY) { vector vXY(vX.size()); for (int i = 0; i < vXY.size(); ++i) { vXY[i] = vX[i] * vY[i]; } double uv = Average(vXY); double uu = SquareAverage(vX); double vv = SquareAverage(vY); double dist = 1.0 - uv / sqrt(uu * vv); return abs(dist); } /* 处理一个知识颗粒 */ struct ThreadParam { fs::path matFilePath; fs::path outFilePath; }; void ThreadProcessData(const ThreadParam& param) { const fs::path& matFilePath = param.matFilePath; const fs::path& outFilePath = param.outFilePath; double* hs = nullptr; int rowNum = 0; int colNum = 0; hs = ReadMatlabMat(matFilePath.string(), "hs", &rowNum, &colNum); vectorvXBin; vectorvYBin; vectorvEY; vectorvFactor; /* 用来保存数据,存入mat文件 */ vectorvDist(rowNum); vectorvFactorAll; for (int i = 0; i < rowNum; ++i) { PutXtoBin(hs + i * colNum, colNum, 0.2, vXBin, vYBin); GMM gmm(1, 2); // 1维, 2个高斯模型 gmm.Train(vXBin.data(), vXBin.size()); GMMToFactorEY(gmm, 0.2, vYBin, vFactor, vEY); vDist[i] = CorrelationDistance(vYBin, vEY); vFactorAll.insert(vFactorAll.end(), vFactor.begin(), vFactor.end()); } /* 写入matlab文件 */ MATFile* pMatFile = matOpen(outFilePath.string().c_str(), "w"); SaveMatrix(vFactorAll.data(), pMatFile, "factor", rowNum, 6); SaveMatrix(vDist.data(), pMatFile, "correlation", rowNum, 1); matClose(pMatFile); delete[] hs; } /* 程序入口 */ int main(int argc, char** argv) { if (argc != 5) { cerr << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl; return 1; } string parrentDir(argv[1]); // 知识颗粒的父目录名称 string hsMatSuffix(argv[2]); // hs矩阵对应的mat文件的后缀名(可以是全文件名,可以是文件名后缀,必须保证唯一) fs::path outFileName(argv[3]); ThreadPool thPool(8); clock_t begin, finish; begin = clock(); /* 遍历所有的知识颗粒目录,逐一进行处理 */ for (auto& childDir : fs::directory_iterator(parrentDir)) { fs::path outFilePath = childDir / outFileName; for (auto& file : fs::directory_iterator(childDir)) { const string& fileName = file.path().filename().string(); auto rPos = fileName.rfind(hsMatSuffix); if (rPos != string::npos && fileName.size() - rPos == hsMatSuffix.size()) { ThreadParam tParam = { file, outFilePath }; thPool.enqueue(ThreadProcessData, tParam); } } } thPool.~ThreadPool(); finish = clock(); cout << "Total time:" << (double)(finish - begin) / CLOCKS_PER_SEC << endl; return 0; }