多线程处理给定的文件夹下的所有子目录,进行高斯混合模型拟合

This commit is contained in:
zzh 2023-09-18 01:35:00 +08:00
parent fb5fe032a4
commit f9e3d038c1
2 changed files with 80 additions and 8 deletions

BIN
GMM/gmm_result.mat 100644

Binary file not shown.

View File

@ -10,6 +10,8 @@
#include <chrono>
#include <exception>
#include <filesystem>
#include <memory>
#include <queue>
#include <string>
#include <thread>
#include <vector>
#ifdef _WIN32
#include <io.h>
#include <process.h>
@ -22,6 +24,7 @@
using namespace std;
using std::cout;
using std::vector;
namespace fs = std::filesystem;
/* 从mat文件中读取给定名称的矩阵数据并获取矩阵的行列数值 */
template<typename T>
@ -39,7 +42,7 @@ T* ReadMatlabMat(const string &filePath, const string &mtxName, int *pRowNum, in
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
rowNum = mxGetM(pMxArray);
colNum = mxGetN(pMxArray);
cout << rowNum << " " << colNum << endl;
// cout << rowNum << " " << colNum << endl;
matData = (double*)mxGetData(pMxArray); //获取指针
dst = new T[rowNum * colNum];
@ -145,10 +148,10 @@ void GMMToFactorEY(GMM& gmm, double binWidth, vector<double> &vYBin, vector<doub
for (int i = 0; i < topM; ++i) {
pair<double, double> topEle = pqTopM.top();
pqTopM.pop();
cout << topEle.first << '\t' << topEle.second << endl;
// cout << topEle.first << '\t' << topEle.second << endl;
zoomFactorSum += topEle.first / topEle.second;
}
cout << endl;
// cout << endl;
double zoomFactor = zoomFactorSum / topM;
@ -311,15 +314,84 @@ void processTxtData(const string& filePath) {
ifs.close();
}
/* Work item for processing one knowledge granule: the pair of paths handed to a worker thread. */
struct ThreadParam {
    // Input .mat file; ThreadProcessData reads the matrix named "hs" from it.
    fs::path matFilePath{};
    // Output .mat file; ThreadProcessData writes the "factor" and "correlation" matrices to it.
    fs::path outFilePath{};
};
void ThreadProcessData(const ThreadParam& param) {
const fs::path& matFilePath = param.matFilePath;
const fs::path& outFilePath = param.outFilePath;
// cout << parrentPath.string() << '\t' << matFilePath.filename().string() << endl;
cout << outFilePath.string() << endl;
double* hs = nullptr;
int rowNum = 0;
int colNum = 0;
hs = ReadMatlabMat<double>(matFilePath.string(), "hs", &rowNum, &colNum);
vector<double>vXBin;
vector<double>vYBin;
vector<double>vEY;
vector<double>vFactor;
/* 用来保存数据存入mat文件 */
vector<double>vDist(rowNum);
vector<double>vFactorAll;
for (int i = 0; i < rowNum; ++i) {
PutXtoBin(hs + i * colNum, colNum, 0.2, vXBin, vYBin);
GMM gmm(1, 2); // 1维 2个高斯模型
gmm.Train(vXBin.data(), vXBin.size());
GMMToFactorEY(gmm, 0.2, vYBin, vFactor, vEY);
vDist[i] = CorrelationDistance(vYBin, vEY);
vFactorAll.insert(vFactorAll.end(), vFactor.begin(), vFactor.end());
}
/* 写入matlab文件 */
MATFile* pMatFile = matOpen(outFilePath.string().c_str(), "w");
SaveMatrix<double>(vFactorAll.data(), pMatFile, "factor", rowNum, 6);
SaveMatrix<double>(vDist.data(), pMatFile, "correlation", rowNum, 1);
matClose(pMatFile);
delete[] hs;
}
int main(int argc, char** argv) {
//if (argc != 2)
// cerr << "This program should take one argument(input data file)!" << endl;
if (argc != 4) {
cerr << "This program should take 3 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename)!" << endl;
return 1;
}
string parrentDir(argv[1]); // 知识颗粒的父目录名称
string hsMatSuffix(argv[2]); // hs矩阵对应的mat文件的后缀名可以是全文件名可以是文件名后缀必须保证唯一
fs::path outFileName(argv[3]);
vector<thread> vThread;
clock_t begin, finish;
begin = clock();
/* 遍历所有的知识颗粒目录,注意进行处理 */
for (auto& childDir : fs::directory_iterator(parrentDir)) {
// cout << childDir.path().string() << endl;
fs::path outFilePath = childDir / outFileName;
for (auto& file : fs::directory_iterator(childDir)) {
// cout << file.path().filename().string() << endl;
const string& fileName = file.path().filename().string();
auto rPos = fileName.rfind(hsMatSuffix);
if (rPos != string::npos && fileName.size() - rPos == hsMatSuffix.size()) {
ThreadParam tParam = { file, outFilePath };
vThread.push_back(thread(ThreadProcessData, tParam));
// ThreadProcessData(tParam);
}
}
}
for (auto& thread : vThread) {
thread.join();
}
finish = clock();
cout << "Total time:" << (double)(finish - begin) / CLOCKS_PER_SEC << endl;
// processMatData(argv[1]);
processMatData("D:\\Twirls\\runtime\\ALS_test\\1775\\twirls_id_abs2class_hs.mat");
processTxtData("D:\\Twirls\\backup\\xy.txt");
// processMatData("D:\\Twirls\\runtime\\ALS_test\\1775\\twirls_id_abs2class_hs.mat");
// processTxtData("D:\\Twirls\\backup\\xy.txt");
return 0;
}