实现了随机模拟的mex函数

This commit is contained in:
zzh 2023-10-07 04:21:54 +08:00
parent 1b63656908
commit c9393a54e3
7 changed files with 533 additions and 15 deletions

View File

@ -28,7 +28,7 @@ bool ReadChildString2D(const string& filePath, const string& parentName, const s
mxArray* pMxArray = nullptr;
mxArray* pCell = nullptr;
int rowNum, colNum;
char strBuf[STRING_BUF_SIZE];
char *strBuf = new char[STRING_BUF_SIZE];
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) {
@ -63,6 +63,7 @@ bool ReadChildString2D(const string& filePath, const string& parentName, const s
}
}
mxDestroyArray(pMxArray);
delete[]strBuf;
return true;
}

View File

@ -119,8 +119,8 @@
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="CorrelationDist.cpp" />
<ClCompile Include="main.cpp" />
<ClCompile Include="RandSim.cpp" />
</ItemGroup>
<ItemGroup>
<None Include="packages.config" />

View File

@ -18,7 +18,7 @@
<ClCompile Include="main.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="CorrelationDist.cpp">
<ClCompile Include="RandSim.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>

381
MexFunc/RandSim.cpp 100644
View File

@ -0,0 +1,381 @@
#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <string>
#include <unordered_set>
#include <ctime>
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <unordered_map>
#include <set>
#include <fstream>
#include <algorithm>
#include <random>
#include <cmath>
using std::cout;
using std::endl;
using namespace std;
#define STRING_BUF_SIZE 204800
class ThreadPool {
public:
ThreadPool(size_t);
template<class F, class... Args>
auto enqueue(F&& f, Args&&... args)
->std::future<typename std::result_of<F(Args...)>::type>;
~ThreadPool();
private:
// need to keep track of threads so we can join them
std::vector< std::thread > workers;
// the task queue
std::queue< std::function<void()> > tasks;
// synchronization
std::mutex queue_mutex;
std::condition_variable condition;
bool stop;
};
// the constructor just launches some amount of workers
inline ThreadPool::ThreadPool(size_t threads)
: stop(false)
{
for (size_t i = 0;i < threads;++i)
workers.emplace_back(
[this]
{
for (;;)
{
std::function<void()> task;
{
std::unique_lock<std::mutex> lock(this->queue_mutex);
this->condition.wait(lock,
[this] { return this->stop || !this->tasks.empty(); });
if (this->stop && this->tasks.empty())
return;
task = std::move(this->tasks.front());
this->tasks.pop();
}
task();
}
}
);
}
// add new work item to the pool
template<class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type>
{
using return_type = typename std::result_of<F(Args...)>::type;
auto task = std::make_shared< std::packaged_task<return_type()> >(
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
);
std::future<return_type> res = task->get_future();
{
std::unique_lock<std::mutex> lock(queue_mutex);
// don't allow enqueueing after stopping the pool
if (stop)
throw std::runtime_error("enqueue on stopped ThreadPool");
tasks.emplace([task]() { (*task)(); });
}
condition.notify_one();
return res;
}
// the destructor joins all threads
inline ThreadPool::~ThreadPool()
{
{
std::unique_lock<std::mutex> lock(queue_mutex);
stop = true;
}
condition.notify_all();
for (std::thread& worker : workers)
worker.join();
}
// 读取一维cell字符串并转换成大写
inline bool Read1DWord(const mxArray* pMxArray, vector<string>& vStr) {
mxArray* pCell = nullptr;
int rowNum, colNum;
char* strBuf = new char[STRING_BUF_SIZE];
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
vStr.resize(rowNum * colNum);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i);
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
return false;
}
vStr[i * colNum + j] = strBuf;
auto& lastStr = vStr[i * colNum + j];
transform(lastStr.cbegin(), lastStr.cend(), lastStr.begin(), ::toupper); // 转成大写
}
}
delete[]strBuf;
return true;
}
// 读取二维cell字符串并转换成大写
inline bool Read2DWord(const mxArray* pMxArray, vector<vector<string>>& vvStr) {
mxArray* pCell = nullptr;
int rowNum, colNum;
char* strBuf = new char[STRING_BUF_SIZE];
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i);
int childRowNum = (int)mxGetM(pCell);
int childColNum = (int)mxGetN(pCell);
vvStr.push_back(vector<string>());
Read1DWord(pCell, vvStr.back());
}
}
delete[]strBuf;
return true;
}
// 从txt文件里读取字符串, 并转换成大写
inline void ReadWordFromFile(const string& filePath, vector<vector<string>>& vvStr) {
filebuf fb;
if (fb.open(filePath.c_str(), ios::in) == NULL) {
cout << "FilePath error: " << filePath << endl;
return;
}
istream ist(&fb);
string lineInfo;
while (getline(ist, lineInfo)) {
int i = 0;
vvStr.push_back(vector<string>());
vector<string> & vecStr = vvStr.back();
string tmp;
while (i < lineInfo.length()) {
while (i < lineInfo.length() && lineInfo[i] != ' ') {
tmp += lineInfo[i++];
}
if (!tmp.empty()) {
transform(tmp.begin(), tmp.end(), tmp.begin(), ::toupper);
vecStr.push_back(tmp);
}
tmp.clear();
++i;
}
}
fb.close();
}
// 线程参数
struct TPRandSim {
vector<int>* pvZr;
vector<int>* pvRandPos;
unordered_map<string, int>* pumDicWordPos;
vector<vector<string>>* pvvWd2;
int wdSize;
};
// 多线程入口函数
void ThreadRandSim(TPRandSim& param) {
vector<int> &vZr = *param.pvZr;
vector<int> &vRandPos = *param.pvRandPos;
unordered_map<string, int> &umDicWordPos = *param.pumDicWordPos;
vector<vector<string>> &vvWd2 = *param.pvvWd2;
int wdSize = param.wdSize;
clock_t begin = clock(), finish;
/* 随机模拟 */
std::random_device rd;
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
unordered_set<int> usPos;
for (int i = 0; i < wdSize; ++i) {
// cout << i << '\t' << vRandPos[i] << '\t' << vvWd2.size() << endl;
auto& vWd2 = vvWd2[vRandPos[i]];
usPos.clear();
for (auto& word : vWd2) {
auto itr = umDicWordPos.find(word);
if (itr != umDicWordPos.end()) {
usPos.insert(itr->second);
}
}
for (auto idx : usPos) {
vZr[idx] += 1;
}
}
finish = clock();
// cout << "Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}
/* 入口函数 */
/*
1. wd cell
2. wd2 5wcell
3. dicr cell
4. numThread
5. numLoop
vr
*/
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
//void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
if (nrhs < 1) {
cout << "At least 3 arguments should be given for this function!" << endl;
return;
}
clock_t begin = clock(), mid, finish;
vector<string> vDicr;
vector<vector<string>> vvWd;
vector<vector<string>> vvWd2; // 5w word
Read2DWord(prhs[0], vvWd);
// Read2DWord(prhs[1], vvWd2);
char* strBuf = new char[STRING_BUF_SIZE];
mxGetString(prhs[1], strBuf, STRING_BUF_SIZE);
string wd2FilePath(strBuf);
delete[]strBuf;
ReadWordFromFile(wd2FilePath, vvWd2);
Read1DWord(prhs[2], vDicr);
// char* strBuf = new char[STRING_BUF_SIZE];
// mxGetString(prhs[1], strBuf, STRING_BUF_SIZE);
// string wd2FilePath(strBuf);
// delete[]strBuf;
// cout << wd2FilePath << endl;
// vector<vector<string>> vvWd3;
// ReadWordFromFile("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt", vvWd3);
//ofstream ofs("d:\\diff.txt");
//ofs << vvWd2.size() << '\t' << vvWd3.size() << endl;
//for (int i = 0; i < vvWd2.size(); ++i) {
// if (vvWd2[i].size() != vvWd3[i].size())
// ofs << vvWd2[i].size() << '\t' << vvWd3[i].size() << endl;
// //for (int j = 0; j < vvWd2[i].size(); ++j) {
// // if (vvWd2[i][j] != vvWd3[i][j]) {
// // ofs << i+1 << '\t' << j+1 << '\t' << vvWd2[i][j] << '\t' << vvWd3[i][j] << endl;
// // }
// //}
//}
//ofs.close();
int numThread = 1;
int loopNum = 1000;
if (nrhs > 3) {
double* pNumThread = (double*)mxGetData(prhs[3]);
numThread = (int)pNumThread[0];
if (numThread < 1) numThread = 1;
}
if (nrhs > 4) {
double* pLoopNum = (double*)mxGetData(prhs[4]);
loopNum = (int)pLoopNum[0];
if (loopNum < 1000) loopNum = 1000;
}
/* 统计dicr字典中每个单词在wd中出现的次数 */
mid = clock();
unordered_map<string, int> umWordPos;
for (int i = 0; i < vDicr.size(); ++i) umWordPos[vDicr[i]] = i; // 记录单词位置
vector<int> vZs(vDicr.size());
unordered_set<int> usPos; // 多次出现在wd中的单词只统计一次这是原matlab代码的功能是否需要修改
for (auto & vWd : vvWd) {
usPos.clear();
for (auto & word : vWd) {
auto itr = umWordPos.find(word);
if (itr != umWordPos.end()) {
usPos.insert(itr->second);
}
}
for (auto idx : usPos) {
vZs[idx] += 1;
}
}
finish = clock();
cout << "Calc word occurrence time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
/* 进行随机模拟 */
mid = clock();
vector<vector<int>> vvZr(loopNum, vector<int>(vDicr.size(), 0)); // 输出结果
vector<vector<int>> vvRandPos(numThread, vector<int>(vvWd2.size()));
for (int i = 0; i < vvWd2.size(); ++i) {
for (auto& vRandPos : vvRandPos) {
vRandPos[i] = i;
}
}
//ThreadPool thPool(numThread);
int tid = 0;
for (int i = 0; i < loopNum; ++i) {
TPRandSim tParam = { &vvZr[i], &vvRandPos[tid++ % numThread], &umWordPos, &vvWd2, vvWd.size()};
//thPool.enqueue(ThreadRandSim, tParam);
ThreadRandSim(tParam);
}
//thPool.~ThreadPool();
finish = clock();
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
/* 计算vr */
vector<double> vVr(vDicr.size());
// 按列计算平均值
vector<double> vMean(vDicr.size());
vector<double> vStd(vDicr.size());
for (int i = 0; i < vvZr.size(); ++i) {
for (int j = 0; j < vvZr[i].size(); ++j) {
vMean[j] += vvZr[i][j];
}
}
for (auto& val : vMean) { val /= loopNum; } // 均值
for (int i = 0; i < vvZr.size(); ++i) {
for (int j = 0; j < vvZr[i].size(); ++j) {
const double diff = vvZr[i][j] - vMean[j];
vStd[j] += diff * diff;
}
}
for (auto& val : vStd) { val = sqrt(val / (loopNum - 1)); } // 均方根
// 计算vr
for (int i = 0; i < vVr.size(); ++i) {
vVr[i] = (vZs[i] - vMean[i]) / vStd[i];
}
// ofstream ofs("d:\\result.txt");
// int i = 0;
// for (auto& vr : vVr) {
// ofs << vr << endl;
// }
// ofs.close();
/* 写入结果 */
if (nlhs > 0) {
mxArray* pWriteArray = NULL;//matlab格式矩阵
//创建一个rowNum*colNum的矩阵
pWriteArray = mxCreateDoubleMatrix(1, vVr.size(), mxREAL);
//把data的值赋给pWriteArray指针
memcpy((void*)(mxGetPr(pWriteArray)), (void*)vVr.data(), sizeof(double) * vVr.size());
plhs[0] = pWriteArray; // 赋值给返回值
}
finish = clock();
cout << "Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}

View File

@ -0,0 +1,109 @@
#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <unordered_set>
#include <ctime>
#include <immintrin.h>
#include <zmmintrin.h>
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <unordered_set>
#include <set>
#include <fstream>
using std::cout;
using std::endl;
using namespace std;
#define STRING_BUF_SIZE 204800
// 读取字符串并转换成大写, 插入set
bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
mxArray* pCell = nullptr;
int rowNum, colNum;
char* strBuf = new char[STRING_BUF_SIZE];
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i);
int childRowNum = (int)mxGetM(pCell);
int childColNum = (int)mxGetN(pCell);
for (int ii = 0; ii < childRowNum; ii++) {
for (int jj = 0; jj < childColNum; jj++) {
mxArray* pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
return false;
}
string str(strBuf);
transform(str.cbegin(), str.cend(), str.begin(), ::toupper); // 转成大写
sWord.insert(str);
}
}
}
}
delete[]strBuf;
return true;
}
/* 入口函数 */
// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
if (nrhs < 1) {
cout << "At least 1 arguments should be given for this function!" << endl;
return;
}
clock_t begin = clock(), finish;
//set<string> sOrderedWord;
unordered_set<string> usStr;
ReadInsertWord(prhs[0], usStr);
usStr.insert("A");
usStr.insert("Z");
///* 排序 */
set<string> sOrderedWord;
for (auto& word : usStr) {
sOrderedWord.insert(word);
}
//ofstream ofs("d:\\wd_dict.txt");
//for (auto& word : sOrderedWord) ofs << word << endl;
//ofs.close();
/* 写入结果 */
if (nlhs > 0) {
int wordSize = 0;
for (auto& word : sOrderedWord) {
if (word[0] >= 'A' && word[0] <= 'Z') {
wordSize++;
}
}
mxArray* pCell = mxCreateCellMatrix(1, wordSize);
int i = 0;
for (auto& word : sOrderedWord) {
if (word[0] >= 'A' && word[0] <= 'Z') {
mxArray* mxStr = mxCreateString(word.c_str());
mxSetCell(pCell, i++, mxStr);
//ofs << word << endl;
}
}
plhs[0] = pCell; // 赋值给返回值
}
//ofs.close();
finish = clock();
cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}

View File

@ -8,27 +8,40 @@ using namespace std;
int main(int argc, const char** argv)
{
string matFile = "D:\\x_large.mat";
//string matFile = "D:\\x_large.mat";
//string matFile = "D:\\x.mat";
//string matFile = "D:\\Twirls\\wd_small.mat";
//string matFile = "D:\\Twirls\\wd.mat";
clock_t begin = clock(), finish;
string wd2Mat = "D:\\wd2_5w.mat";
string dicrMat = "D:\\dicr.mat";
string wdMat = "D:\\wd.mat";
MATFile* pMatFile = nullptr;
mxArray* prhs[1];
int rowNum, colNum;
double* matData;
pMatFile = matOpen(matFile.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) {
cout << "filePath is error!" << endl;
return 1;
}
prhs[0] = matGetVariable(pMatFile, "x"); //获取.mat文件里面名为matrixName的矩阵
//string dicrMat = "D:\\dicr_large.mat";
//string wdMat = "D:\\wd_large.mat";
MATFile* pwdMat, *pwd2Mat, *pdicrMat;
mxArray* prhs[4];
pwdMat = matOpen(wdMat.c_str(), "r");
pwd2Mat = matOpen(wd2Mat.c_str(), "r");
pdicrMat = matOpen(dicrMat.c_str(), "r");
prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
prhs[1] = matGetVariable(pwd2Mat, "wd2");
// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
prhs[2] = matGetVariable(pdicrMat, "dicr");
mxArray* plhs = (mxArray*)malloc(sizeof(mxArray*));
mxArray** arg = &prhs[0];
mexFunction(1, &plhs, 1, arg);
mexFunction(1, &plhs, 3, arg);
//mexFunction(0, 0, 0, 0);
finish = clock();
cout << "mexFunction Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
return 0;
}

View File

@ -75,16 +75,24 @@
<ClCompile>
<Optimization>Disabled</Optimization>
<PreprocessorDefinitions>_DEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">D:\matlab2023a\extern\include;$(SolutionDir)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">CommonLib.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Platform)'=='Win32'">
<ClCompile>
<PreprocessorDefinitions>WIN32;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">D:\matlab2023a\extern\include;$(SolutionDir)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">CommonLib.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)'=='Release'">
<ClCompile>
@ -92,12 +100,18 @@
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">D:\matlab2023a\extern\include;$(SolutionDir)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">D:\matlab2023a\extern\include;$(SolutionDir)</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateWindowsMetadata>false</GenerateWindowsMetadata>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">CommonLib.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(Configuration)|$(Platform)'=='Release|x64'">CommonLib.lib;kernel32.lib;user32.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">$(OutDir)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)</AdditionalLibraryDirectories>
</Link>
</ItemDefinitionGroup>
<ItemGroup>