#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using std::cout; using std::endl; using namespace std; #define STRING_BUF_SIZE 204800 class ThreadPool { public: ThreadPool(size_t); template auto enqueue(F&& f, Args&&... args) ->std::future::type>; ~ThreadPool(); private: // need to keep track of threads so we can join them std::vector< std::thread > workers; // the task queue std::queue< std::function > tasks; // synchronization std::mutex queue_mutex; std::condition_variable condition; bool stop; }; // the constructor just launches some amount of workers inline ThreadPool::ThreadPool(size_t threads) : stop(false) { for (size_t i = 0;i < threads;++i) workers.emplace_back( [this] { for (;;) { std::function task; { std::unique_lock lock(this->queue_mutex); this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); }); if (this->stop && this->tasks.empty()) return; task = std::move(this->tasks.front()); this->tasks.pop(); } task(); } } ); } // add new work item to the pool template auto ThreadPool::enqueue(F&& f, Args&&... args) -> std::future::type> { using return_type = typename std::result_of::type; auto task = std::make_shared< std::packaged_task >( std::bind(std::forward(f), std::forward(args)...) ); std::future res = task->get_future(); { std::unique_lock lock(queue_mutex); // don't allow enqueueing after stopping the pool if (stop) throw std::runtime_error("enqueue on stopped ThreadPool"); tasks.emplace([task]() { (*task)(); }); } condition.notify_one(); return res; } // the destructor joins all threads inline ThreadPool::~ThreadPool() { { std::unique_lock lock(queue_mutex); stop = true; } condition.notify_all(); for (std::thread& worker : workers) worker.join(); } // 读取一维cell字符串并转换成大写 inline bool Read1DWord(const mxArray* pMxArray, vector& vStr) { mxArray* pCell = nullptr; int rowNum, colNum; char* strBuf = new char[STRING_BUF_SIZE]; rowNum = (int)mxGetM(pMxArray); colNum = (int)mxGetN(pMxArray); vStr.resize(rowNum * colNum); for (int i = 0; i < rowNum; ++i) { for (int j = 0; j < colNum; ++j) { pCell = mxGetCell(pMxArray, j * rowNum + i); if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) { cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl; return false; } vStr[i * colNum + j] = strBuf; auto& lastStr = vStr[i * colNum + j]; transform(lastStr.cbegin(), lastStr.cend(), lastStr.begin(), ::toupper); // 转成大写 } } delete[]strBuf; return true; } // 读取二维cell字符串并转换成大写 inline bool Read2DWord(const mxArray* pMxArray, vector>& vvStr) { mxArray* pCell = nullptr; int rowNum, colNum; char* strBuf = new char[STRING_BUF_SIZE]; rowNum = (int)mxGetM(pMxArray); colNum = (int)mxGetN(pMxArray); for (int i = 0; i < rowNum; ++i) { for (int j = 0; j < colNum; ++j) { pCell = mxGetCell(pMxArray, j * rowNum + i); int childRowNum = (int)mxGetM(pCell); int childColNum = (int)mxGetN(pCell); vvStr.push_back(vector()); Read1DWord(pCell, vvStr.back()); } } delete[]strBuf; return true; } // 从txt文件里读取字符串, 并转换成大写 inline void ReadWordFromFile(const string& filePath, vector>& vvStr) { filebuf fb; if (fb.open(filePath.c_str(), ios::in) == NULL) { cout << "FilePath error: " << filePath << endl; return; } istream ist(&fb); string lineInfo; while (getline(ist, lineInfo)) { int i = 0; vvStr.push_back(vector()); vector & vecStr = vvStr.back(); string tmp; while (i < lineInfo.length()) { while (i < lineInfo.length() && lineInfo[i] != ' ') { tmp += lineInfo[i++]; } if (!tmp.empty()) { transform(tmp.begin(), tmp.end(), tmp.begin(), ::toupper); vecStr.push_back(tmp); } tmp.clear(); ++i; } } fb.close(); } // 线程参数 struct TPRandSim { vector* pvZr; vector* pvRandPos; unordered_map* pumDicWordPos; vector>* pvvWd2; int wdSize; }; // 多线程入口函数 void ThreadRandSim(TPRandSim& param) { vector &vZr = *param.pvZr; vector &vRandPos = *param.pvRandPos; unordered_map &umDicWordPos = *param.pumDicWordPos; vector> &vvWd2 = *param.pvvWd2; int wdSize = param.wdSize; clock_t begin = clock(), finish; /* 随机模拟 */ std::random_device rd; std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd())); unordered_set usPos; for (int i = 0; i < wdSize; ++i) { // cout << i << '\t' << vRandPos[i] << '\t' << vvWd2.size() << endl; auto& vWd2 = vvWd2[vRandPos[i]]; usPos.clear(); for (auto& word : vWd2) { auto itr = umDicWordPos.find(word); if (itr != umDicWordPos.end()) { usPos.insert(itr->second); } } for (auto idx : usPos) { vZr[idx] += 1; } } finish = clock(); // cout << "Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl; } /* 入口函数 */ /* 三个参数,一个返回值 输入: 1. wd 文献摘要中的单词,二维cell 2. wd2 包含5w个摘要,每个摘要中包含多个单词,二维cell 3. dicr 由大写字母组成的字典,按字母序排序的,一维cell 4. numThread 5. numLoop 输出: vr 应该是相关性数值 */ //void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) { void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { if (nrhs < 1) { cout << "At least 3 arguments should be given for this function!" << endl; return; } clock_t begin = clock(), mid, finish; vector vDicr; vector> vvWd; vector> vvWd2; // 5w word Read2DWord(prhs[0], vvWd); // Read2DWord(prhs[1], vvWd2); char* strBuf = new char[STRING_BUF_SIZE]; mxGetString(prhs[1], strBuf, STRING_BUF_SIZE); string wd2FilePath(strBuf); delete[]strBuf; ReadWordFromFile(wd2FilePath, vvWd2); Read1DWord(prhs[2], vDicr); // char* strBuf = new char[STRING_BUF_SIZE]; // mxGetString(prhs[1], strBuf, STRING_BUF_SIZE); // string wd2FilePath(strBuf); // delete[]strBuf; // cout << wd2FilePath << endl; // vector> vvWd3; // ReadWordFromFile("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt", vvWd3); //ofstream ofs("d:\\diff.txt"); //ofs << vvWd2.size() << '\t' << vvWd3.size() << endl; //for (int i = 0; i < vvWd2.size(); ++i) { // if (vvWd2[i].size() != vvWd3[i].size()) // ofs << vvWd2[i].size() << '\t' << vvWd3[i].size() << endl; // //for (int j = 0; j < vvWd2[i].size(); ++j) { // // if (vvWd2[i][j] != vvWd3[i][j]) { // // ofs << i+1 << '\t' << j+1 << '\t' << vvWd2[i][j] << '\t' << vvWd3[i][j] << endl; // // } // //} //} //ofs.close(); int numThread = 1; int loopNum = 1000; if (nrhs > 3) { double* pNumThread = (double*)mxGetData(prhs[3]); numThread = (int)pNumThread[0]; if (numThread < 1) numThread = 1; } if (nrhs > 4) { double* pLoopNum = (double*)mxGetData(prhs[4]); loopNum = (int)pLoopNum[0]; if (loopNum < 1000) loopNum = 1000; } /* 统计dicr字典中,每个单词在wd中出现的次数 */ mid = clock(); unordered_map umWordPos; for (int i = 0; i < vDicr.size(); ++i) umWordPos[vDicr[i]] = i; // 记录单词位置 vector vZs(vDicr.size()); unordered_set usPos; // 多次出现在wd中的单词,只统计一次,这是原matlab代码的功能,是否需要修改? for (auto & vWd : vvWd) { usPos.clear(); for (auto & word : vWd) { auto itr = umWordPos.find(word); if (itr != umWordPos.end()) { usPos.insert(itr->second); } } for (auto idx : usPos) { vZs[idx] += 1; } } finish = clock(); cout << "Calc word occurrence time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl; /* 进行随机模拟 */ mid = clock(); vector> vvZr(loopNum, vector(vDicr.size(), 0)); // 输出结果 vector> vvRandPos(numThread, vector(vvWd2.size())); for (int i = 0; i < vvWd2.size(); ++i) { for (auto& vRandPos : vvRandPos) { vRandPos[i] = i; } } ThreadPool thPool(numThread); int tid = 0; for (int i = 0; i < loopNum; ++i) { TPRandSim tParam = { &vvZr[i], &vvRandPos[tid++ % numThread], &umWordPos, &vvWd2, vvWd.size()}; thPool.enqueue(ThreadRandSim, tParam); //ThreadRandSim(tParam); } thPool.~ThreadPool(); finish = clock(); cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl; /* 计算vr */ vector vVr(vDicr.size()); // 按列计算平均值 vector vMean(vDicr.size()); vector vStd(vDicr.size()); for (int i = 0; i < vvZr.size(); ++i) { for (int j = 0; j < vvZr[i].size(); ++j) { vMean[j] += vvZr[i][j]; } } for (auto& val : vMean) { val /= loopNum; } // 均值 for (int i = 0; i < vvZr.size(); ++i) { for (int j = 0; j < vvZr[i].size(); ++j) { const double diff = vvZr[i][j] - vMean[j]; vStd[j] += diff * diff; } } for (auto& val : vStd) { val = sqrt(val / (loopNum - 1)); } // 均方根 // 计算vr for (int i = 0; i < vVr.size(); ++i) { vVr[i] = (vZs[i] - vMean[i]) / vStd[i]; } // ofstream ofs("d:\\result.txt"); // int i = 0; // for (auto& vr : vVr) { // ofs << vr << endl; // } // ofs.close(); /* 写入结果 */ if (nlhs > 0) { mxArray* pWriteArray = NULL;//matlab格式矩阵 //创建一个rowNum*colNum的矩阵 pWriteArray = mxCreateDoubleMatrix(1, vVr.size(), mxREAL); //把data的值赋给pWriteArray指针 memcpy((void*)(mxGetPr(pWriteArray)), (void*)vVr.data(), sizeof(double) * vVr.size()); plhs[0] = pWriteArray; // 赋值给返回值 } finish = clock(); cout << "Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl; }