#include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include using std::cout; using std::endl; using namespace std; #define STRING_BUF_SIZE 204800 class ThreadPool { public: ThreadPool(size_t); template auto enqueue(F&& f, Args&&... args) ->std::future::type>; ~ThreadPool(); private: // need to keep track of threads so we can join them std::vector< std::thread > workers; // the task queue std::queue< std::function > tasks; // synchronization std::mutex queue_mutex; std::condition_variable condition; bool stop; }; // the constructor just launches some amount of workers inline ThreadPool::ThreadPool(size_t threads) : stop(false) { for (size_t i = 0;i < threads;++i) workers.emplace_back( [this] { for (;;) { std::function task; { std::unique_lock lock(this->queue_mutex); this->condition.wait(lock, [this] { return this->stop || !this->tasks.empty(); }); if (this->stop && this->tasks.empty()) return; task = std::move(this->tasks.front()); this->tasks.pop(); } task(); } } ); } // add new work item to the pool template auto ThreadPool::enqueue(F&& f, Args&&... args) -> std::future::type> { using return_type = typename std::result_of::type; auto task = std::make_shared< std::packaged_task >( std::bind(std::forward(f), std::forward(args)...) ); std::future res = task->get_future(); { std::unique_lock lock(queue_mutex); // don't allow enqueueing after stopping the pool if (stop) throw std::runtime_error("enqueue on stopped ThreadPool"); tasks.emplace([task]() { (*task)(); }); } condition.notify_one(); return res; } // the destructor joins all threads inline ThreadPool::~ThreadPool() { { std::unique_lock lock(queue_mutex); stop = true; } condition.notify_all(); for (std::thread& worker : workers) worker.join(); } /* 读取一维double数据 */ void Read1DDouble(const mxArray* pMxArray, vector& vDat) { int rowNum, colNum; double* matData; rowNum = (int)mxGetM(pMxArray); colNum = (int)mxGetN(pMxArray); // cout << rowNum << " " << colNum << endl; matData = (double*)mxGetData(pMxArray); //获取指针 vDat.resize(rowNum * colNum); for (int i = 0; i < vDat.size(); ++i) vDat[i] = matData[i]; } /* 读取二维double数据 */ void Read2DDouble(const mxArray* pMxArray, vector>& vvDat) { int rowNum, colNum; double* matData; rowNum = (int)mxGetM(pMxArray); colNum = (int)mxGetN(pMxArray); vvDat.resize(rowNum); matData = (double*)mxGetData(pMxArray); //获取指针 for (int i = 0; i < rowNum; ++i) { vvDat[i].resize(colNum); for (int j = 0; j < colNum; ++j) { vvDat[i][j] = matData[j * rowNum + i]; } } } // 线程参数 struct TPRandSim { vector* pvTr; vector* pvRandPos; vector* pvH; vector>* pvvX; int numPositive; }; // 多线程入口函数 void ThreadRandSim(TPRandSim& param) { vector& vTr = *param.pvTr; vector& vRandPos = *param.pvRandPos; vector>& vvX = *param.pvvX; vector& vH = *param.pvH; int numPositive = param.numPositive; int rowNum = vvX.size(); int colNum = vvX[0].size(); clock_t begin = clock(), finish; /* 随机模拟 */ std::random_device rd; std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd())); for (int i = 0; i < rowNum; ++i) { int hRowIdx = vRandPos[i]; // 随机打乱之后的行索引 if (vH[hRowIdx] == 1) { for (int j = 0; j < colNum; ++j) { vTr[j] += vvX[i][j]; } } } for (auto& val : vTr) val /= numPositive; finish = clock(); // cout << "Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl; } /* 入口函数 */ /* 三个参数,一个返回值 输入: 1. x 二维数据,double类型,行数为文献数量,列数为字典长度(每个单词在所有文献中出现的次数超过5) 2. h 长度为文献个数,值为1代表该文献属于该知识颗粒(应该是),为0则不属于 3. numThread 输出: vs z score,显著性指数,一维 ps 与vs长度一致 */ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) { //void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) { if (nrhs < 2) { cout << "At least 2 arguments should be given for this function!" << endl; return; } clock_t begin = clock(), mid, finish; vector vH; vector> vvX; Read2DDouble(prhs[0], vvX); Read1DDouble(prhs[1], vH); int rowNum = vvX.size(); int colNum = vvX[0].size(); cout << vH.size() << '\t' << vvX.size() << endl; int numThread = 1; int loopNum = 1000; if (nrhs > 2) { double* pNumThread = (double*)mxGetData(prhs[2]); numThread = (int)pNumThread[0]; if (numThread < 1) numThread = 1; } if (nrhs > 3) { double* pLoopNum = (double*)mxGetData(prhs[3]); loopNum = (int)pLoopNum[0]; if (loopNum < 1000) loopNum = 1000; } /* 进行随机模拟 */ mid = clock(); vector vTs(colNum); // 初始数据,记录vH中label为1的行的行均值 int numPositive = 0; for (int i = 0; i < rowNum; ++i) { if (vH[i] == 1) { ++numPositive; for (int j = 0; j < colNum; ++j) { vTs[j] += vvX[i][j]; } } } for (auto& val : vTs) val /= numPositive; vector> vvTr(loopNum, vector(colNum, 0)); // 模拟结果 vector> vvRandPos(numThread, vector(rowNum)); for (int i = 0; i < rowNum; ++i) { for (auto& vRandPos : vvRandPos) { vRandPos[i] = i; } } ThreadPool thPool(numThread); int tid = 0; for (int i = 0; i < loopNum; ++i) { TPRandSim tParam = { &vvTr[i], &vvRandPos[tid++ % numThread], &vH, &vvX, numPositive }; thPool.enqueue(ThreadRandSim, tParam); //ThreadRandSim(tParam); } thPool.~ThreadPool(); finish = clock(); cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl; /* 计算结果 */ vector vVs(colNum); vector vPs(colNum); // 按列计算平均值 vector vMean(colNum); vector vStd(colNum); for (int i = 0; i < vvTr.size(); ++i) { for (int j = 0; j < vvTr[i].size(); ++j) { vMean[j] += vvTr[i][j]; } } for (auto& val : vMean) { val /= loopNum; } // 均值 for (int i = 0; i < vvTr.size(); ++i) { for (int j = 0; j < vvTr[i].size(); ++j) { const double diff = vvTr[i][j] - vMean[j]; vStd[j] += diff * diff; } } for (auto& val : vStd) { val = sqrt(val / (loopNum - 1)); } // 均方根 // 计算vs for (int i = 0; i < vVs.size(); ++i) { vVs[i] = (vTs[i] - vMean[i]) / vStd[i]; } // 计算ps vector vSumGreater(colNum); vector vSumLess(colNum); for (int i = 0; i < loopNum; ++i) { for (int j = 0; j < colNum; ++j) { if (vvTr[i][j] >= vTs[j]) vSumGreater[j] ++; if (vvTr[i][j] <= vTs[j]) vSumLess[j] ++; } } for (auto& val : vSumGreater) val /= loopNum; for (auto& val : vSumLess) val /= loopNum; for (int i = 0; i < colNum; ++i) { vPs[i] = min(vSumGreater[i], vSumLess[i]); } ofstream ofs("d:\\result.txt"); for (int i = 0; i < colNum; ++i) { ofs << vVs[i] << '\t' << vPs[i] << endl; } ofs.close(); /* 写入结果 */ if (nlhs > 0) { // vs mxArray* pWriteArray = NULL;//matlab格式矩阵 //创建一个rowNum*colNum的矩阵 pWriteArray = mxCreateDoubleMatrix(1, vVs.size(), mxREAL); //把data的值赋给pWriteArray指针 memcpy((void*)(mxGetPr(pWriteArray)), (void*)vVs.data(), sizeof(double) * vVs.size()); plhs[0] = pWriteArray; // 赋值给返回值 } if (nlhs > 1) { // ps mxArray* pWriteArray = NULL;//matlab格式矩阵 //创建一个rowNum*colNum的矩阵 pWriteArray = mxCreateDoubleMatrix(1, vPs.size(), mxREAL); //把data的值赋给pWriteArray指针 memcpy((void*)(mxGetPr(pWriteArray)), (void*)vPs.data(), sizeof(double)* vPs.size()); plhs[1] = pWriteArray; // 赋值给返回值 } finish = clock(); cout << "Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl; }