完成了计算信息熵和平均信息熵(第三个计算信息熵的函数)
This commit is contained in:
parent
47b0cc187b
commit
ca3f99cc98
|
|
@ -0,0 +1,415 @@
|
||||||
|
#include <mex.h>
|
||||||
|
#include <mat.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <ctime>
|
||||||
|
#include <vector>
|
||||||
|
#include <queue>
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <future>
|
||||||
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <random>
|
||||||
|
#include <cmath>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
|
using std::cout;
|
||||||
|
using std::endl;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#define STRING_BUF_SIZE 204800
|
||||||
|
|
||||||
|
class ThreadPool {
|
||||||
|
public:
|
||||||
|
ThreadPool(size_t);
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto enqueue(F&& f, Args&&... args)
|
||||||
|
->std::future<typename std::result_of<F(Args...)>::type>;
|
||||||
|
~ThreadPool();
|
||||||
|
private:
|
||||||
|
// need to keep track of threads so we can join them
|
||||||
|
std::vector< std::thread > workers;
|
||||||
|
// the task queue
|
||||||
|
std::queue< std::function<void()> > tasks;
|
||||||
|
|
||||||
|
// synchronization
|
||||||
|
std::mutex queue_mutex;
|
||||||
|
std::condition_variable condition;
|
||||||
|
bool stop;
|
||||||
|
};
|
||||||
|
|
||||||
|
// the constructor just launches some amount of workers
|
||||||
|
inline ThreadPool::ThreadPool(size_t threads)
|
||||||
|
: stop(false)
|
||||||
|
{
|
||||||
|
for (size_t i = 0;i < threads;++i)
|
||||||
|
workers.emplace_back(
|
||||||
|
[this]
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
std::function<void()> task;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
||||||
|
this->condition.wait(lock,
|
||||||
|
[this] { return this->stop || !this->tasks.empty(); });
|
||||||
|
if (this->stop && this->tasks.empty())
|
||||||
|
return;
|
||||||
|
task = std::move(this->tasks.front());
|
||||||
|
this->tasks.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add new work item to the pool
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto ThreadPool::enqueue(F&& f, Args&&... args)
|
||||||
|
-> std::future<typename std::result_of<F(Args...)>::type>
|
||||||
|
{
|
||||||
|
using return_type = typename std::result_of<F(Args...)>::type;
|
||||||
|
|
||||||
|
auto task = std::make_shared< std::packaged_task<return_type()> >(
|
||||||
|
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
|
||||||
|
);
|
||||||
|
|
||||||
|
std::future<return_type> res = task->get_future();
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
|
||||||
|
// don't allow enqueueing after stopping the pool
|
||||||
|
if (stop)
|
||||||
|
throw std::runtime_error("enqueue on stopped ThreadPool");
|
||||||
|
|
||||||
|
tasks.emplace([task]() { (*task)(); });
|
||||||
|
}
|
||||||
|
condition.notify_one();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the destructor joins all threads
|
||||||
|
inline ThreadPool::~ThreadPool()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
stop = true;
|
||||||
|
}
|
||||||
|
condition.notify_all();
|
||||||
|
for (std::thread& worker : workers)
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 读取一维double数据 */
|
||||||
|
void Read1DDouble(const mxArray* pMxArray, vector<double>& vDat) {
|
||||||
|
int rowNum, colNum;
|
||||||
|
double* matData;
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
// cout << rowNum << " " << colNum << endl;
|
||||||
|
matData = (double*)mxGetData(pMxArray); //获取指针
|
||||||
|
vDat.resize(rowNum * colNum);
|
||||||
|
for (int i = 0; i < vDat.size(); ++i) vDat[i] = matData[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 读取二维double数据 */
|
||||||
|
void Read2DDouble(const mxArray* pMxArray, vector<vector<double>>& vvDat) {
|
||||||
|
int rowNum, colNum;
|
||||||
|
double* matData;
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
vvDat.resize(rowNum);
|
||||||
|
matData = (double*)mxGetData(pMxArray); //获取指针
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
vvDat[i].resize(colNum);
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vvDat[i][j] = matData[j * rowNum + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
|
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
int len = rowNum * colNum;
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
|
||||||
|
return pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
|
||||||
|
#define START_IDX(tid, threadNum, arrLen) (arrLen) * (tid) / (threadNum)
|
||||||
|
#define END_IDX(tid, threadNum, arrLen) (arrLen) * (tid + 1) / (threadNum)
|
||||||
|
|
||||||
|
// 线程参数
|
||||||
|
struct TPRandSim {
|
||||||
|
vector<vector<double>>* pvvSum;
|
||||||
|
vector<vector<double>>* pvvSqSum;
|
||||||
|
vector<vector<int>>* pvvRandPos;
|
||||||
|
vector<double>* pvIx;
|
||||||
|
vector<int>* pvCateNum;
|
||||||
|
vector<vector<double>>* pvvRound;
|
||||||
|
vector<vector<double>>* pvvX;
|
||||||
|
int loopNum;
|
||||||
|
int maxCategoryNum;
|
||||||
|
int tid;
|
||||||
|
int numThread;
|
||||||
|
};
|
||||||
|
|
||||||
|
// 多线程入口函数
|
||||||
|
void ThreadRandSim(TPRandSim param) {
|
||||||
|
vector<vector<double>>& vvSum = *param.pvvSum;
|
||||||
|
vector<vector<double>>& vvSqSum = *param.pvvSqSum;
|
||||||
|
vector<vector<int>>& vvRandPos = *param.pvvRandPos;
|
||||||
|
vector<double>& vIx = *param.pvIx;
|
||||||
|
vector<int>& vCateNum = *param.pvCateNum;
|
||||||
|
vector<vector<double>>& vvRound = *param.pvvRound;
|
||||||
|
vector<vector<double>>& vvX = *param.pvvX;
|
||||||
|
int tid = param.tid;
|
||||||
|
int numThread = param.numThread;
|
||||||
|
int loopNum = param.loopNum;
|
||||||
|
int maxCategoryNum = param.maxCategoryNum;
|
||||||
|
int rowNum = vvX.size();
|
||||||
|
int colNum = vvX[0].size();
|
||||||
|
|
||||||
|
clock_t begin = clock(), mid = clock(), finish;
|
||||||
|
/* 随机模拟 */
|
||||||
|
int startIdx = START_IDX(tid, numThread, loopNum);
|
||||||
|
int endIdx = END_IDX(tid, numThread, loopNum);
|
||||||
|
|
||||||
|
// if (tid == 0) cout << startIdx << '\t' << endIdx << endl;
|
||||||
|
|
||||||
|
for (int idx = startIdx; idx < endIdx; ++idx) { // 模拟轮次
|
||||||
|
// if (tid == 0 && idx % 100 == 0) {
|
||||||
|
// finish = clock();
|
||||||
|
// cout << idx << ": time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
// mid = finish;
|
||||||
|
// }
|
||||||
|
auto& vRandPos = vvRandPos[idx];
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
const int hRowIdx = vRandPos[i]; // 随机打乱之后的行索引
|
||||||
|
const int cateIdx = vIx[hRowIdx] - 1; // 聚类的编号
|
||||||
|
//if (cateIdx == 0) {
|
||||||
|
auto& vRound = vvRound[cateIdx];
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vRound[j] += vvX[i][j];
|
||||||
|
}
|
||||||
|
//}
|
||||||
|
}
|
||||||
|
for (int c = 0; c < maxCategoryNum; ++c) {
|
||||||
|
auto& vRound = vvRound[c];
|
||||||
|
const int numPositive = vCateNum[c];
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
const double val = vRound[j] / numPositive;
|
||||||
|
vvSum[c][j] += val;
|
||||||
|
vvSqSum[c][j] += val * val;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& vRound : vvRound) for (auto &val : vRound) val = 0;
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
// cout << tid << ": Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 多线程进行shuffle操作 */
|
||||||
|
struct TPShuffle {
|
||||||
|
vector<vector<int>>* pvvRandPos;
|
||||||
|
int loopNum;
|
||||||
|
int tid;
|
||||||
|
int numThread;
|
||||||
|
};
|
||||||
|
void ThreadShuffle(TPShuffle param) {
|
||||||
|
clock_t begin = clock(), finish;
|
||||||
|
vector<vector<int>>& vvRandPos = *param.pvvRandPos;
|
||||||
|
int tid = param.tid;
|
||||||
|
int numThread = param.numThread;
|
||||||
|
int loopNum = param.loopNum;
|
||||||
|
std::random_device rd;
|
||||||
|
int startIdx = START_IDX(tid, numThread, loopNum);
|
||||||
|
int endIdx = END_IDX(tid, numThread, loopNum);
|
||||||
|
for (int roundIdx = startIdx; roundIdx < endIdx; ++roundIdx) {
|
||||||
|
vector<int>& vRandPos = vvRandPos[roundIdx];
|
||||||
|
for (int i = 0; i < vRandPos.size(); ++i) vRandPos[i] = i;
|
||||||
|
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
// cout << tid << ": thread shuffle time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 入口函数 */
|
||||||
|
/*
|
||||||
|
三个参数,一个返回值
|
||||||
|
输入:
|
||||||
|
1. x 二维数据,double类型,行数为文献数量,列数为字典长度(每个单词在所有文献中出现的次数超过5)
|
||||||
|
2. h 长度为文献个数,值为1代表该文献属于该知识颗粒(应该是),为0则不属于
|
||||||
|
3. numThread
|
||||||
|
4. loopNum
|
||||||
|
输出:
|
||||||
|
vs z score,显著性指数,一维
|
||||||
|
ps 与vs长度一致
|
||||||
|
*/
|
||||||
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
if (nrhs < 2) {
|
||||||
|
cout << "At least 2 arguments should be given for this function!" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
clock_t begin = clock(), mid, finish;
|
||||||
|
vector<double> vIx; // 类别
|
||||||
|
vector<vector<double>> vvX;
|
||||||
|
|
||||||
|
|
||||||
|
Read2DDouble(prhs[0], vvX);
|
||||||
|
Read1DDouble(prhs[1], vIx);
|
||||||
|
int rowNum = vvX.size();
|
||||||
|
int colNum = vvX[0].size();
|
||||||
|
|
||||||
|
// 查找最大的类别编号
|
||||||
|
int maxCategoryNum = 1;
|
||||||
|
for (auto category : vIx) if (maxCategoryNum < category) maxCategoryNum = category;
|
||||||
|
|
||||||
|
int numThread = 1;
|
||||||
|
int loopNum = 1000;
|
||||||
|
|
||||||
|
if (nrhs > 2) {
|
||||||
|
double* pNumThread = (double*)mxGetData(prhs[2]);
|
||||||
|
numThread = (int)pNumThread[0];
|
||||||
|
if (numThread < 1) numThread = 1;
|
||||||
|
}
|
||||||
|
if (nrhs > 3) {
|
||||||
|
double* pLoopNum = (double*)mxGetData(prhs[3]);
|
||||||
|
loopNum = (int)pLoopNum[0];
|
||||||
|
if (loopNum < 1000) loopNum = 1000;
|
||||||
|
}
|
||||||
|
// 线程中用到的数据
|
||||||
|
vector<int> vCateNum(maxCategoryNum); // 每个类别包含的个数
|
||||||
|
vector<vector<double>> vvTs(maxCategoryNum, vector<double>(colNum)); //记录各个类别的行的行均值
|
||||||
|
vector<vector<vector<double>>> vvvSum(numThread, vector<vector<double>>(maxCategoryNum, vector<double>(colNum)));
|
||||||
|
vector<vector<vector<double>>> vvvSqSum(numThread, vector<vector<double>>(maxCategoryNum, vector<double>(colNum)));
|
||||||
|
vector<vector<vector<double>>> vvvRound(numThread, vector<vector<double>>(maxCategoryNum, vector<double>(colNum)));
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
// cout << numThread << '\t' << loopNum << endl;
|
||||||
|
|
||||||
|
// maxCategoryNum = 1;
|
||||||
|
|
||||||
|
/* 计算实际分类的均值 */
|
||||||
|
mid = clock();
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
const int cateIdx = vIx[i] - 1;
|
||||||
|
vCateNum[cateIdx] ++;
|
||||||
|
for (int j = 0; j < colNum; ++j) vvTs[cateIdx][j] += vvX[i][j];
|
||||||
|
}
|
||||||
|
for (int i = 0; i < maxCategoryNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vvTs[i][j] /= vCateNum[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// for (auto c : vCateNum) cout << c << endl;
|
||||||
|
|
||||||
|
// 先把loopNum次随机shuffle做了
|
||||||
|
mid = clock();
|
||||||
|
vector<vector<int>> vvRandPos(loopNum, vector<int>(rowNum));
|
||||||
|
vector<std::thread> vTHShuffle;
|
||||||
|
for (int i = 0; i < numThread; ++i) {
|
||||||
|
TPShuffle param = { &vvRandPos, loopNum, i, numThread };
|
||||||
|
vTHShuffle.push_back(std::thread(ThreadShuffle, param));
|
||||||
|
}
|
||||||
|
for (auto& t : vTHShuffle) t.join();
|
||||||
|
finish = clock();
|
||||||
|
cout << "Shuffle time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
|
||||||
|
vector<std::thread> vTHRandSim;
|
||||||
|
for (int i = 0; i < numThread; ++i) {
|
||||||
|
TPRandSim tParam = { &vvvSum[i], &vvvSqSum[i], &vvRandPos, &vIx, &vCateNum, &vvvRound[i], & vvX, loopNum, maxCategoryNum, i, numThread};
|
||||||
|
vTHRandSim.push_back(std::thread(ThreadRandSim, tParam));
|
||||||
|
}
|
||||||
|
for (auto& t : vTHRandSim) t.join();
|
||||||
|
|
||||||
|
// 合并所有线程的数据
|
||||||
|
auto& vvSum = vvvSum[0];
|
||||||
|
auto& vvSqSum = vvvSqSum[0];
|
||||||
|
for (int t = 1; t < numThread; ++t) {
|
||||||
|
for (int i = 0; i < maxCategoryNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vvSum[i][j] += vvvSum[t][i][j];
|
||||||
|
vvSqSum[i][j] += vvvSqSum[t][i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
|
||||||
|
/* 计算结果 */
|
||||||
|
vector<vector<double>> vvVs(maxCategoryNum, vector<double>(colNum));
|
||||||
|
|
||||||
|
// 按列计算平均值
|
||||||
|
vector<vector<double>> vvMean(maxCategoryNum, vector<double>(colNum));
|
||||||
|
vector<vector<double>> vvStd(maxCategoryNum, vector<double>(colNum));
|
||||||
|
|
||||||
|
for (int c = 0; c < maxCategoryNum; ++c) {
|
||||||
|
auto& vMean = vvMean[c];
|
||||||
|
auto& vStd = vvStd[c];
|
||||||
|
auto& vVs = vvVs[c];
|
||||||
|
auto& vSum = vvSum[c];
|
||||||
|
auto& vSqSum = vvSqSum[c];
|
||||||
|
auto& vTs = vvTs[c];
|
||||||
|
|
||||||
|
for (int i = 0; i < colNum; ++i) vMean[i] = vSum[i] / loopNum;
|
||||||
|
for (int i = 0; i < colNum; ++i) {
|
||||||
|
const double meanVal = vSum[i] / loopNum;
|
||||||
|
vMean[i] = meanVal; // 均值
|
||||||
|
const double sqDiff = vSqSum[i] + loopNum * meanVal * meanVal - 2 * meanVal * vSum[i];
|
||||||
|
vStd[i] = sqrt(sqDiff / (loopNum - 1)); // 均方根
|
||||||
|
}
|
||||||
|
|
||||||
|
// 计算vs
|
||||||
|
for (int i = 0; i < vVs.size(); ++i) {
|
||||||
|
vVs[i] = (vTs[i] - vMean[i]) / vStd[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// auto& vVs = vvVs[0];
|
||||||
|
// ofstream ofs("d:\\result_new.txt");
|
||||||
|
// for (int i = 0; i < colNum; ++i) {
|
||||||
|
// ofs << vVs[i] << endl;
|
||||||
|
// }
|
||||||
|
// ofs.close();
|
||||||
|
|
||||||
|
/* 写入结果 */
|
||||||
|
if (nlhs > 0) { // vs
|
||||||
|
vector<double> vVsData(maxCategoryNum* colNum);
|
||||||
|
for (int i = 0; i < maxCategoryNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vVsData[j * maxCategoryNum + i] = vvVs[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
plhs[0] = writeToMatDouble(vVsData.data(), maxCategoryNum, colNum);
|
||||||
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "All Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 供c++调试用
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
return mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,312 @@
|
||||||
|
#include <mex.h>
|
||||||
|
#include <mat.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <ctime>
|
||||||
|
#include <vector>
|
||||||
|
#include <queue>
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <future>
|
||||||
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <random>
|
||||||
|
#include <cmath>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <atomic>
|
||||||
|
using std::cout;
|
||||||
|
using std::endl;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#define STRING_BUF_SIZE 204800
|
||||||
|
// 将matlab存储方式转换成c存储方式
|
||||||
|
#define TRANS_ROW_COL(dst, src, rowNum, colNum) \
|
||||||
|
for (int rowI = 0; rowI < rowNum; ++rowI) { \
|
||||||
|
for (int colJ = 0; colJ < colNum; ++colJ) { \
|
||||||
|
dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
|
||||||
|
class ThreadPool {
|
||||||
|
public:
|
||||||
|
ThreadPool(size_t);
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto enqueue(F&& f, Args&&... args)
|
||||||
|
->std::future<typename std::result_of<F(Args...)>::type>;
|
||||||
|
~ThreadPool();
|
||||||
|
private:
|
||||||
|
// need to keep track of threads so we can join them
|
||||||
|
std::vector< std::thread > workers;
|
||||||
|
// the task queue
|
||||||
|
std::queue< std::function<void()> > tasks;
|
||||||
|
|
||||||
|
// synchronization
|
||||||
|
std::mutex queue_mutex;
|
||||||
|
std::condition_variable condition;
|
||||||
|
bool stop;
|
||||||
|
};
|
||||||
|
|
||||||
|
// the constructor just launches some amount of workers
|
||||||
|
inline ThreadPool::ThreadPool(size_t threads)
|
||||||
|
: stop(false)
|
||||||
|
{
|
||||||
|
for (size_t i = 0;i < threads;++i)
|
||||||
|
workers.emplace_back(
|
||||||
|
[this]
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
std::function<void()> task;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
||||||
|
this->condition.wait(lock,
|
||||||
|
[this] { return this->stop || !this->tasks.empty(); });
|
||||||
|
if (this->stop && this->tasks.empty())
|
||||||
|
return;
|
||||||
|
task = std::move(this->tasks.front());
|
||||||
|
this->tasks.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add new work item to the pool
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto ThreadPool::enqueue(F && f, Args&&... args)
|
||||||
|
-> std::future<typename std::result_of<F(Args...)>::type>
|
||||||
|
{
|
||||||
|
using return_type = typename std::result_of<F(Args...)>::type;
|
||||||
|
|
||||||
|
auto task = std::make_shared< std::packaged_task<return_type()> >(
|
||||||
|
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
|
||||||
|
);
|
||||||
|
|
||||||
|
std::future<return_type> res = task->get_future();
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
|
||||||
|
// don't allow enqueueing after stopping the pool
|
||||||
|
if (stop)
|
||||||
|
throw std::runtime_error("enqueue on stopped ThreadPool");
|
||||||
|
|
||||||
|
tasks.emplace([task]() { (*task)(); });
|
||||||
|
}
|
||||||
|
condition.notify_one();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the destructor joins all threads
|
||||||
|
inline ThreadPool::~ThreadPool()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
stop = true;
|
||||||
|
}
|
||||||
|
condition.notify_all();
|
||||||
|
for (std::thread& worker : workers)
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
// 读取一维cell字符串并转换成大写
|
||||||
|
inline bool ReadWord1DCell(const mxArray* pMxArray, vector<string>& vStr) {
|
||||||
|
mxArray* pCell = nullptr;
|
||||||
|
int rowNum, colNum;
|
||||||
|
char* strBuf = new char[STRING_BUF_SIZE];
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
vStr.resize(rowNum * colNum);
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
|
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
||||||
|
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
vStr[i * colNum + j] = strBuf;
|
||||||
|
auto& lastStr = vStr[i * colNum + j];
|
||||||
|
transform(lastStr.cbegin(), lastStr.cend(), lastStr.begin(), ::toupper); // 转成大写
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[]strBuf;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 读取二维cell字符串并转换成大写
|
||||||
|
inline bool ReadWord2DCell(const mxArray* pMxArray, vector<vector<string>>& vvStr) {
|
||||||
|
mxArray* pCell = nullptr;
|
||||||
|
int rowNum, colNum;
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
|
vvStr.push_back(vector<string>());
|
||||||
|
ReadWord1DCell(pCell, vvStr.back());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 读取由一维cell包裹的double数据,每个cell是一个一维的double数组
|
||||||
|
inline void ReadDoulbe1DCell(const mxArray* pMxArray, vector<vector<double> >& vvData) {
|
||||||
|
// 读取fr数值
|
||||||
|
int rowNum = (int)mxGetM(pMxArray);
|
||||||
|
int colNum = (int)mxGetN(pMxArray);
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
mxArray* pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
|
int childRowNum = (int)mxGetM(pCell);
|
||||||
|
int childColNum = (int)mxGetN(pCell);
|
||||||
|
vvData.push_back(vector<double>());
|
||||||
|
vvData.back().resize(childRowNum * childColNum);
|
||||||
|
double* pVal = (double*)mxGetData(pCell); //获取数据指针
|
||||||
|
TRANS_ROW_COL(vvData.back(), pVal, childRowNum, childColNum); // 行列存储方式转换
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
|
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
int len = rowNum * colNum;
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
|
||||||
|
return pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 多线程计算信息熵 */
|
||||||
|
struct TPEntropyMean {
|
||||||
|
vector<string>* pvDs;
|
||||||
|
vector<double>* pvFr;
|
||||||
|
vector<unordered_set<string>>* pvusAbsWord;
|
||||||
|
vector<double>* pvHs;
|
||||||
|
vector<double>* pvHd;
|
||||||
|
};
|
||||||
|
|
||||||
|
void ThreadCalcEntropyMean(TPEntropyMean& param) {
|
||||||
|
vector<string>& vDs = *param.pvDs; // 这一组ds
|
||||||
|
vector<double>& vFr = *param.pvFr; // frequency
|
||||||
|
vector<unordered_set<string>>& vusAbsWord = *param.pvusAbsWord;
|
||||||
|
vector<double>& vHs = *param.pvHs;
|
||||||
|
vector<double>& vHd = *param.pvHd;
|
||||||
|
const int numAbs = vusAbsWord.size();
|
||||||
|
const int numDsWord = vDs.size(); // 这一组数据中包含的单词数量
|
||||||
|
// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
|
||||||
|
for (int i = 0; i < numAbs; ++i) {
|
||||||
|
for (int j = 0; j < numDsWord; ++j) {
|
||||||
|
if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
|
||||||
|
vHs[i] -= vFr[j] * log2(vFr[j]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
vHd[i] = vHs[i] / vusAbsWord[i].size();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
输入:
|
||||||
|
1. ds: 知识颗粒中的信息,应该也是摘要,分割成了字符串,大写的。
|
||||||
|
2. frr: ds中每个单词对应的频率。
|
||||||
|
3. ws: 文献的摘要,被切割成特定的长度了,字符串已经分割好了。
|
||||||
|
[4]. numThread
|
||||||
|
输出:
|
||||||
|
1. hs: 信息熵,二维[len(ds)][len(ws)]
|
||||||
|
2. hd: 每个单词的平均信息熵,维度同hs
|
||||||
|
*/
|
||||||
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
if (nrhs < 3) {
|
||||||
|
cout << "At least 3 arguments should be given for this function!" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
clock_t begin = clock(), mid, finish;
|
||||||
|
|
||||||
|
vector<vector<string> > vvDs; // 每个知识颗粒的ds矩阵(词汇矩阵)
|
||||||
|
vector<vector<double> > vvFr; // 词汇对应的频率
|
||||||
|
ReadWord2DCell(prhs[0], vvDs);
|
||||||
|
ReadDoulbe1DCell(prhs[1], vvFr);
|
||||||
|
|
||||||
|
vector<vector<string>> vvWs;
|
||||||
|
ReadWord2DCell(prhs[2], vvWs); // 文献摘要的字符串数组
|
||||||
|
|
||||||
|
int numThread = 1;
|
||||||
|
if (nrhs > 3) {
|
||||||
|
double* pNumThread = (double*)mxGetData(prhs[3]);
|
||||||
|
numThread = (int)pNumThread[0];
|
||||||
|
if (numThread < 1) numThread = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
vector<unordered_set<string>> vusAbsWord(vvWs.size()); // 将每篇文章摘要的单词放入hash表
|
||||||
|
// 将处理摘要之后的每个词语放入hash表
|
||||||
|
for (int i=0; i<vvWs.size(); ++i) {
|
||||||
|
for (auto& word : vvWs[i]) {
|
||||||
|
vusAbsWord[i].insert(word);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl << flush;
|
||||||
|
|
||||||
|
int numGroup = vvDs.size();
|
||||||
|
int numAbs = vvWs.size(); // 摘要个数
|
||||||
|
// 计算结果
|
||||||
|
vector<vector<double>> vvHs(numGroup, vector<double>(numAbs));
|
||||||
|
vector<vector<double>> vvHd(numGroup, vector<double>(numAbs));
|
||||||
|
|
||||||
|
vector<string>* pvDs;
|
||||||
|
vector<double>* pvFr;
|
||||||
|
vector<unordered_set<string>>* pvusAbsWord;
|
||||||
|
vector<double>* pvHs;
|
||||||
|
vector<double>* pvHd;
|
||||||
|
/* 多线程计算信息熵 */
|
||||||
|
ThreadPool thPool(numThread);
|
||||||
|
for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
|
||||||
|
TPEntropyMean tp = { &vvDs[groupIdx], &vvFr[groupIdx], &vusAbsWord, &vvHs[groupIdx], &vvHd[groupIdx] };
|
||||||
|
thPool.enqueue(ThreadCalcEntropyMean, tp);
|
||||||
|
}
|
||||||
|
thPool.~ThreadPool();
|
||||||
|
|
||||||
|
// ofstream ofs("d:\\result_hs.txt");
|
||||||
|
// for (int i = 0; i < numGroup; ++i) {
|
||||||
|
// for (int j = 0; j < numAbs; ++j) {
|
||||||
|
// ofs << vvHs[i][j] << ' ';
|
||||||
|
// }
|
||||||
|
// ofs << endl;
|
||||||
|
// }
|
||||||
|
// ofs.close();
|
||||||
|
|
||||||
|
/* 将结果写入返回值 */
|
||||||
|
if (nlhs > 0) {
|
||||||
|
vector<double> vData(numGroup * numAbs);
|
||||||
|
for (int i = 0; i < numGroup; ++i) for (int j = 0; j < numAbs; ++j) vData[j * numGroup + i] = vvHs[i][j];
|
||||||
|
plhs[0] = writeToMatDouble(vData.data(), numGroup, numAbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nlhs > 1) {
|
||||||
|
vector<double> vData(numGroup * numAbs);
|
||||||
|
for (int i = 0; i < numGroup; ++i) for (int j = 0; j < numAbs; ++j) vData[j * numGroup + i] = vvHd[i][j];
|
||||||
|
plhs[1] = writeToMatDouble(vData.data(), numGroup, numAbs);
|
||||||
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Calc Entropy and Mean Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 供main调试调用 */
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
|
}
|
||||||
|
|
@ -99,10 +99,11 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
nlhs:输出参数数目(Number Left - hand side),等号左边
|
输入:
|
||||||
plhs:指向输出参数的指针(Point Left - hand side),等号左边
|
1. abs: 待感知的文献的摘要信息。
|
||||||
nrhs:输入参数数目(Number Right - hand side),等号右边
|
2. G: 知识颗粒,包含该程序需要的热词ds以及对应的频率fr。
|
||||||
prhs:指向输入参数的指针(Point Right - hand side),等号右边。要注意prhs是const的指针数组,即不能改变其指向内容。
|
输出:
|
||||||
|
1. hs: 信息熵,二维[len(知识颗粒)][len(文献)]
|
||||||
*/
|
*/
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
//cout << "MexCalcEntropy" << endl;
|
//cout << "MexCalcEntropy" << endl;
|
||||||
|
|
@ -232,6 +233,6 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 供main调试调用 */
|
/* 供main调试调用 */
|
||||||
void MexCalcEntropy(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
mexFunction(nlhs, plhs, nrhs, prhs);
|
mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
}
|
}
|
||||||
|
|
@ -17,9 +17,11 @@
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <set>
|
#include <set>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <algorithm>
|
|
||||||
#include <random>
|
#include <random>
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <atomic>
|
||||||
|
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
|
|
@ -27,89 +29,6 @@ using namespace std;
|
||||||
|
|
||||||
#define STRING_BUF_SIZE 204800
|
#define STRING_BUF_SIZE 204800
|
||||||
|
|
||||||
class ThreadPool {
|
|
||||||
public:
|
|
||||||
ThreadPool(size_t);
|
|
||||||
template<class F, class... Args>
|
|
||||||
auto enqueue(F&& f, Args&&... args)
|
|
||||||
->std::future<typename std::result_of<F(Args...)>::type>;
|
|
||||||
~ThreadPool();
|
|
||||||
private:
|
|
||||||
// need to keep track of threads so we can join them
|
|
||||||
std::vector< std::thread > workers;
|
|
||||||
// the task queue
|
|
||||||
std::queue< std::function<void()> > tasks;
|
|
||||||
|
|
||||||
// synchronization
|
|
||||||
std::mutex queue_mutex;
|
|
||||||
std::condition_variable condition;
|
|
||||||
bool stop;
|
|
||||||
};
|
|
||||||
|
|
||||||
// the constructor just launches some amount of workers
|
|
||||||
inline ThreadPool::ThreadPool(size_t threads)
|
|
||||||
: stop(false)
|
|
||||||
{
|
|
||||||
for (size_t i = 0;i < threads;++i)
|
|
||||||
workers.emplace_back(
|
|
||||||
[this]
|
|
||||||
{
|
|
||||||
for (;;)
|
|
||||||
{
|
|
||||||
std::function<void()> task;
|
|
||||||
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
|
||||||
this->condition.wait(lock,
|
|
||||||
[this] { return this->stop || !this->tasks.empty(); });
|
|
||||||
if (this->stop && this->tasks.empty())
|
|
||||||
return;
|
|
||||||
task = std::move(this->tasks.front());
|
|
||||||
this->tasks.pop();
|
|
||||||
}
|
|
||||||
|
|
||||||
task();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// add new work item to the pool
|
|
||||||
template<class F, class... Args>
|
|
||||||
auto ThreadPool::enqueue(F&& f, Args&&... args)
|
|
||||||
-> std::future<typename std::result_of<F(Args...)>::type>
|
|
||||||
{
|
|
||||||
using return_type = typename std::result_of<F(Args...)>::type;
|
|
||||||
|
|
||||||
auto task = std::make_shared< std::packaged_task<return_type()> >(
|
|
||||||
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
|
|
||||||
);
|
|
||||||
|
|
||||||
std::future<return_type> res = task->get_future();
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
|
||||||
|
|
||||||
// don't allow enqueueing after stopping the pool
|
|
||||||
if (stop)
|
|
||||||
throw std::runtime_error("enqueue on stopped ThreadPool");
|
|
||||||
|
|
||||||
tasks.emplace([task]() { (*task)(); });
|
|
||||||
}
|
|
||||||
condition.notify_one();
|
|
||||||
return res;
|
|
||||||
}
|
|
||||||
|
|
||||||
// the destructor joins all threads
|
|
||||||
inline ThreadPool::~ThreadPool()
|
|
||||||
{
|
|
||||||
{
|
|
||||||
std::unique_lock<std::mutex> lock(queue_mutex);
|
|
||||||
stop = true;
|
|
||||||
}
|
|
||||||
condition.notify_all();
|
|
||||||
for (std::thread& worker : workers)
|
|
||||||
worker.join();
|
|
||||||
}
|
|
||||||
|
|
||||||
/* 读取一维double数据 */
|
/* 读取一维double数据 */
|
||||||
void Read1DDouble(const mxArray* pMxArray, vector<double>& vDat) {
|
void Read1DDouble(const mxArray* pMxArray, vector<double>& vDat) {
|
||||||
|
|
@ -141,45 +60,68 @@ void Read2DDouble(const mxArray* pMxArray, vector<vector<double>>& vvDat) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
|
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
int len = rowNum * colNum;
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
|
||||||
|
return pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
|
||||||
|
#define START_IDX(tid, threadNum, arrLen) (arrLen) * (tid) / (threadNum)
|
||||||
|
#define END_IDX(tid, threadNum, arrLen) (arrLen) * (tid + 1) / (threadNum)
|
||||||
|
|
||||||
// 线程参数
|
// 线程参数
|
||||||
struct TPRandSim {
|
struct TPRandSim {
|
||||||
vector<double>* pvTr;
|
vector<vector<double>>* pvvTr;
|
||||||
vector<int>* pvRandPos;
|
vector<int>* pvRandPos;
|
||||||
vector<double>* pvH;
|
vector<double>* pvH;
|
||||||
vector<vector<double>>* pvvX;
|
vector<vector<double>>* pvvX;
|
||||||
int numPositive;
|
int numPositive;
|
||||||
|
int tid;
|
||||||
|
int numThread;
|
||||||
};
|
};
|
||||||
|
|
||||||
// 多线程入口函数
|
// 多线程入口函数
|
||||||
void ThreadRandSim(TPRandSim& param) {
|
void ThreadRandSim(TPRandSim param) {
|
||||||
vector<double>& vTr = *param.pvTr;
|
vector < vector<double>>& vvTr = *param.pvvTr;
|
||||||
vector<int>& vRandPos = *param.pvRandPos;
|
vector<int>& vRandPos = *param.pvRandPos;
|
||||||
vector<vector<double>>& vvX = *param.pvvX;
|
vector<vector<double>>& vvX = *param.pvvX;
|
||||||
vector<double>& vH = *param.pvH;
|
vector<double>& vH = *param.pvH;
|
||||||
int numPositive = param.numPositive;
|
int numPositive = param.numPositive;
|
||||||
int rowNum = vvX.size();
|
int rowNum = vvX.size();
|
||||||
int colNum = vvX[0].size();
|
int colNum = vvX[0].size();
|
||||||
|
int tid = param.tid;
|
||||||
|
int numThread = param.numThread;
|
||||||
|
|
||||||
clock_t begin = clock(), finish;
|
clock_t begin = clock(), finish;
|
||||||
/* 随机模拟 */
|
/* 随机模拟 */
|
||||||
std::random_device rd;
|
std::random_device rd;
|
||||||
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
|
int startIdx = START_IDX(tid, numThread, vvTr.size());
|
||||||
|
int endIdx = END_IDX(tid, numThread, vvTr.size());
|
||||||
|
|
||||||
for (int i = 0; i < rowNum; ++i) {
|
// cout << tid << '\t' << numThread << '\t' << startIdx << '\t' << endIdx << endl;
|
||||||
int hRowIdx = vRandPos[i]; // 随机打乱之后的行索引
|
|
||||||
if (vH[hRowIdx] == 1) {
|
for (int idx = startIdx; idx < endIdx; ++idx) {
|
||||||
for (int j = 0; j < colNum; ++j) {
|
auto& vTr = vvTr[idx];
|
||||||
vTr[j] += vvX[i][j];
|
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
int hRowIdx = vRandPos[i]; // 随机打乱之后的行索引
|
||||||
|
if (vH[hRowIdx] == 1) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vTr[j] += vvX[i][j];
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
for (auto& val : vTr) val /= numPositive;
|
||||||
}
|
}
|
||||||
for (auto& val : vTr) val /= numPositive;
|
|
||||||
finish = clock();
|
finish = clock();
|
||||||
// cout << "Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Thread Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/* 入口函数 */
|
/* 入口函数 */
|
||||||
/*
|
/*
|
||||||
三个参数,一个返回值
|
三个参数,一个返回值
|
||||||
|
|
@ -187,12 +129,12 @@ void ThreadRandSim(TPRandSim& param) {
|
||||||
1. x 二维数据,double类型,行数为文献数量,列数为字典长度(每个单词在所有文献中出现的次数超过5)
|
1. x 二维数据,double类型,行数为文献数量,列数为字典长度(每个单词在所有文献中出现的次数超过5)
|
||||||
2. h 长度为文献个数,值为1代表该文献属于该知识颗粒(应该是),为0则不属于
|
2. h 长度为文献个数,值为1代表该文献属于该知识颗粒(应该是),为0则不属于
|
||||||
3. numThread
|
3. numThread
|
||||||
|
4. loopNum
|
||||||
输出:
|
输出:
|
||||||
vs z score,显著性指数,一维
|
vs z score,显著性指数,一维
|
||||||
ps 与vs长度一致
|
ps 与vs长度一致
|
||||||
*/
|
*/
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
//void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
|
||||||
if (nrhs < 2) {
|
if (nrhs < 2) {
|
||||||
cout << "At least 2 arguments should be given for this function!" << endl;
|
cout << "At least 2 arguments should be given for this function!" << endl;
|
||||||
return;
|
return;
|
||||||
|
|
@ -206,7 +148,7 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
int rowNum = vvX.size();
|
int rowNum = vvX.size();
|
||||||
int colNum = vvX[0].size();
|
int colNum = vvX[0].size();
|
||||||
|
|
||||||
cout << vH.size() << '\t' << vvX.size() << endl;
|
// cout << vH.size() << '\t' << vvX.size() << endl;
|
||||||
|
|
||||||
int numThread = 1;
|
int numThread = 1;
|
||||||
int loopNum = 1000;
|
int loopNum = 1000;
|
||||||
|
|
@ -222,6 +164,10 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
if (loopNum < 1000) loopNum = 1000;
|
if (loopNum < 1000) loopNum = 1000;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
// cout << numThread << '\t' << loopNum << endl;
|
||||||
|
|
||||||
/* 进行随机模拟 */
|
/* 进行随机模拟 */
|
||||||
mid = clock();
|
mid = clock();
|
||||||
vector<double> vTs(colNum); // 初始数据,记录vH中label为1的行的行均值
|
vector<double> vTs(colNum); // 初始数据,记录vH中label为1的行的行均值
|
||||||
|
|
@ -236,7 +182,6 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
}
|
}
|
||||||
for (auto& val : vTs) val /= numPositive;
|
for (auto& val : vTs) val /= numPositive;
|
||||||
|
|
||||||
|
|
||||||
vector<vector<double>> vvTr(loopNum, vector<double>(colNum, 0)); // 模拟结果
|
vector<vector<double>> vvTr(loopNum, vector<double>(colNum, 0)); // 模拟结果
|
||||||
vector<vector<int>> vvRandPos(numThread, vector<int>(rowNum));
|
vector<vector<int>> vvRandPos(numThread, vector<int>(rowNum));
|
||||||
for (int i = 0; i < rowNum; ++i) {
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
|
@ -245,14 +190,13 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
ThreadPool thPool(numThread);
|
vector<std::thread> vT;
|
||||||
int tid = 0;
|
for (int i = 0; i < numThread; ++i) {
|
||||||
for (int i = 0; i < loopNum; ++i) {
|
TPRandSim tParam = { &vvTr, &vvRandPos[i], &vH, &vvX, numPositive, i, numThread };
|
||||||
TPRandSim tParam = { &vvTr[i], &vvRandPos[tid++ % numThread], &vH, &vvX, numPositive };
|
vT.push_back(std::thread(ThreadRandSim, tParam));
|
||||||
thPool.enqueue(ThreadRandSim, tParam);
|
|
||||||
//ThreadRandSim(tParam);
|
|
||||||
}
|
}
|
||||||
thPool.~ThreadPool();
|
for (auto& t : vT) t.join();
|
||||||
|
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
|
@ -295,29 +239,24 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
vPs[i] = min(vSumGreater[i], vSumLess[i]);
|
vPs[i] = min(vSumGreater[i], vSumLess[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ofstream ofs("d:\\result.txt");
|
// ofstream ofs("d:\\result_2.txt");
|
||||||
for (int i = 0; i < colNum; ++i) {
|
// for (int i = 0; i < colNum; ++i) {
|
||||||
ofs << vVs[i] << '\t' << vPs[i] << endl;
|
// ofs << vVs[i] << '\t' << vPs[i] << endl;
|
||||||
}
|
// }
|
||||||
ofs.close();
|
// ofs.close();
|
||||||
|
|
||||||
/* 写入结果 */
|
/* 写入结果 */
|
||||||
if (nlhs > 0) { // vs
|
if (nlhs > 0) { // vs
|
||||||
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
plhs[0] = writeToMatDouble(vVs.data(), 1, vVs.size());
|
||||||
//创建一个rowNum*colNum的矩阵
|
|
||||||
pWriteArray = mxCreateDoubleMatrix(1, vVs.size(), mxREAL);
|
|
||||||
//把data的值赋给pWriteArray指针
|
|
||||||
memcpy((void*)(mxGetPr(pWriteArray)), (void*)vVs.data(), sizeof(double) * vVs.size());
|
|
||||||
plhs[0] = pWriteArray; // 赋值给返回值
|
|
||||||
}
|
}
|
||||||
if (nlhs > 1) { // ps
|
if (nlhs > 1) { // ps
|
||||||
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
plhs[1] = writeToMatDouble(vPs.data(), 1, vPs.size());
|
||||||
//创建一个rowNum*colNum的矩阵
|
|
||||||
pWriteArray = mxCreateDoubleMatrix(1, vPs.size(), mxREAL);
|
|
||||||
//把data的值赋给pWriteArray指针
|
|
||||||
memcpy((void*)(mxGetPr(pWriteArray)), (void*)vPs.data(), sizeof(double)* vPs.size());
|
|
||||||
plhs[1] = pWriteArray; // 赋值给返回值
|
|
||||||
}
|
}
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 供c++调试用
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
return mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
}
|
}
|
||||||
|
|
@ -75,7 +75,7 @@ inline bool Read2DWord(const mxArray* pMxArray, vector<vector<string>>& vvStr) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// 将结果写入mxArray, 作为后续的返回值
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
mxArray* writeToMat(const double *data, int rowNum, int colNum) {
|
mxArray* writeToMatDouble(const double *data, int rowNum, int colNum) {
|
||||||
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
int len = rowNum * colNum;
|
int len = rowNum * colNum;
|
||||||
//创建一个rowNum*colNum的矩阵
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
|
@ -187,10 +187,10 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
/* 写入结果 */
|
/* 写入结果 */
|
||||||
mid = clock();
|
mid = clock();
|
||||||
if (nlhs > 0) {
|
if (nlhs > 0) {
|
||||||
plhs[0] = writeToMat(vXSum.data(), 1, vXSum.size());
|
plhs[0] = writeToMatDouble(vXSum.data(), 1, vXSum.size());
|
||||||
}
|
}
|
||||||
if (nlhs > 1) { // xs
|
if (nlhs > 1) { // xs
|
||||||
plhs[1] = writeToMat(vXsData.data(), rowNum, colNum);
|
plhs[1] = writeToMatDouble(vXsData.data(), rowNum, colNum);
|
||||||
}
|
}
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Write result time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Write result time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
|
||||||
|
|
@ -119,7 +119,7 @@
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="IsWordInDic.cpp" />
|
<ClCompile Include="AllEntropyMean.cpp" />
|
||||||
<ClCompile Include="main.cpp" />
|
<ClCompile Include="main.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@
|
||||||
<ClCompile Include="main.cpp">
|
<ClCompile Include="main.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="IsWordInDic.cpp">
|
<ClCompile Include="AllEntropyMean.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -31,23 +31,61 @@ int main(int argc, const char** argv)
|
||||||
// prhs[2] = matGetVariable(pdicrMat, "dicr");
|
// prhs[2] = matGetVariable(pdicrMat, "dicr");
|
||||||
|
|
||||||
/* IsWordInDic */
|
/* IsWordInDic */
|
||||||
MATFile* pwdMat, * pdicMat;
|
// MATFile* pwdMat, * pdicMat;
|
||||||
|
// mxArray* plhs[4];
|
||||||
|
// const mxArray* prhs[4];
|
||||||
|
// int nlhs = 2, nrhs = 2;
|
||||||
|
// pwdMat = matOpen("D:\\wd_large.mat", "r");
|
||||||
|
// pdicMat = matOpen("D:\\G_dc_large.mat", "r");
|
||||||
|
// prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
||||||
|
// prhs[1] = matGetVariable(pdicMat, "dc");
|
||||||
|
|
||||||
|
/* ClusterRandSim */
|
||||||
|
// mxArray* plhs[4];
|
||||||
|
// const mxArray* prhs[4];
|
||||||
|
// int nlhs = 2, nrhs = 4;
|
||||||
|
// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
||||||
|
// MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
|
||||||
|
// prhs[0] = matGetVariable(pMatX, "x");
|
||||||
|
// prhs[1] = matGetVariable(pMatH, "h3");
|
||||||
|
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
// *mxGetPr(prhs[2]) = 1;
|
||||||
|
// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
// *mxGetPr(prhs[3]) = 10000;
|
||||||
|
|
||||||
|
|
||||||
|
/* AllClusterRandSim */
|
||||||
|
// mxArray* plhs[4];
|
||||||
|
// const mxArray* prhs[4];
|
||||||
|
// int nlhs = 2, nrhs = 4;
|
||||||
|
// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
||||||
|
// MATFile* pMatIx = matOpen("D:\\ix_large.mat", "r");
|
||||||
|
// prhs[0] = matGetVariable(pMatX, "x");
|
||||||
|
// prhs[1] = matGetVariable(pMatIx, "ix");
|
||||||
|
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
// *mxGetPr(prhs[2]) = 4;
|
||||||
|
// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
// *mxGetPr(prhs[3]) = 10000;
|
||||||
|
|
||||||
|
/* AllEntropyMean */
|
||||||
mxArray* plhs[4];
|
mxArray* plhs[4];
|
||||||
const mxArray* prhs[4];
|
const mxArray* prhs[4];
|
||||||
int nlhs = 2, nrhs = 2;
|
int nlhs = 2, nrhs = 4;
|
||||||
pwdMat = matOpen("D:\\wd_large.mat", "r");
|
MATFile* pMatG = matOpen("D:\\G_large.mat", "r");
|
||||||
pdicMat = matOpen("D:\\G_dc_large.mat", "r");
|
MATFile* pMatWs = matOpen("D:\\ws_large.mat", "r");
|
||||||
prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
mxArray* pMxG = matGetVariable(pMatG, "G");
|
||||||
prhs[1] = matGetVariable(pdicMat, "dc");
|
prhs[0] = mxGetField(pMxG, 0, "ds");
|
||||||
|
prhs[1] = mxGetField(pMxG, 0, "frr");
|
||||||
/* */
|
prhs[2] = matGetVariable(pMatWs, "ws");
|
||||||
//MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
//MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
|
*mxGetPr(prhs[3]) = 12;
|
||||||
//prhs[0] = matGetVariable(pMatX, "x");
|
|
||||||
//prhs[1] = matGetVariable(pMatH, "h");
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// 调用函数进行测试
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue