修改了IsWordInDic,添加了阈值参数,并计算了xs
This commit is contained in:
parent
18a5415e59
commit
47b0cc187b
|
|
@ -0,0 +1,323 @@
|
||||||
|
#include <mex.h>
|
||||||
|
#include <mat.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <ctime>
|
||||||
|
#include <vector>
|
||||||
|
#include <queue>
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <future>
|
||||||
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <random>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
using std::cout;
|
||||||
|
using std::endl;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#define STRING_BUF_SIZE 204800
|
||||||
|
|
||||||
|
class ThreadPool {
|
||||||
|
public:
|
||||||
|
ThreadPool(size_t);
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto enqueue(F&& f, Args&&... args)
|
||||||
|
->std::future<typename std::result_of<F(Args...)>::type>;
|
||||||
|
~ThreadPool();
|
||||||
|
private:
|
||||||
|
// need to keep track of threads so we can join them
|
||||||
|
std::vector< std::thread > workers;
|
||||||
|
// the task queue
|
||||||
|
std::queue< std::function<void()> > tasks;
|
||||||
|
|
||||||
|
// synchronization
|
||||||
|
std::mutex queue_mutex;
|
||||||
|
std::condition_variable condition;
|
||||||
|
bool stop;
|
||||||
|
};
|
||||||
|
|
||||||
|
// the constructor just launches some amount of workers
|
||||||
|
inline ThreadPool::ThreadPool(size_t threads)
|
||||||
|
: stop(false)
|
||||||
|
{
|
||||||
|
for (size_t i = 0;i < threads;++i)
|
||||||
|
workers.emplace_back(
|
||||||
|
[this]
|
||||||
|
{
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
std::function<void()> task;
|
||||||
|
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(this->queue_mutex);
|
||||||
|
this->condition.wait(lock,
|
||||||
|
[this] { return this->stop || !this->tasks.empty(); });
|
||||||
|
if (this->stop && this->tasks.empty())
|
||||||
|
return;
|
||||||
|
task = std::move(this->tasks.front());
|
||||||
|
this->tasks.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
task();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// add new work item to the pool
|
||||||
|
template<class F, class... Args>
|
||||||
|
auto ThreadPool::enqueue(F&& f, Args&&... args)
|
||||||
|
-> std::future<typename std::result_of<F(Args...)>::type>
|
||||||
|
{
|
||||||
|
using return_type = typename std::result_of<F(Args...)>::type;
|
||||||
|
|
||||||
|
auto task = std::make_shared< std::packaged_task<return_type()> >(
|
||||||
|
std::bind(std::forward<F>(f), std::forward<Args>(args)...)
|
||||||
|
);
|
||||||
|
|
||||||
|
std::future<return_type> res = task->get_future();
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
|
||||||
|
// don't allow enqueueing after stopping the pool
|
||||||
|
if (stop)
|
||||||
|
throw std::runtime_error("enqueue on stopped ThreadPool");
|
||||||
|
|
||||||
|
tasks.emplace([task]() { (*task)(); });
|
||||||
|
}
|
||||||
|
condition.notify_one();
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
// the destructor joins all threads
|
||||||
|
inline ThreadPool::~ThreadPool()
|
||||||
|
{
|
||||||
|
{
|
||||||
|
std::unique_lock<std::mutex> lock(queue_mutex);
|
||||||
|
stop = true;
|
||||||
|
}
|
||||||
|
condition.notify_all();
|
||||||
|
for (std::thread& worker : workers)
|
||||||
|
worker.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 读取一维double数据 */
|
||||||
|
void Read1DDouble(const mxArray* pMxArray, vector<double>& vDat) {
|
||||||
|
int rowNum, colNum;
|
||||||
|
double* matData;
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
// cout << rowNum << " " << colNum << endl;
|
||||||
|
matData = (double*)mxGetData(pMxArray); //获取指针
|
||||||
|
vDat.resize(rowNum * colNum);
|
||||||
|
for (int i = 0; i < vDat.size(); ++i) vDat[i] = matData[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 读取二维double数据 */
|
||||||
|
void Read2DDouble(const mxArray* pMxArray, vector<vector<double>>& vvDat) {
|
||||||
|
int rowNum, colNum;
|
||||||
|
double* matData;
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
vvDat.resize(rowNum);
|
||||||
|
matData = (double*)mxGetData(pMxArray); //获取指针
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
vvDat[i].resize(colNum);
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vvDat[i][j] = matData[j * rowNum + i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 线程参数
|
||||||
|
struct TPRandSim {
|
||||||
|
vector<double>* pvTr;
|
||||||
|
vector<int>* pvRandPos;
|
||||||
|
vector<double>* pvH;
|
||||||
|
vector<vector<double>>* pvvX;
|
||||||
|
int numPositive;
|
||||||
|
};
|
||||||
|
|
||||||
|
// 多线程入口函数
|
||||||
|
void ThreadRandSim(TPRandSim& param) {
|
||||||
|
vector<double>& vTr = *param.pvTr;
|
||||||
|
vector<int>& vRandPos = *param.pvRandPos;
|
||||||
|
vector<vector<double>>& vvX = *param.pvvX;
|
||||||
|
vector<double>& vH = *param.pvH;
|
||||||
|
int numPositive = param.numPositive;
|
||||||
|
int rowNum = vvX.size();
|
||||||
|
int colNum = vvX[0].size();
|
||||||
|
|
||||||
|
clock_t begin = clock(), finish;
|
||||||
|
/* 随机模拟 */
|
||||||
|
std::random_device rd;
|
||||||
|
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
|
||||||
|
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
int hRowIdx = vRandPos[i]; // 随机打乱之后的行索引
|
||||||
|
if (vH[hRowIdx] == 1) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vTr[j] += vvX[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& val : vTr) val /= numPositive;
|
||||||
|
finish = clock();
|
||||||
|
// cout << "Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* 入口函数 */
|
||||||
|
/*
|
||||||
|
三个参数,一个返回值
|
||||||
|
输入:
|
||||||
|
1. x 二维数据,double类型,行数为文献数量,列数为字典长度(每个单词在所有文献中出现的次数超过5)
|
||||||
|
2. h 长度为文献个数,值为1代表该文献属于该知识颗粒(应该是),为0则不属于
|
||||||
|
3. numThread
|
||||||
|
输出:
|
||||||
|
vs z score,显著性指数,一维
|
||||||
|
ps 与vs长度一致
|
||||||
|
*/
|
||||||
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
||||||
|
//void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
if (nrhs < 2) {
|
||||||
|
cout << "At least 2 arguments should be given for this function!" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
clock_t begin = clock(), mid, finish;
|
||||||
|
vector<double> vH;
|
||||||
|
vector<vector<double>> vvX;
|
||||||
|
|
||||||
|
Read2DDouble(prhs[0], vvX);
|
||||||
|
Read1DDouble(prhs[1], vH);
|
||||||
|
int rowNum = vvX.size();
|
||||||
|
int colNum = vvX[0].size();
|
||||||
|
|
||||||
|
cout << vH.size() << '\t' << vvX.size() << endl;
|
||||||
|
|
||||||
|
int numThread = 1;
|
||||||
|
int loopNum = 1000;
|
||||||
|
|
||||||
|
if (nrhs > 2) {
|
||||||
|
double* pNumThread = (double*)mxGetData(prhs[2]);
|
||||||
|
numThread = (int)pNumThread[0];
|
||||||
|
if (numThread < 1) numThread = 1;
|
||||||
|
}
|
||||||
|
if (nrhs > 3) {
|
||||||
|
double* pLoopNum = (double*)mxGetData(prhs[3]);
|
||||||
|
loopNum = (int)pLoopNum[0];
|
||||||
|
if (loopNum < 1000) loopNum = 1000;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 进行随机模拟 */
|
||||||
|
mid = clock();
|
||||||
|
vector<double> vTs(colNum); // 初始数据,记录vH中label为1的行的行均值
|
||||||
|
int numPositive = 0;
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
if (vH[i] == 1) {
|
||||||
|
++numPositive;
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
vTs[j] += vvX[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& val : vTs) val /= numPositive;
|
||||||
|
|
||||||
|
|
||||||
|
vector<vector<double>> vvTr(loopNum, vector<double>(colNum, 0)); // 模拟结果
|
||||||
|
vector<vector<int>> vvRandPos(numThread, vector<int>(rowNum));
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (auto& vRandPos : vvRandPos) {
|
||||||
|
vRandPos[i] = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ThreadPool thPool(numThread);
|
||||||
|
int tid = 0;
|
||||||
|
for (int i = 0; i < loopNum; ++i) {
|
||||||
|
TPRandSim tParam = { &vvTr[i], &vvRandPos[tid++ % numThread], &vH, &vvX, numPositive };
|
||||||
|
thPool.enqueue(ThreadRandSim, tParam);
|
||||||
|
//ThreadRandSim(tParam);
|
||||||
|
}
|
||||||
|
thPool.~ThreadPool();
|
||||||
|
finish = clock();
|
||||||
|
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
/* 计算结果 */
|
||||||
|
vector<double> vVs(colNum);
|
||||||
|
vector<double> vPs(colNum);
|
||||||
|
// 按列计算平均值
|
||||||
|
vector<double> vMean(colNum);
|
||||||
|
vector<double> vStd(colNum);
|
||||||
|
for (int i = 0; i < vvTr.size(); ++i) {
|
||||||
|
for (int j = 0; j < vvTr[i].size(); ++j) {
|
||||||
|
vMean[j] += vvTr[i][j];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& val : vMean) { val /= loopNum; } // 均值
|
||||||
|
for (int i = 0; i < vvTr.size(); ++i) {
|
||||||
|
for (int j = 0; j < vvTr[i].size(); ++j) {
|
||||||
|
const double diff = vvTr[i][j] - vMean[j];
|
||||||
|
vStd[j] += diff * diff;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& val : vStd) { val = sqrt(val / (loopNum - 1)); } // 均方根
|
||||||
|
// 计算vs
|
||||||
|
for (int i = 0; i < vVs.size(); ++i) {
|
||||||
|
vVs[i] = (vTs[i] - vMean[i]) / vStd[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// 计算ps
|
||||||
|
vector<double> vSumGreater(colNum);
|
||||||
|
vector<double> vSumLess(colNum);
|
||||||
|
for (int i = 0; i < loopNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
if (vvTr[i][j] >= vTs[j]) vSumGreater[j] ++;
|
||||||
|
if (vvTr[i][j] <= vTs[j]) vSumLess[j] ++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto& val : vSumGreater) val /= loopNum;
|
||||||
|
for (auto& val : vSumLess) val /= loopNum;
|
||||||
|
for (int i = 0; i < colNum; ++i) {
|
||||||
|
vPs[i] = min(vSumGreater[i], vSumLess[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
ofstream ofs("d:\\result.txt");
|
||||||
|
for (int i = 0; i < colNum; ++i) {
|
||||||
|
ofs << vVs[i] << '\t' << vPs[i] << endl;
|
||||||
|
}
|
||||||
|
ofs.close();
|
||||||
|
|
||||||
|
/* 写入结果 */
|
||||||
|
if (nlhs > 0) { // vs
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(1, vVs.size(), mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)vVs.data(), sizeof(double) * vVs.size());
|
||||||
|
plhs[0] = pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
if (nlhs > 1) { // ps
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(1, vPs.size(), mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)vPs.data(), sizeof(double)* vPs.size());
|
||||||
|
plhs[1] = pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
cout << "Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
@ -1166,6 +1166,11 @@ T SquareAverage(vector<T>& vVal) {
|
||||||
|
|
||||||
/* 计算向量x和y的相关距离, 向量维度必须相等*/
|
/* 计算向量x和y的相关距离, 向量维度必须相等*/
|
||||||
double CorrelationDistance(vector<double>& vX, vector<double>& vY) {
|
double CorrelationDistance(vector<double>& vX, vector<double>& vY) {
|
||||||
|
double xMean = Average(vX);
|
||||||
|
transform(vX.cbegin(), vX.cend(), vX.begin(), [&](double val) { return val - xMean; });
|
||||||
|
double yMean = Average(vY);
|
||||||
|
transform(vY.cbegin(), vY.cend(), vY.begin(), [&](double val) { return val - yMean; });
|
||||||
|
|
||||||
vector<double> vXY(vX.size());
|
vector<double> vXY(vX.size());
|
||||||
for (int i = 0; i < vXY.size(); ++i) {
|
for (int i = 0; i < vXY.size(); ++i) {
|
||||||
vXY[i] = vX[i] * vY[i];
|
vXY[i] = vX[i] * vY[i];
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,204 @@
|
||||||
|
#include <mex.h>
|
||||||
|
#include <mat.h>
|
||||||
|
#include <iostream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <string>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <ctime>
|
||||||
|
#include <vector>
|
||||||
|
#include <queue>
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <future>
|
||||||
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <random>
|
||||||
|
#include <cmath>
|
||||||
|
|
||||||
|
using std::cout;
|
||||||
|
using std::endl;
|
||||||
|
using namespace std;
|
||||||
|
|
||||||
|
#define STRING_BUF_SIZE 204800
|
||||||
|
|
||||||
|
|
||||||
|
// 读取一维cell字符串并转换成大写
|
||||||
|
inline bool Read1DWord(const mxArray* pMxArray, vector<string>& vStr) {
|
||||||
|
mxArray* pCell = nullptr;
|
||||||
|
int rowNum, colNum;
|
||||||
|
char* strBuf = new char[STRING_BUF_SIZE];
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
vStr.resize(rowNum * colNum);
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
|
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
||||||
|
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
vStr[i * colNum + j] = strBuf;
|
||||||
|
auto& lastStr = vStr[i * colNum + j];
|
||||||
|
transform(lastStr.cbegin(), lastStr.cend(), lastStr.begin(), ::toupper); // 转成大写
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[]strBuf;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 读取二维cell字符串并转换成大写
|
||||||
|
inline bool Read2DWord(const mxArray* pMxArray, vector<vector<string>>& vvStr) {
|
||||||
|
mxArray* pCell = nullptr;
|
||||||
|
int rowNum, colNum;
|
||||||
|
char* strBuf = new char[STRING_BUF_SIZE];
|
||||||
|
|
||||||
|
rowNum = (int)mxGetM(pMxArray);
|
||||||
|
colNum = (int)mxGetN(pMxArray);
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
|
int childRowNum = (int)mxGetM(pCell);
|
||||||
|
int childColNum = (int)mxGetN(pCell);
|
||||||
|
vvStr.push_back(vector<string>());
|
||||||
|
Read1DWord(pCell, vvStr.back());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
delete[]strBuf;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
|
mxArray* writeToMat(const double *data, int rowNum, int colNum) {
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
int len = rowNum * colNum;
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
|
||||||
|
return pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 入口函数 */
|
||||||
|
/*
|
||||||
|
三个参数,一个返回值
|
||||||
|
输入:
|
||||||
|
1. wd 文献摘要中的单词,二维cell
|
||||||
|
2. dic 字典,按字母序排序的,一维cell
|
||||||
|
3. threshold 保留超过阈值的列
|
||||||
|
输出:
|
||||||
|
x 一维int(double)类型,表示在wd的每一行单词中,dic中是否有单词匹配上(匹配后,对应的坐标为设为1,否则为0),所有匹配的个数(统计每一行的匹配个数)
|
||||||
|
dic每一个单词在wd所有行中出现的次数
|
||||||
|
*/
|
||||||
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
if (nrhs < 2) {
|
||||||
|
cout << "At least 2 arguments should be given for this function!" << endl;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
clock_t begin = clock(), mid, finish;
|
||||||
|
vector<string> vDic;
|
||||||
|
vector<vector<string>> vvWd;
|
||||||
|
|
||||||
|
Read2DWord(prhs[0], vvWd);
|
||||||
|
Read1DWord(prhs[1], vDic);
|
||||||
|
int rowNum = vvWd.size();
|
||||||
|
|
||||||
|
int threshold = 5;
|
||||||
|
if (nrhs > 2) {
|
||||||
|
double* pThreshold = (double*)mxGetData(prhs[2]);
|
||||||
|
threshold = (int)pThreshold[0];
|
||||||
|
if (threshold < 5) threshold = 5;
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
vector<double> vXSum(vDic.size());
|
||||||
|
|
||||||
|
/* 统计dicr字典中,每个单词在wd中出现的次数 */
|
||||||
|
mid = clock();
|
||||||
|
unordered_map<string, int> umWordPos;
|
||||||
|
for (int i = 0; i < vDic.size(); ++i) umWordPos[vDic[i]] = i; // 记录单词位置
|
||||||
|
unordered_set<int> usPos; // 多次出现在wd中的单词,只统计一次,这是原matlab代码的功能,是否需要修改?
|
||||||
|
vector<unordered_set<int>> vusX(rowNum); // 保存每一行中非零元的坐标
|
||||||
|
int row = 0;
|
||||||
|
for (auto& vWd : vvWd) {
|
||||||
|
auto& usPos = vusX[row++];
|
||||||
|
for (auto& word : vWd) {
|
||||||
|
auto itr = umWordPos.find(word);
|
||||||
|
if (itr != umWordPos.end()) {
|
||||||
|
usPos.insert(itr->second);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto idx : usPos) {
|
||||||
|
vXSum[idx] += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
cout << "Calc word occurrence time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
/* 计算xs */
|
||||||
|
mid = clock();
|
||||||
|
int colNum = 0;
|
||||||
|
vector<int> vColIdx;
|
||||||
|
for (int i = 0; i < vXSum.size(); ++i) {
|
||||||
|
if (vXSum[i] >= threshold) {
|
||||||
|
vColIdx.push_back(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
colNum = vColIdx.size();
|
||||||
|
|
||||||
|
vector<double> vXsData(rowNum * colNum);
|
||||||
|
|
||||||
|
for (int i = 0; i < rowNum; ++i) {
|
||||||
|
for (int j = 0; j < colNum; ++j) {
|
||||||
|
if (vusX[i].find(vColIdx[j]) != vusX[i].end()) {
|
||||||
|
vXsData[j * rowNum + i] = 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
cout << "Calc xs time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
// 测试输出
|
||||||
|
// cout << rowNum << '\t' << colNum << endl;
|
||||||
|
// ofstream ofs1("d:\\result_xsum.txt");
|
||||||
|
// for (auto& val : vXSum) {
|
||||||
|
// ofs1 << val << endl;
|
||||||
|
// }
|
||||||
|
// ofs1.close();
|
||||||
|
//
|
||||||
|
// ofstream ofs2("d:\\result_xs.txt");
|
||||||
|
// for (int i = 0; i < rowNum; ++i) {
|
||||||
|
// for (int j = 0; j < colNum; ++j) {
|
||||||
|
// if (vXsData[j * rowNum + i] > 0) {
|
||||||
|
// ofs2 << j + 1 << '\t';
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// ofs2 << endl;
|
||||||
|
// }
|
||||||
|
// ofs2.close();
|
||||||
|
|
||||||
|
/* 写入结果 */
|
||||||
|
mid = clock();
|
||||||
|
if (nlhs > 0) {
|
||||||
|
plhs[0] = writeToMat(vXSum.data(), 1, vXSum.size());
|
||||||
|
}
|
||||||
|
if (nlhs > 1) { // xs
|
||||||
|
plhs[1] = writeToMat(vXsData.data(), rowNum, colNum);
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
cout << "Write result time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
cout << "Calc word occurrence in Dic Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 供c++调试用
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
return mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
|
}
|
||||||
|
|
@ -119,8 +119,8 @@
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
<ClCompile Include="IsWordInDic.cpp" />
|
||||||
<ClCompile Include="main.cpp" />
|
<ClCompile Include="main.cpp" />
|
||||||
<ClCompile Include="RandSim.cpp" />
|
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Include="packages.config" />
|
<None Include="packages.config" />
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@
|
||||||
<ClCompile Include="main.cpp">
|
<ClCompile Include="main.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="RandSim.cpp">
|
<ClCompile Include="IsWordInDic.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -13,32 +13,45 @@ int main(int argc, const char** argv)
|
||||||
//string matFile = "D:\\Twirls\\wd_small.mat";
|
//string matFile = "D:\\Twirls\\wd_small.mat";
|
||||||
//string matFile = "D:\\Twirls\\wd.mat";
|
//string matFile = "D:\\Twirls\\wd.mat";
|
||||||
clock_t begin = clock(), finish;
|
clock_t begin = clock(), finish;
|
||||||
string wd2Mat = "D:\\wd2_5w.mat";
|
//string wd2Mat = "D:\\wd2_5w.mat";
|
||||||
string dicrMat = "D:\\dicr.mat";
|
//string dicrMat = "D:\\dicr.mat";
|
||||||
string wdMat = "D:\\wd.mat";
|
//string wdMat = "D:\\wd.mat";
|
||||||
|
|
||||||
//string dicrMat = "D:\\dicr_large.mat";
|
|
||||||
|
//string dicMat = "D:\\G_dc_large.mat";
|
||||||
//string wdMat = "D:\\wd_large.mat";
|
//string wdMat = "D:\\wd_large.mat";
|
||||||
|
|
||||||
MATFile* pwdMat, *pwd2Mat, *pdicrMat;
|
//MATFile* pwdMat, *pwd2Mat, *pdicMat;
|
||||||
mxArray* prhs[4];
|
//mxArray* prhs[4];
|
||||||
|
|
||||||
pwdMat = matOpen(wdMat.c_str(), "r");
|
//pwdMat = matOpen(wdMat.c_str(), "r");
|
||||||
pwd2Mat = matOpen(wd2Mat.c_str(), "r");
|
// pwd2Mat = matOpen(wd2Mat.c_str(), "r");
|
||||||
pdicrMat = matOpen(dicrMat.c_str(), "r");
|
//pdicMat = matOpen(dicMat.c_str(), "r");
|
||||||
|
|
||||||
|
|
||||||
prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
|
||||||
prhs[1] = matGetVariable(pwd2Mat, "wd2");
|
|
||||||
// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
|
// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
|
||||||
prhs[2] = matGetVariable(pdicrMat, "dicr");
|
// prhs[2] = matGetVariable(pdicrMat, "dicr");
|
||||||
|
|
||||||
mxArray* plhs = (mxArray*)malloc(sizeof(mxArray*));
|
/* IsWordInDic */
|
||||||
|
MATFile* pwdMat, * pdicMat;
|
||||||
|
mxArray* plhs[4];
|
||||||
|
const mxArray* prhs[4];
|
||||||
|
int nlhs = 2, nrhs = 2;
|
||||||
|
pwdMat = matOpen("D:\\wd_large.mat", "r");
|
||||||
|
pdicMat = matOpen("D:\\G_dc_large.mat", "r");
|
||||||
|
prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
||||||
|
prhs[1] = matGetVariable(pdicMat, "dc");
|
||||||
|
|
||||||
mxArray** arg = &prhs[0];
|
/* */
|
||||||
|
//MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
||||||
|
//MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
|
||||||
|
//prhs[0] = matGetVariable(pMatX, "x");
|
||||||
|
//prhs[1] = matGetVariable(pMatH, "h");
|
||||||
|
|
||||||
mexFunction(1, &plhs, 3, arg);
|
|
||||||
//mexFunction(0, 0, 0, 0);
|
|
||||||
|
finish = clock();
|
||||||
|
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
mexFunctionWrap(nlhs, &plhs[0], nrhs, &prhs[0]);
|
||||||
|
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "mexFunction Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "mexFunction Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
#include <mat.h>
|
#include <mat.h>
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs);
|
// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs);
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]);
|
||||||
Loading…
Reference in New Issue