修改了SortDedup,可以将字符输出到文件
This commit is contained in:
parent
ca3f99cc98
commit
f96d9cf4a2
|
|
@ -283,7 +283,7 @@ void CalcEntropy(int argc, const char** argv) {
|
||||||
cout << "read abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "read abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
/* 将分割结果写入mat文件 */
|
/* 将分割结果写入mat文件 */
|
||||||
begin = clock();
|
begin = clock();
|
||||||
if (argc > 6) {
|
if (argc > 6) { // 保存ws
|
||||||
MATFile* pMatFile = matOpen(argv[6], "w");
|
MATFile* pMatFile = matOpen(argv[6], "w");
|
||||||
mxArray* pCellMtx= mxCreateCellMatrix(1, vvWordMtx.size());
|
mxArray* pCellMtx= mxCreateCellMatrix(1, vvWordMtx.size());
|
||||||
for (int i = 0; i < vvWordMtx.size(); ++i) {
|
for (int i = 0; i < vvWordMtx.size(); ++i) {
|
||||||
|
|
|
||||||
|
|
@ -2,10 +2,26 @@
|
||||||
#include <mat.h>
|
#include <mat.h>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <vector>
|
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <ctime>
|
#include <ctime>
|
||||||
|
#include <vector>
|
||||||
|
#include <queue>
|
||||||
|
#include <memory>
|
||||||
|
#include <thread>
|
||||||
|
#include <mutex>
|
||||||
|
#include <condition_variable>
|
||||||
|
#include <future>
|
||||||
|
#include <functional>
|
||||||
|
#include <stdexcept>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <set>
|
||||||
|
#include <fstream>
|
||||||
|
#include <random>
|
||||||
|
#include <cmath>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <atomic>
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
@ -40,6 +56,91 @@ using namespace std;
|
||||||
dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
|
dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
|
||||||
} \
|
} \
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Fixed-size pool of worker threads fed from a single task queue.
///
/// Tasks are submitted with enqueue(), which returns a std::future for the
/// task's result. Workers take tasks from the front of the queue. The
/// destructor lets the workers finish every task still queued, then joins them.
class ThreadPool {
public:
    /// Launches `threads` workers. Explicit so that an integer is never
    /// implicitly converted into a pool.
    explicit ThreadPool(size_t threads);

    // Every worker captures `this`, so a pool must never be copied.
    ThreadPool(const ThreadPool&) = delete;
    ThreadPool& operator=(const ThreadPool&) = delete;

    /// Queues the call f(args...) and returns a future for its result.
    /// Throws std::runtime_error if the pool has already been stopped.
    template<class F, class... Args>
    auto enqueue(F&& f, Args&&... args)
        -> std::future<typename std::result_of<F(Args...)>::type>;

    /// Stops the pool and joins all workers.
    ~ThreadPool();

private:
    // need to keep track of threads so we can join them
    std::vector< std::thread > workers;
    // the task queue
    std::queue< std::function<void()> > tasks;

    // synchronization: queue_mutex guards both `tasks` and `stop`
    std::mutex queue_mutex;
    std::condition_variable condition;
    bool stop;
};

// the constructor just launches some amount of workers
inline ThreadPool::ThreadPool(size_t threads)
    : stop(false)
{
    for (size_t i = 0; i < threads; ++i) {
        workers.emplace_back([this] {
            for (;;) {
                std::function<void()> task;

                {
                    std::unique_lock<std::mutex> lock(this->queue_mutex);
                    // Sleep until there is work to do or the pool is shutting down.
                    this->condition.wait(lock,
                        [this] { return this->stop || !this->tasks.empty(); });
                    // Leave only once the queue has been drained.
                    if (this->stop && this->tasks.empty())
                        return;
                    task = std::move(this->tasks.front());
                    this->tasks.pop();
                }

                // Run the task outside the lock so other workers can dequeue meanwhile.
                task();
            }
        });
    }
}

// add new work item to the pool
template<class F, class... Args>
auto ThreadPool::enqueue(F&& f, Args&&... args)
    -> std::future<typename std::result_of<F(Args...)>::type>
{
    using return_type = typename std::result_of<F(Args...)>::type;

    // packaged_task is move-only while std::function requires a copyable
    // callable, so the task is held through a shared_ptr.
    auto task = std::make_shared< std::packaged_task<return_type()> >(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...)
    );

    std::future<return_type> res = task->get_future();
    {
        std::unique_lock<std::mutex> lock(queue_mutex);

        // don't allow enqueueing after stopping the pool
        if (stop)
            throw std::runtime_error("enqueue on stopped ThreadPool");

        tasks.emplace([task]() { (*task)(); });
    }
    condition.notify_one();
    return res;
}

// the destructor joins all threads
inline ThreadPool::~ThreadPool()
{
    {
        std::unique_lock<std::mutex> lock(queue_mutex);
        stop = true;
    }
    condition.notify_all();
    for (std::thread& worker : workers) {
        // join() on a non-joinable thread throws, which would terminate the
        // program when it escapes a destructor.
        if (worker.joinable())
            worker.join();
    }
}
|
||||||
|
|
||||||
// 将二维索引转成一维的索引
|
// 将二维索引转成一维的索引
|
||||||
inline int Get1DIndex(int colNum, int row, int col) {
|
inline int Get1DIndex(int colNum, int row, int col) {
|
||||||
return row * colNum + col;
|
return row * colNum + col;
|
||||||
|
|
@ -98,6 +199,79 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
|
||||||
delete[]strBuf;
|
delete[]strBuf;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// 保存由一维cell组成的字符串数组
|
||||||
|
mxArray* writeToMatString1DCell(vector<string>& vStr) {
|
||||||
|
mxArray* pCellMtx = mxCreateCellMatrix(1, vStr.size());
|
||||||
|
for (int j = 0; j < vStr.size(); ++j) {
|
||||||
|
mxArray* mxStr = mxCreateString(vStr[j].c_str());
|
||||||
|
mxSetCell(pCellMtx, j, mxStr);
|
||||||
|
}
|
||||||
|
return pCellMtx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 保存由二维cell组成的字符串数组
|
||||||
|
mxArray* writeToMatString2DCell(vector<vector<string>>& vvStr) {
|
||||||
|
mxArray* pCellMtx = mxCreateCellMatrix(1, vvStr.size());
|
||||||
|
for (int i = 0; i < vvStr.size(); ++i) {
|
||||||
|
mxArray* pChildCellMtx = writeToMatString1DCell(vvStr[i]);
|
||||||
|
mxSetCell(pCellMtx, i, pChildCellMtx);
|
||||||
|
}
|
||||||
|
return pCellMtx;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将结果写入mxArray, 作为后续的返回值
|
||||||
|
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
|
||||||
|
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
||||||
|
int len = rowNum * colNum;
|
||||||
|
//创建一个rowNum*colNum的矩阵
|
||||||
|
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
|
||||||
|
//把data的值赋给pWriteArray指针
|
||||||
|
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
|
||||||
|
return pWriteArray; // 赋值给返回值
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Multi-threaded entropy computation */

// Arguments for one ThreadCalcEntropy call, i.e. one word group.
// All members are non-owning pointers.
struct TPEntropy {
    std::vector<std::string>* pvDs;                             // words (ds) of this group
    std::vector<double>* pvFr;                                  // frequency of each word; normalized in place
    std::vector<std::unordered_set<std::string>>* pvusAbsWord;  // set of words for each abstract
    double* pHs;                                                // output: one accumulator per abstract
};

// For every abstract i, subtracts fr * log2(fr) from param.pHs[i] for each
// word of the group that occurs in that abstract, where fr is the word's
// frequency after normalization.
//
// Side effect: *param.pvFr is rescaled in place so that its largest value
// becomes 0.368 (presumably ~1/e, where -x*log2(x) peaks; confirm with the
// algorithm's author).
// param.pHs is only accumulated into, so the caller must initialize it.
void ThreadCalcEntropy(TPEntropy& param) {
    std::vector<std::string>& vDs = *param.pvDs;   // words of this group
    std::vector<double>& vFr = *param.pvFr;        // frequencies
    std::vector<std::unordered_set<std::string>>& vusAbsWord = *param.pvusAbsWord;
    double* hs = param.pHs;

    // An empty frequency list has no maximum; dereferencing max_element's
    // end() would be undefined behaviour, so there is nothing to do.
    if (vFr.empty()) return;

    // Scale the frequencies into (0, 0.368].
    const double normalMax = 0.368;
    const double maxFr = *std::max_element(vFr.begin(), vFr.end());
    for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;

    // Only touch words that have both a string and a frequency.
    const size_t numDsWord = std::min(vDs.size(), vFr.size());

    // fr * log2(fr) depends only on the word, so compute it once per word
    // instead of once per (abstract, word) pair.
    std::vector<double> vTerm(numDsWord);
    for (size_t j = 0; j < numDsWord; ++j) {
        vTerm[j] = vFr[j] * std::log2(vFr[j]);
    }

    // Accumulate the entropy of each abstract over the group's words it contains.
    const size_t numAbs = vusAbsWord.size();
    for (size_t i = 0; i < numAbs; ++i) {
        const std::unordered_set<std::string>& absWords = vusAbsWord[i];
        for (size_t j = 0; j < numDsWord; ++j) {
            if (absWords.find(vDs[j]) != absWords.end()) {
                hs[i] -= vTerm[j];
            }
        }
    }
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
输入:
|
输入:
|
||||||
1. abs: 待感知的文献的摘要信息。
|
1. abs: 待感知的文献的摘要信息。
|
||||||
|
|
@ -106,23 +280,34 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
|
||||||
1. hs: 信息熵,二维[len(知识颗粒)][len(文献)]
|
1. hs: 信息熵,二维[len(知识颗粒)][len(文献)]
|
||||||
*/
|
*/
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
//cout << "MexCalcEntropy" << endl;
|
if (nrhs < 2) {
|
||||||
//cout << nlhs << '\t' << nrhs << endl;
|
cout << "At least 2 arguments should be given for this function!" << endl;
|
||||||
if (nrhs != 2) {
|
|
||||||
cout << "2 arguments should be given for this function!" << endl;
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
clock_t begin, finish;
|
clock_t begin = clock(), mid, finish;
|
||||||
begin = clock();
|
|
||||||
vector<vector<string> > vvDs; // 每个知识颗粒的ds矩阵(词汇矩阵)
|
|
||||||
vector<vector<double> > vvFr; // 词汇对应的频率
|
|
||||||
GetFrDs(prhs[1], vvDs, vvFr);
|
|
||||||
|
|
||||||
vector<string> vAbstract; // 读取abs1, 然后分割成一个一个的单词
|
vector<string> vAbstract; // 读取abs1, 然后分割成一个一个的单词
|
||||||
GetAbstract(prhs[0], vAbstract);
|
GetAbstract(prhs[0], vAbstract);
|
||||||
|
|
||||||
|
vector<vector<string>> vvDs; // 每个知识颗粒的ds矩阵(词汇矩阵)
|
||||||
|
vector<vector<double>> vvFr; // 词汇对应的频率
|
||||||
|
GetFrDs(prhs[1], vvDs, vvFr);
|
||||||
|
|
||||||
|
int numThread = 1; // number of worker threads (optional 3rd argument, clamped to at least 1 below)
|
||||||
|
if (nrhs > 2) {
|
||||||
|
double* pData = (double*)mxGetData(prhs[2]);
|
||||||
|
numThread = (int)pData[0];
|
||||||
|
if (numThread < 1) numThread = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
int flagPrint = 0; // 是否打印信息, 1打印简单信息,2打印详细信息
|
||||||
|
if (nrhs > 3) {
|
||||||
|
double* pData = (double*)mxGetData(prhs[3]);
|
||||||
|
flagPrint = (int)pData[0];
|
||||||
|
}
|
||||||
|
finish = clock();
|
||||||
|
if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
/* 将摘要信息分割成一个一个的词汇 */
|
/* 将摘要信息分割成一个一个的词汇 */
|
||||||
// begin = clock();
|
mid = clock();
|
||||||
unordered_set<char> usWordChars; // 能组成单词的字符,要不要考虑数字?原版matlab是提取了数字的
|
unordered_set<char> usWordChars; // 能组成单词的字符,要不要考虑数字?原版matlab是提取了数字的
|
||||||
for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
|
for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
|
||||||
for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
|
for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
|
||||||
|
|
@ -154,82 +339,91 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
vusAbsWord[i].insert(upWord);
|
vusAbsWord[i].insert(upWord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// finish = clock();
|
finish = clock();
|
||||||
// cout << "Split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
if (flagPrint == 2) cout << "Split abstract time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
// 存放结果,用一维数组存放二维数据
|
// 存放结果,用一维数组存放二维数据
|
||||||
|
mid = clock();
|
||||||
vector<double> hs;
|
vector<double> hs;
|
||||||
vector<double> hr;
|
// vector<double> hr;
|
||||||
const int numLiterature = vusAbsWord.size(); // pubmed 文件中包含的文献数量
|
const int numLiterature = vusAbsWord.size(); // pubmed 文件中包含的文献数量
|
||||||
const int numGroup = vvDs.size(); // ds包含的组数
|
const int numGroup = vvDs.size(); // ds包含的组数
|
||||||
hs.resize(numGroup * numLiterature);
|
hs.resize(numGroup * numLiterature);
|
||||||
hr.resize(numLiterature * numGroup);
|
// hr.resize(numLiterature * numGroup);
|
||||||
|
// 并行, 没有计算hr
|
||||||
for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
|
ThreadPool thPool(numThread);
|
||||||
vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
|
for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) {
|
||||||
vector<double>& vFr = vvFr[groupIdx]; // frequency
|
TPEntropy tp = { &vvDs[groupIdx], &vvFr[groupIdx], &vusAbsWord, &hs[groupIdx * numLiterature] };
|
||||||
const int numWord = vDs.size(); // 这一组数据中包含的单词数量
|
thPool.enqueue(ThreadCalcEntropy, tp);
|
||||||
vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
|
|
||||||
// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
|
|
||||||
for (int i = 0; i < numLiterature; ++i) {
|
|
||||||
for (int j = 0; j < numWord; ++j) {
|
|
||||||
if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
|
|
||||||
vX[i][j] = 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 找词汇的最高频率
|
|
||||||
double maxFr = *max_element(vFr.begin(), vFr.end());
|
|
||||||
// 将fr的数值规范化到(0,0.368)之间
|
|
||||||
const double normalMax = 0.368;
|
|
||||||
for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
|
|
||||||
maxFr = normalMax;
|
|
||||||
// 对每个知识颗粒每一组数据,计算信息熵
|
|
||||||
for (int i = 0; i < numLiterature; ++i) {
|
|
||||||
for (int j = 0; j < numWord; ++j) {
|
|
||||||
if (vX[i][j] == 1) {
|
|
||||||
hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// 找最高频词汇所在的索引位置
|
|
||||||
vector<int> vMaxPos;
|
|
||||||
int idx = 0;
|
|
||||||
for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
|
|
||||||
if (val == maxFr) vMaxPos.push_back(idx);
|
|
||||||
idx++;
|
|
||||||
});
|
|
||||||
|
|
||||||
for (int i = 0; i < numLiterature; ++i) {
|
|
||||||
int cumulateX = 0; // 计算在最高频词汇处,x值的累加结果
|
|
||||||
for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
|
|
||||||
if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
|
|
||||||
hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
thPool.~ThreadPool();
|
||||||
|
|
||||||
|
// // 串行
|
||||||
|
// for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
|
||||||
|
// vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
|
||||||
|
// vector<double>& vFr = vvFr[groupIdx]; // frequency
|
||||||
|
// const int numWord = vDs.size(); // 这一组数据中包含的单词数量
|
||||||
|
// vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
|
||||||
|
// // 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
|
||||||
|
// for (int i = 0; i < numLiterature; ++i) {
|
||||||
|
// for (int j = 0; j < numWord; ++j) {
|
||||||
|
// if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
|
||||||
|
// vX[i][j] = 1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // 找词汇的最高频率
|
||||||
|
// double maxFr = *max_element(vFr.begin(), vFr.end());
|
||||||
|
// // 将fr的数值规范化到(0,0.368)之间
|
||||||
|
// const double normalMax = 0.368;
|
||||||
|
// for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
|
||||||
|
// maxFr = normalMax;
|
||||||
|
// // 对每个知识颗粒每一组数据,计算信息熵
|
||||||
|
// for (int i = 0; i < numLiterature; ++i) {
|
||||||
|
// for (int j = 0; j < numWord; ++j) {
|
||||||
|
// if (vX[i][j] == 1) {
|
||||||
|
// hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// // 找最高频词汇所在的索引位置
|
||||||
|
// vector<int> vMaxPos;
|
||||||
|
// int idx = 0;
|
||||||
|
// for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
|
||||||
|
// if (val == maxFr) vMaxPos.push_back(idx);
|
||||||
|
// idx++;
|
||||||
|
// });
|
||||||
|
//
|
||||||
|
// for (int i = 0; i < numLiterature; ++i) {
|
||||||
|
// int cumulateX = 0; // 计算在最高频词汇处,x值的累加结果
|
||||||
|
// for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
|
||||||
|
// if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
|
||||||
|
// hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
finish = clock();
|
||||||
|
if (flagPrint == 2) cout << "Calc entropy time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
/* 将结果写入返回值 */
|
/* 将结果写入返回值 */
|
||||||
|
mid = clock();
|
||||||
if (nlhs > 0) {
|
if (nlhs > 0) {
|
||||||
int datasize = numGroup * numLiterature;
|
int datasize = numGroup * numLiterature;
|
||||||
double* mtxData = new double[datasize];//待存储数据转为double格式
|
vector<double> vData(datasize);
|
||||||
mxArray* pWriteArray = NULL;//matlab格式矩阵
|
for (int i = 0; i < numGroup; i++) for (int j = 0; j < numLiterature; j++)
|
||||||
//创建一个rowNum*colNum的矩阵
|
vData[j * numGroup + i] = hs[i * numLiterature + j];
|
||||||
pWriteArray = mxCreateDoubleMatrix(numGroup, numLiterature, mxREAL);
|
plhs[0] = writeToMatDouble(vData.data(), numGroup, numLiterature);
|
||||||
for (int i = 0; i < numGroup; i++) {
|
}
|
||||||
for (int j = 0; j < numLiterature; j++) {
|
if (nlhs > 1) { // 将ws写入结果
|
||||||
mtxData[j * numGroup + i] = hs[i * numLiterature + j];
|
plhs[1] = writeToMatString2DCell(vvWordMtx);
|
||||||
}
|
|
||||||
}
|
|
||||||
//把data的值赋给pWriteArray指针
|
|
||||||
memcpy((void*)(mxGetPr(pWriteArray)), (void*)mtxData, sizeof(double) * datasize);
|
|
||||||
plhs[0] = pWriteArray; // 赋值给返回值
|
|
||||||
delete[]mtxData;
|
|
||||||
}
|
}
|
||||||
finish = clock();
|
finish = clock();
|
||||||
// cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
if(flagPrint) cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 供main调试调用 */
|
/* 供main调试调用 */
|
||||||
|
|
|
||||||
|
|
@ -18,8 +18,6 @@
|
||||||
#include <functional>
|
#include <functional>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
|
||||||
// #include "CommonLib/kthread.h"
|
|
||||||
// #include "CommonLib/thread_pool.h"
|
|
||||||
using std::cout;
|
using std::cout;
|
||||||
using std::endl;
|
using std::endl;
|
||||||
using namespace std;
|
using namespace std;
|
||||||
|
|
@ -160,7 +158,14 @@ void ThreadCalcDist(TPCorDist& param) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 入口函数 */
|
/* 入口函数 */
|
||||||
// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
/*
|
||||||
|
输入:
|
||||||
|
1. x: 二维。
|
||||||
|
[2]. numThread: 线程数。
|
||||||
|
[3]. numGroup: 每次线程函数处理的数据量。
|
||||||
|
输出:
|
||||||
|
1. d: 相关距离
|
||||||
|
*/
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
if (nrhs < 1) {
|
if (nrhs < 1) {
|
||||||
cout << "At least 1 arguments should be given for this function!" << endl;
|
cout << "At least 1 arguments should be given for this function!" << endl;
|
||||||
|
|
@ -321,4 +326,9 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Correlation Dist Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
cout << "Correlation Dist Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 供main调试调用 */
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
}
|
}
|
||||||
|
|
@ -119,7 +119,7 @@
|
||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="AllEntropyMean.cpp" />
|
<ClCompile Include="IsWordInDic.cpp" />
|
||||||
<ClCompile Include="main.cpp" />
|
<ClCompile Include="main.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@
|
||||||
<ClCompile Include="main.cpp">
|
<ClCompile Include="main.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<ClCompile Include="AllEntropyMean.cpp">
|
<ClCompile Include="IsWordInDic.cpp">
|
||||||
<Filter>Source Files</Filter>
|
<Filter>Source Files</Filter>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
|
||||||
|
|
@ -59,32 +59,64 @@ bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
|
||||||
}
|
}
|
||||||
|
|
||||||
/* 入口函数 */
|
/* 入口函数 */
|
||||||
// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
/*
|
||||||
|
输入:
|
||||||
|
1. wd: 文献摘要,由二维cell组成的字符串数组
|
||||||
|
[2]. 将字符串保存到文件路径
|
||||||
|
[3]. flagPrint 是否输出信息
|
||||||
|
输出:
|
||||||
|
1. dic: 单词组成的一维cell,包含去重之后的文献摘要所有单词,大写,按字母序排序(只包含字母的单词,去掉数字等)
|
||||||
|
*/
|
||||||
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
if (nrhs < 1) {
|
if (nrhs < 1) {
|
||||||
cout << "At least 1 arguments should be given for this function!" << endl;
|
cout << "At least 1 arguments should be given for this function!" << endl;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
clock_t begin = clock(), finish;
|
clock_t begin = clock(), mid, finish;
|
||||||
|
|
||||||
//set<string> sOrderedWord;
|
|
||||||
|
|
||||||
unordered_set<string> usStr;
|
unordered_set<string> usStr;
|
||||||
ReadInsertWord(prhs[0], usStr);
|
ReadInsertWord(prhs[0], usStr);
|
||||||
usStr.insert("A");
|
// usStr.insert("A");
|
||||||
usStr.insert("Z");
|
// usStr.insert("Z");
|
||||||
|
string outputPath;
|
||||||
|
if (nrhs > 1) {
|
||||||
|
char* strBuf = new char[STRING_BUF_SIZE];
|
||||||
|
mxGetString(prhs[1], strBuf, STRING_BUF_SIZE);
|
||||||
|
outputPath = strBuf;
|
||||||
|
delete[]strBuf;
|
||||||
|
}
|
||||||
|
|
||||||
///* 排序 */
|
int flagPrint = 0; // 是否打印信息, 1打印简单信息,2打印详细信息
|
||||||
|
if (nrhs > 2) {
|
||||||
|
double* pData = (double*)mxGetData(prhs[2]);
|
||||||
|
flagPrint = (int)pData[0];
|
||||||
|
}
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
/* 排序 */
|
||||||
|
mid = clock();
|
||||||
set<string> sOrderedWord;
|
set<string> sOrderedWord;
|
||||||
for (auto& word : usStr) {
|
for (auto& word : usStr) {
|
||||||
sOrderedWord.insert(word);
|
sOrderedWord.insert(word);
|
||||||
}
|
}
|
||||||
|
finish = clock();
|
||||||
|
if (flagPrint == 2) cout << "Sort and deduplicate time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
//ofstream ofs("d:\\wd_dict.txt");
|
/* 将字符串保存到文件 */
|
||||||
//for (auto& word : sOrderedWord) ofs << word << endl;
|
if (! outputPath.empty()) {
|
||||||
//ofs.close();
|
cout << outputPath << endl;
|
||||||
|
ofstream ofs(outputPath);
|
||||||
|
for (auto& word : sOrderedWord) ofs << word << endl;
|
||||||
|
ofs.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
sOrderedWord.insert("A");
|
||||||
|
sOrderedWord.insert("Z");
|
||||||
|
|
||||||
/* 写入结果 */
|
/* 写入结果 */
|
||||||
|
mid = clock();
|
||||||
if (nlhs > 0) {
|
if (nlhs > 0) {
|
||||||
int wordSize = 0;
|
int wordSize = 0;
|
||||||
for (auto& word : sOrderedWord) {
|
for (auto& word : sOrderedWord) {
|
||||||
|
|
@ -98,12 +130,18 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
if (word[0] >= 'A' && word[0] <= 'Z') {
|
if (word[0] >= 'A' && word[0] <= 'Z') {
|
||||||
mxArray* mxStr = mxCreateString(word.c_str());
|
mxArray* mxStr = mxCreateString(word.c_str());
|
||||||
mxSetCell(pCell, i++, mxStr);
|
mxSetCell(pCell, i++, mxStr);
|
||||||
//ofs << word << endl;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
plhs[0] = pCell; // 赋值给返回值
|
plhs[0] = pCell; // 赋值给返回值
|
||||||
}
|
}
|
||||||
//ofs.close();
|
|
||||||
finish = clock();
|
finish = clock();
|
||||||
cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
|
||||||
|
finish = clock();
|
||||||
|
if (flagPrint)cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
||||||
|
}
|
||||||
|
|
||||||
|
// 供c++调试用
|
||||||
|
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
||||||
|
return mexFunction(nlhs, plhs, nrhs, prhs);
|
||||||
}
|
}
|
||||||
|
|
@ -7,45 +7,42 @@ using namespace std;
|
||||||
|
|
||||||
int main(int argc, const char** argv)
|
int main(int argc, const char** argv)
|
||||||
{
|
{
|
||||||
|
|
||||||
//string matFile = "D:\\x_large.mat";
|
|
||||||
//string matFile = "D:\\x.mat";
|
|
||||||
//string matFile = "D:\\Twirls\\wd_small.mat";
|
|
||||||
//string matFile = "D:\\Twirls\\wd.mat";
|
|
||||||
clock_t begin = clock(), finish;
|
clock_t begin = clock(), finish;
|
||||||
//string wd2Mat = "D:\\wd2_5w.mat";
|
const int argReserveNum = 10;
|
||||||
//string dicrMat = "D:\\dicr.mat";
|
mxArray* plhs[argReserveNum];
|
||||||
//string wdMat = "D:\\wd.mat";
|
const mxArray* prhs[argReserveNum];
|
||||||
|
|
||||||
|
/* SortDedup */
|
||||||
|
int nlhs = 1, nrhs = 2;
|
||||||
|
MATFile* pwdMat = matOpen("D:\\tmp\\wd_small.mat", "r");
|
||||||
|
prhs[0] = matGetVariable(pwdMat, "wd");
|
||||||
|
prhs[1] = mxCreateString("D:\\Twirls\\runtime\\output_1.dat");
|
||||||
|
prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
*mxGetPr(prhs[2]) = 2;
|
||||||
|
|
||||||
//string dicMat = "D:\\G_dc_large.mat";
|
/* CalcEntropy */
|
||||||
//string wdMat = "D:\\wd_large.mat";
|
// int nlhs = 2, nrhs = 4;
|
||||||
|
// MATFile* pMatAbs = matOpen("D:\\tmp\\abs_189.mat", "r");
|
||||||
//MATFile* pwdMat, *pwd2Mat, *pdicMat;
|
// MATFile* pMatG = matOpen("D:\\tmp\\G_189.mat", "r");
|
||||||
//mxArray* prhs[4];
|
// prhs[0] = matGetVariable(pMatAbs, "abs");
|
||||||
|
// prhs[1] = matGetVariable(pMatG, "G");
|
||||||
//pwdMat = matOpen(wdMat.c_str(), "r");
|
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
// pwd2Mat = matOpen(wd2Mat.c_str(), "r");
|
// *mxGetPr(prhs[2]) = 12;
|
||||||
//pdicMat = matOpen(dicMat.c_str(), "r");
|
// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
|
// *mxGetPr(prhs[3]) = 2;
|
||||||
// prhs[2] = matGetVariable(pdicrMat, "dicr");
|
|
||||||
|
|
||||||
/* IsWordInDic */
|
/* IsWordInDic */
|
||||||
// MATFile* pwdMat, * pdicMat;
|
// MATFile* pwdMat, * pdicMat;
|
||||||
// mxArray* plhs[4];
|
|
||||||
// const mxArray* prhs[4];
|
|
||||||
// int nlhs = 2, nrhs = 2;
|
// int nlhs = 2, nrhs = 2;
|
||||||
// pwdMat = matOpen("D:\\wd_large.mat", "r");
|
// pwdMat = matOpen("D:\\tmp\\wd_large.mat", "r");
|
||||||
// pdicMat = matOpen("D:\\G_dc_large.mat", "r");
|
// pdicMat = matOpen("D:\\tmp\\G_dc_large.mat", "r");
|
||||||
// prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
// prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
|
||||||
// prhs[1] = matGetVariable(pdicMat, "dc");
|
// prhs[1] = matGetVariable(pdicMat, "dc");
|
||||||
|
|
||||||
/* ClusterRandSim */
|
/* ClusterRandSim */
|
||||||
// mxArray* plhs[4];
|
|
||||||
// const mxArray* prhs[4];
|
|
||||||
// int nlhs = 2, nrhs = 4;
|
// int nlhs = 2, nrhs = 4;
|
||||||
// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
|
||||||
// MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
|
// MATFile* pMatH = matOpen("D:\\tmp\\h_large.mat", "r");
|
||||||
// prhs[0] = matGetVariable(pMatX, "x");
|
// prhs[0] = matGetVariable(pMatX, "x");
|
||||||
// prhs[1] = matGetVariable(pMatH, "h3");
|
// prhs[1] = matGetVariable(pMatH, "h3");
|
||||||
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
|
@ -55,11 +52,9 @@ int main(int argc, const char** argv)
|
||||||
|
|
||||||
|
|
||||||
/* AllClusterRandSim */
|
/* AllClusterRandSim */
|
||||||
// mxArray* plhs[4];
|
|
||||||
// const mxArray* prhs[4];
|
|
||||||
// int nlhs = 2, nrhs = 4;
|
// int nlhs = 2, nrhs = 4;
|
||||||
// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
|
// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
|
||||||
// MATFile* pMatIx = matOpen("D:\\ix_large.mat", "r");
|
// MATFile* pMatIx = matOpen("D:\\tmp\\ix_large.mat", "r");
|
||||||
// prhs[0] = matGetVariable(pMatX, "x");
|
// prhs[0] = matGetVariable(pMatX, "x");
|
||||||
// prhs[1] = matGetVariable(pMatIx, "ix");
|
// prhs[1] = matGetVariable(pMatIx, "ix");
|
||||||
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
|
|
@ -68,19 +63,15 @@ int main(int argc, const char** argv)
|
||||||
// *mxGetPr(prhs[3]) = 10000;
|
// *mxGetPr(prhs[3]) = 10000;
|
||||||
|
|
||||||
/* AllEntropyMean */
|
/* AllEntropyMean */
|
||||||
mxArray* plhs[4];
|
// int nlhs = 2, nrhs = 4;
|
||||||
const mxArray* prhs[4];
|
// MATFile* pMatG = matOpen("D:\\tmp\\G_large.mat", "r");
|
||||||
int nlhs = 2, nrhs = 4;
|
// MATFile* pMatWs = matOpen("D:\\tmp\\ws_large.mat", "r");
|
||||||
MATFile* pMatG = matOpen("D:\\G_large.mat", "r");
|
// mxArray* pMxG = matGetVariable(pMatG, "G");
|
||||||
MATFile* pMatWs = matOpen("D:\\ws_large.mat", "r");
|
// prhs[0] = mxGetField(pMxG, 0, "ds");
|
||||||
mxArray* pMxG = matGetVariable(pMatG, "G");
|
// prhs[1] = mxGetField(pMxG, 0, "frr");
|
||||||
prhs[0] = mxGetField(pMxG, 0, "ds");
|
// prhs[2] = matGetVariable(pMatWs, "ws");
|
||||||
prhs[1] = mxGetField(pMxG, 0, "frr");
|
// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
||||||
prhs[2] = matGetVariable(pMatWs, "ws");
|
// *mxGetPr(prhs[3]) = 12;
|
||||||
prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
|
|
||||||
*mxGetPr(prhs[3]) = 12;
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue