修改了SortDedup，可以将字符输出到文件

2023-10-13 15:46:15 +08:00 · 2023-10-13 15:46:15 +08:00 · f96d9cf4a2
parent ca3f99cc98
commit f96d9cf4a2
7 changed files with 371 additions and 138 deletions
--- a/CppRun/calc_entropy.cpp
+++ b/CppRun/calc_entropy.cpp
@ -283,7 +283,7 @@ void CalcEntropy(int argc, const char** argv) {
 	cout << "read abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 	/* 将分割结果写入mat文件 */
 	begin = clock();
-	if (argc > 6) {
+	if (argc > 6) { // 保存ws
 		MATFile* pMatFile = matOpen(argv[6], "w");
 		mxArray* pCellMtx= mxCreateCellMatrix(1, vvWordMtx.size());
 		for (int i = 0; i < vvWordMtx.size(); ++i) {
--- a/MexFunc/CalcEntropy.cpp
+++ b/MexFunc/CalcEntropy.cpp
@ -2,10 +2,26 @@
 #include <mat.h>
 #include <iostream>
 #include <algorithm>
 #include <vector>
 #include <string>
 #include <unordered_set>
 #include <ctime>
 #include <vector>
 #include <queue>
 #include <memory>
 #include <thread>
 #include <mutex>
 #include <condition_variable>
 #include <future>
 #include <functional>
 #include <stdexcept>
 #include <unordered_map>
 #include <set>
 #include <fstream>
 #include <random>
 #include <cmath>
 #include <stdlib.h>
 #include <limits.h>
 #include <atomic>
 using std::cout;
 using std::endl;
 using namespace std;
@ -40,6 +56,91 @@ using namespace std;
 			dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI];	\
 		}                                                           \
 	}
 // Fixed-size pool of worker threads that execute queued callables in FIFO order.
 // Work is submitted through enqueue(), which hands back a std::future for the
 // callable's result. Destroying the pool lets the workers finish every task that
 // is still queued and then joins them.
 class ThreadPool {
 public:
 	// Starts the given number of worker threads.
 	ThreadPool(size_t);
 	// Queues f(args...) for execution and returns a future for its result.
 	// Throws std::runtime_error when the pool is already stopping.
 	template<class F, class... Args>
 	auto enqueue(F&& f, Args&&... args)
 		->std::future<typename std::result_of<F(Args...)>::type>;
 	// Signals shutdown and joins every worker thread.
 	~ThreadPool();
 private:
 	// Loop executed by each worker thread: pop a task, run it, repeat.
 	void runWorker();
 	// worker threads, kept so the destructor can join them
 	std::vector< std::thread > workers;
 	// pending tasks, consumed front-first
 	std::queue< std::function<void()> > tasks;
 	// queue_mutex guards tasks and stop; condition wakes idle workers
 	std::mutex queue_mutex;
 	std::condition_variable condition;
 	bool stop;
 };
 // Launch the requested number of workers; each one runs runWorker().
 inline ThreadPool::ThreadPool(size_t threads)
 	: stop(false)
 {
 	size_t launched = 0;
 	while (launched < threads) {
 		workers.emplace_back([this] { runWorker(); });
 		++launched;
 	}
 }
 // Worker body: sleep until there is a task or the pool is stopping, then run
 // the oldest task outside the lock. Exits once stopping and the queue is empty.
 inline void ThreadPool::runWorker()
 {
 	while (true) {
 		std::function<void()> job;
 		{
 			std::unique_lock<std::mutex> guard(queue_mutex);
 			while (!stop && tasks.empty())
 				condition.wait(guard);
 			// The wait loop only ends with an empty queue when stop is set.
 			if (tasks.empty())
 				return;
 			job = std::move(tasks.front());
 			tasks.pop();
 		}
 		job();
 	}
 }
 // Wrap the call in a packaged_task so its result is delivered through a future,
 // push a type-erased invoker onto the queue, and wake one worker.
 template<class F, class... Args>
 auto ThreadPool::enqueue(F && f, Args&&... args)
 -> std::future<typename std::result_of<F(Args...)>::type>
 {
 	typedef typename std::result_of<F(Args...)>::type result_t;
 	typedef std::packaged_task<result_t()> packaged_t;
 	// shared_ptr because std::function requires a copyable callable
 	std::shared_ptr<packaged_t> job = std::make_shared<packaged_t>(
 		std::bind(std::forward<F>(f), std::forward<Args>(args)...));
 	std::future<result_t> pending = job->get_future();
 	{
 		std::lock_guard<std::mutex> guard(queue_mutex);
 		// submitting after shutdown has begun is an error
 		if (stop)
 			throw std::runtime_error("enqueue on stopped ThreadPool");
 		tasks.emplace([job]() { (*job)(); });
 	}
 	condition.notify_one();
 	return pending;
 }
 // Raise the stop flag under the lock, wake every worker, then join them all.
 inline ThreadPool::~ThreadPool()
 {
 	{
 		std::lock_guard<std::mutex> guard(queue_mutex);
 		stop = true;
 	}
 	condition.notify_all();
 	for (std::vector<std::thread>::iterator it = workers.begin(); it != workers.end(); ++it)
 		it->join();
 }
 // 将二维索引转成一维的索引
 inline int Get1DIndex(int colNum, int row, int col) {
 	return row * colNum + col;
@ -98,6 +199,79 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
 	delete[]strBuf;
 }
 // Packs a list of strings into a new 1 x N MATLAB cell array.
 // Cell k receives a MATLAB string created from vStr[k]; the caller receives
 // the newly created cell array.
 mxArray* writeToMatString1DCell(vector<string>& vStr) {
 	mxArray* pRowCell = mxCreateCellMatrix(1, vStr.size());
 	int cellIdx = 0;
 	for (const string& text : vStr) {
 		mxSetCell(pRowCell, cellIdx, mxCreateString(text.c_str()));
 		++cellIdx;
 	}
 	return pRowCell;
 }
 // Packs a list of string lists into a new 1 x N MATLAB cell array of cells.
 // Cell k holds the 1 x M string cell array built from vvStr[k] by
 // writeToMatString1DCell.
 mxArray* writeToMatString2DCell(vector<vector<string>>& vvStr) {
 	mxArray* pOuterCell = mxCreateCellMatrix(1, vvStr.size());
 	int rowIdx = 0;
 	for (vector<string>& row : vvStr) {
 		mxArray* pInnerCell = writeToMatString1DCell(row);
 		mxSetCell(pOuterCell, rowIdx, pInnerCell);
 		++rowIdx;
 	}
 	return pOuterCell;
 }
 // Copies a block of doubles into a newly created real MATLAB matrix, to be
 // used as a return value.
 //   data   : rowNum * colNum values, copied verbatim with memcpy, so they must
 //            already be in the element order MATLAB uses for the matrix
 //            (the caller is expected to transpose row-major data beforehand).
 //   rowNum : number of rows of the created matrix.
 //   colNum : number of columns of the created matrix.
 // Returns the new matrix, or NULL when a dimension is negative or the matrix
 // could not be created. When data is NULL or the matrix is empty nothing is
 // copied and the matrix keeps the contents mxCreateDoubleMatrix gave it.
 mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
 	// A negative int would turn into a huge unsigned dimension; reject it here.
 	if (rowNum < 0 || colNum < 0) return NULL;
 	mxArray* pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
 	if (pWriteArray == NULL) return NULL;
 	// Count elements in size_t so large matrices do not overflow int.
 	const size_t len = static_cast<size_t>(rowNum) * static_cast<size_t>(colNum);
 	// memcpy must not be handed a null source, even for a zero-length copy.
 	if (len > 0 && data != NULL) {
 		memcpy(mxGetPr(pWriteArray), data, sizeof(double) * len);
 	}
 	return pWriteArray;
 }
 /* Multi-threaded entropy computation. */
 // Arguments for one ThreadCalcEntropy call (one word group against all abstracts).
 struct TPEntropy {
 	std::vector<std::string>* pvDs; // words of this group
 	std::vector<double>* pvFr; // frequency of each word, same index as *pvDs; rescaled in place
 	std::vector<std::unordered_set<std::string>>* pvusAbsWord; // one word set per abstract
 	double* pHs; // output: one accumulator per abstract, updated with -=
 };
 // Accumulates, for every abstract i, the sum of -fr * log2(fr) over the group's
 // words that occur in abstract i, into param.pHs[i].
 // Before summing, the frequencies in *param.pvFr are rescaled in place so that
 // the largest one becomes 0.368 (this side effect is visible to the caller).
 // Nothing is modified when a pointer is null, the frequency list is empty, or
 // its maximum is not positive. Words whose frequency is not positive
 // contribute nothing instead of producing NaN. Only indices present in both
 // *pvDs and *pvFr are used.
 void ThreadCalcEntropy(TPEntropy& param) {
 	if (param.pvDs == NULL || param.pvFr == NULL || param.pvusAbsWord == NULL || param.pHs == NULL) return;
 	const std::vector<std::string>& vDs = *param.pvDs;
 	std::vector<double>& vFr = *param.pvFr;
 	const std::vector<std::unordered_set<std::string>>& vusAbsWord = *param.pvusAbsWord;
 	double* hs = param.pHs;
 	// max_element returns end() for an empty range; it must not be dereferenced.
 	if (vFr.empty()) return;
 	const double maxFr = *std::max_element(vFr.begin(), vFr.end());
 	// A zero, negative or NaN maximum cannot be used as a divisor.
 	if (!(maxFr > 0.0)) return;
 	// Rescale the frequencies so that the largest one equals normalMax.
 	const double normalMax = 0.368;
 	for (double& frVal : vFr) frVal = frVal * normalMax / maxFr;
 	// Per-word term fr * log2(fr), computed once instead of once per abstract.
 	const size_t numDsWord = std::min(vDs.size(), vFr.size());
 	std::vector<double> vTerm(numDsWord, 0.0);
 	for (size_t j = 0; j < numDsWord; ++j) {
 		// log2 of a non-positive value is -inf or NaN; leave that term at 0.
 		if (vFr[j] > 0.0) vTerm[j] = vFr[j] * std::log2(vFr[j]);
 	}
 	// For each abstract, subtract the term of every group word it contains.
 	const size_t numAbs = vusAbsWord.size();
 	for (size_t i = 0; i < numAbs; ++i) {
 		const std::unordered_set<std::string>& usWord = vusAbsWord[i];
 		for (size_t j = 0; j < numDsWord; ++j) {
 			if (usWord.find(vDs[j]) != usWord.end()) hs[i] -= vTerm[j];
 		}
 	}
 }
 /*
 输入：
 1. abs: 待感知的文献的摘要信息。
@ -106,23 +280,34 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
 1. hs: 信息熵，二维[len(知识颗粒)][len(文献)]
 */
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
-	//cout << "MexCalcEntropy" << endl;
+	if (nrhs < 2) {
-	//cout << nlhs << '\t' << nrhs << endl;
+		cout << "At least 2 arguments should be given for this function!" << endl;
 	if (nrhs != 2) {
 		cout << "2 arguments should be given for this function!" << endl;
 		return;
 	}
-	clock_t begin, finish;
+	clock_t begin = clock(), mid, finish;
 	begin = clock();
 	vector<vector<string> > vvDs; // 每个知识颗粒的ds矩阵（词汇矩阵）
 	vector<vector<double> > vvFr; // 词汇对应的频率
 	GetFrDs(prhs[1], vvDs, vvFr);
 	vector<string> vAbstract; // 读取abs1, 然后分割成一个一个的单词
 	GetAbstract(prhs[0], vAbstract);
 	vector<vector<string>> vvDs; // 每个知识颗粒的ds矩阵（词汇矩阵）
 	vector<vector<double>> vvFr; // 词汇对应的频率
 	GetFrDs(prhs[1], vvDs, vvFr);
 	int numThread = 1; // number of worker threads; values below 1 are clamped to 1 below
 	if (nrhs > 2) {
 		double* pData = (double*)mxGetData(prhs[2]);
 		numThread = (int)pData[0];
 		if (numThread < 1) numThread = 1;
 	}
 	int flagPrint = 0; // 是否打印信息, 1打印简单信息，2打印详细信息
 	if (nrhs > 3) {
 		double* pData = (double*)mxGetData(prhs[3]);
 		flagPrint = (int)pData[0];
 	}
 	finish = clock();
 	if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 	/* 将摘要信息分割成一个一个的词汇 */
-	// begin = clock();
+	mid = clock();
 	unordered_set<char> usWordChars; // 能组成单词的字符，要不要考虑数字？原版matlab是提取了数字的
 	for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
 	for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
@ -154,82 +339,91 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 			vusAbsWord[i].insert(upWord);
 		}
 	}
-	// finish = clock();
+	finish = clock();
-	// cout << "Split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	if (flagPrint == 2) cout << "Split abstract time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
 	// 存放结果，用一维数组存放二维数据
 	mid = clock();
 	vector<double> hs;
-	vector<double> hr;
+	// vector<double> hr;
 	const int numLiterature = vusAbsWord.size(); // pubmed 文件中包含的文献数量
 	const int numGroup = vvDs.size(); // ds包含的组数
 	hs.resize(numGroup * numLiterature);
-	hr.resize(numLiterature * numGroup);
+	// hr.resize(numLiterature * numGroup);
-
+	// 并行, 没有计算hr
-	for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
+	ThreadPool thPool(numThread);
-		vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
+	for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) {
-		vector<double>& vFr = vvFr[groupIdx]; // frequency
+		TPEntropy tp = { &vvDs[groupIdx], &vvFr[groupIdx], &vusAbsWord, &hs[groupIdx * numLiterature] };
-		const int numWord = vDs.size(); // 这一组数据中包含的单词数量
+		thPool.enqueue(ThreadCalcEntropy, tp);
 		vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
 		// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
 		for (int i = 0; i < numLiterature; ++i) {
 			for (int j = 0; j < numWord; ++j) {
 				if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
 					vX[i][j] = 1;
 				}
 			}
 		}
 		// 找词汇的最高频率
 		double maxFr = *max_element(vFr.begin(), vFr.end());
 		// 将fr的数值规范化到（0，0.368）之间
 		const double normalMax = 0.368;
 		for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
 		maxFr = normalMax;
 		// 对每个知识颗粒每一组数据，计算信息熵
 		for (int i = 0; i < numLiterature; ++i) {
 			for (int j = 0; j < numWord; ++j) {
 				if (vX[i][j] == 1) {
 					hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
 				}
 			}
 		}
 		// 找最高频词汇所在的索引位置
 		vector<int> vMaxPos;
 		int idx = 0;
 		for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
 			if (val == maxFr) vMaxPos.push_back(idx);
 			idx++;
 			});
 		for (int i = 0; i < numLiterature; ++i) {
 			int cumulateX = 0; // 计算在最高频词汇处，x值的累加结果
 			for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
 			if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
 				hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
 			}
 		}
 	}
 	thPool.~ThreadPool();
 //  // 串行
 //	for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
 //		vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
 //		vector<double>& vFr = vvFr[groupIdx]; // frequency
 //		const int numWord = vDs.size(); // 这一组数据中包含的单词数量
 //		vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
 //		// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
 //		for (int i = 0; i < numLiterature; ++i) {
 //			for (int j = 0; j < numWord; ++j) {
 //				if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
 //					vX[i][j] = 1;
 //				}
 //			}
 //		}
 //
 //		// 找词汇的最高频率
 //		double maxFr = *max_element(vFr.begin(), vFr.end());
 //		// 将fr的数值规范化到（0，0.368）之间
 //		const double normalMax = 0.368;
 //		for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
 //		maxFr = normalMax;
 //		// 对每个知识颗粒每一组数据，计算信息熵
 //		for (int i = 0; i < numLiterature; ++i) {
 //			for (int j = 0; j < numWord; ++j) {
 //				if (vX[i][j] == 1) {
 //					hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
 //				}
 //			}
 //		}
 //
 //		// 找最高频词汇所在的索引位置
 //		vector<int> vMaxPos;
 //		int idx = 0;
 //		for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
 //			if (val == maxFr) vMaxPos.push_back(idx);
 //			idx++;
 //			});
 //
 //		for (int i = 0; i < numLiterature; ++i) {
 //			int cumulateX = 0; // 计算在最高频词汇处，x值的累加结果
 //			for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
 //			if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
 //				hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
 //			}
 //		}
 //	}
 	finish = clock();
 	if (flagPrint == 2) cout << "Calc entropy time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
 	/* 将结果写入返回值 */
 	mid = clock();
 	if (nlhs > 0) {
 		int datasize = numGroup * numLiterature;
-		double* mtxData = new double[datasize];//待存储数据转为double格式
+		vector<double> vData(datasize);
-		mxArray* pWriteArray = NULL;//matlab格式矩阵
+		for (int i = 0; i < numGroup; i++) for (int j = 0; j < numLiterature; j++) 
-		//创建一个rowNum*colNum的矩阵  
+			vData[j * numGroup + i] = hs[i * numLiterature + j];
-		pWriteArray = mxCreateDoubleMatrix(numGroup, numLiterature, mxREAL);
+		plhs[0] = writeToMatDouble(vData.data(), numGroup, numLiterature);
-		for (int i = 0; i < numGroup; i++) {
+	}
-			for (int j = 0; j < numLiterature; j++) {
+	if (nlhs > 1) { // 将ws写入结果
-				mtxData[j * numGroup + i] = hs[i * numLiterature + j];
+		plhs[1] = writeToMatString2DCell(vvWordMtx);
 			}
 		}
 		//把data的值赋给pWriteArray指针
 		memcpy((void*)(mxGetPr(pWriteArray)), (void*)mtxData, sizeof(double) * datasize);
 		plhs[0] = pWriteArray; // 赋值给返回值
 		delete[]mtxData;
 	}
 	finish = clock();
-	// cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
 	finish = clock();
 	if(flagPrint) cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 }
 /* 供main调试调用 */
--- a/MexFunc/CorrelationDist.cpp
+++ b/MexFunc/CorrelationDist.cpp
@ -18,8 +18,6 @@
 #include <functional>
 #include <stdexcept>
 // #include "CommonLib/kthread.h"
 // #include "CommonLib/thread_pool.h"
 using std::cout;
 using std::endl;
 using namespace std;
@ -160,7 +158,14 @@ void ThreadCalcDist(TPCorDist& param) {
 }
 /* 入口函数 */
-// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
+/*
 输入：
 1. x: 二维。
 [2]. numThread: 线程数。
 [3]. numGroup: 每次线程函数处理的数据量。
 输出：
 1. d: 相关距离
 */
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	if (nrhs < 1) {
 		cout << "At least 1 arguments should be given for this function!" << endl;
@ -321,4 +326,9 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	finish = clock();
 	cout << "Correlation Dist Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 }
 /* Wrapper for main() to call when debugging: forwards all four arguments unchanged to mexFunction. */
 void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	mexFunction(nlhs, plhs, nrhs, prhs);
 }
--- a/MexFunc/MexFunc.vcxproj
+++ b/MexFunc/MexFunc.vcxproj
@ -119,7 +119,7 @@
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
-    <ClCompile Include="AllEntropyMean.cpp" />
+    <ClCompile Include="IsWordInDic.cpp" />
    <ClCompile Include="main.cpp" />
  </ItemGroup>
  <ItemGroup>
--- a/MexFunc/MexFunc.vcxproj.filters
+++ b/MexFunc/MexFunc.vcxproj.filters
@ -18,7 +18,7 @@
    <ClCompile Include="main.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="AllEntropyMean.cpp">
+    <ClCompile Include="IsWordInDic.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
--- a/MexFunc/SortDedup.cpp
+++ b/MexFunc/SortDedup.cpp
@ -59,32 +59,64 @@ bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
 }
 /* 入口函数 */
-// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
+/*
 输入：
 1. wd: 文献摘要，由二维cell组成的字符串数组
 [2]. 将字符串保存到文件路径
 [3]. flagPrint 是否输出信息
 输出：
 1. dic: 单词组成的一维cell，包含去重之后的文献摘要所有单词，大写，按字母序排序(只包含字母的单词，去掉数字等)
 */
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	if (nrhs < 1) {
 		cout << "At least 1 arguments should be given for this function!" << endl;
 		return;
 	}
-	clock_t begin = clock(), finish;
+	clock_t begin = clock(), mid, finish;
 	//set<string> sOrderedWord;
 	unordered_set<string> usStr;
 	ReadInsertWord(prhs[0], usStr);
-	usStr.insert("A");
+	// usStr.insert("A");
-	usStr.insert("Z");
+	// usStr.insert("Z");
 	string outputPath;
 	if (nrhs > 1) {
 		char* strBuf = new char[STRING_BUF_SIZE];
 		mxGetString(prhs[1], strBuf, STRING_BUF_SIZE);
 		outputPath = strBuf;
 		delete[]strBuf;
 	}
-	///* 排序 */
+	int flagPrint = 0; // 是否打印信息, 1打印简单信息，2打印详细信息
 	if (nrhs > 2) {
 		double* pData = (double*)mxGetData(prhs[2]);
 		flagPrint = (int)pData[0];
 	}
 	finish = clock();
 	if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 	/* 排序 */
 	mid = clock();
 	set<string> sOrderedWord;
 	for (auto& word : usStr) {
 		sOrderedWord.insert(word);
 	}
 	finish = clock();
 	if (flagPrint == 2) cout << "Sort and deduplicate time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
-	//ofstream ofs("d:\\wd_dict.txt");
+	/* 将字符串保存到文件 */
-	//for (auto& word : sOrderedWord) ofs << word << endl;
+	if (! outputPath.empty()) {
-	//ofs.close();
+		cout << outputPath << endl;
 		ofstream ofs(outputPath);
 		for (auto& word : sOrderedWord) ofs << word << endl;
 		ofs.close();
 	}
 	sOrderedWord.insert("A");
 	sOrderedWord.insert("Z");
 	/* 写入结果 */
 	mid = clock();
 	if (nlhs > 0) {
 		int wordSize = 0;
 		for (auto& word : sOrderedWord) {
@ -98,12 +130,18 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 			if (word[0] >= 'A' && word[0] <= 'Z') {
 				mxArray* mxStr = mxCreateString(word.c_str());
 				mxSetCell(pCell, i++, mxStr);
 				//ofs << word << endl;
 			}
 		}
 		plhs[0] = pCell; // 赋值给返回值
 	}
 	//ofs.close();
 	finish = clock();
-	cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
 	finish = clock();
 	if (flagPrint)cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 }
 // Wrapper for C++ debugging: forwards all four arguments unchanged to mexFunction.
 void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	return mexFunction(nlhs, plhs, nrhs, prhs);
 }
--- a/MexFunc/main.cpp
+++ b/MexFunc/main.cpp
@ -7,45 +7,42 @@ using namespace std;
 int main(int argc, const char** argv)
 {
    //string matFile = "D:\\x_large.mat";
 	//string matFile = "D:\\x.mat";
 	//string matFile = "D:\\Twirls\\wd_small.mat";
 	//string matFile = "D:\\Twirls\\wd.mat";
 	clock_t begin = clock(), finish;
-	//string wd2Mat = "D:\\wd2_5w.mat";
+	const int argReserveNum = 10;
-	//string dicrMat = "D:\\dicr.mat";
+	mxArray* plhs[argReserveNum];
-	//string wdMat = "D:\\wd.mat";
+	const mxArray* prhs[argReserveNum];
 	/* SortDedup */
 	int nlhs = 1, nrhs = 2;
 	MATFile* pwdMat = matOpen("D:\\tmp\\wd_small.mat", "r");
 	prhs[0] = matGetVariable(pwdMat, "wd");
 	prhs[1] = mxCreateString("D:\\Twirls\\runtime\\output_1.dat");
 	prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
 	*mxGetPr(prhs[2]) = 2;
-	//string dicMat = "D:\\G_dc_large.mat";
+	/* CalcEntropy */
-	//string wdMat = "D:\\wd_large.mat";
+	// int nlhs = 2, nrhs = 4;
-
+	// MATFile* pMatAbs = matOpen("D:\\tmp\\abs_189.mat", "r");
-	//MATFile* pwdMat, *pwd2Mat, *pdicMat;
+	// MATFile* pMatG = matOpen("D:\\tmp\\G_189.mat", "r");
-	//mxArray* prhs[4];
+	// prhs[0] = matGetVariable(pMatAbs, "abs");
-
+	// prhs[1] = matGetVariable(pMatG, "G");
-	//pwdMat = matOpen(wdMat.c_str(), "r");
+	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
-	// pwd2Mat = matOpen(wd2Mat.c_str(), "r");
+	// *mxGetPr(prhs[2]) = 12;
-	//pdicMat = matOpen(dicMat.c_str(), "r");
+	// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
-	// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
+	// *mxGetPr(prhs[3]) = 2;
 	// 	prhs[2] = matGetVariable(pdicrMat, "dicr");
 	/* IsWordInDic  */
 	// MATFile* pwdMat, * pdicMat;
 	// mxArray* plhs[4];
 	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 2;
-	// pwdMat = matOpen("D:\\wd_large.mat", "r");
+	// pwdMat = matOpen("D:\\tmp\\wd_large.mat", "r");
-	// pdicMat = matOpen("D:\\G_dc_large.mat", "r");
+	// pdicMat = matOpen("D:\\tmp\\G_dc_large.mat", "r");
 	// prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
 	// prhs[1] = matGetVariable(pdicMat, "dc");
 	/* ClusterRandSim */
 	// mxArray* plhs[4];
 	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 4;
-	// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
+	// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
-	// MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
+	// MATFile* pMatH = matOpen("D:\\tmp\\h_large.mat", "r");
 	// prhs[0] = matGetVariable(pMatX, "x");
 	// prhs[1] = matGetVariable(pMatH, "h3");
 	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
@ -55,11 +52,9 @@ int main(int argc, const char** argv)
 	/* AllClusterRandSim */
 	// mxArray* plhs[4];
 	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 4;
-	// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
+	// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
-	// MATFile* pMatIx = matOpen("D:\\ix_large.mat", "r");
+	// MATFile* pMatIx = matOpen("D:\\tmp\\ix_large.mat", "r");
 	// prhs[0] = matGetVariable(pMatX, "x");
 	// prhs[1] = matGetVariable(pMatIx, "ix");
 	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
@ -68,19 +63,15 @@ int main(int argc, const char** argv)
 	// *mxGetPr(prhs[3]) = 10000;
 	/* AllEntropyMean */
-	mxArray* plhs[4];
+	// int nlhs = 2, nrhs = 4;
-	const mxArray* prhs[4];
+	// MATFile* pMatG = matOpen("D:\\tmp\\G_large.mat", "r");
-	int nlhs = 2, nrhs = 4;
+	// MATFile* pMatWs = matOpen("D:\\tmp\\ws_large.mat", "r");
-	MATFile* pMatG = matOpen("D:\\G_large.mat", "r");
+	// mxArray* pMxG = matGetVariable(pMatG, "G");
-	MATFile* pMatWs = matOpen("D:\\ws_large.mat", "r");
+	// prhs[0] = mxGetField(pMxG, 0, "ds");
-	mxArray* pMxG = matGetVariable(pMatG, "G");
+	// prhs[1] = mxGetField(pMxG, 0, "frr");
-	prhs[0] = mxGetField(pMxG, 0, "ds");
+	// prhs[2] = matGetVariable(pMatWs, "ws");
-	prhs[1] = mxGetField(pMxG, 0, "frr");
+	// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
-	prhs[2] = matGetVariable(pMatWs, "ws");
+	// *mxGetPr(prhs[3]) = 12;
 	prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
 	*mxGetPr(prhs[3]) = 12;