修改了SortDedup，可以将字符输出到文件

2023-10-13 15:46:15 +08:00 · 2023-10-13 15:46:15 +08:00 · f96d9cf4a2
parent ca3f99cc98
commit f96d9cf4a2
7 changed files with 371 additions and 138 deletions
--- a/CppRun/calc_entropy.cpp
+++ b/CppRun/calc_entropy.cpp
@ -283,7 +283,7 @@ void CalcEntropy(int argc, const char** argv) {
 	cout << "read abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 	/* 将分割结果写入mat文件 */
 	begin = clock();
-	if (argc > 6) {
+	if (argc > 6) { // 保存ws
 		MATFile* pMatFile = matOpen(argv[6], "w");
 		mxArray* pCellMtx= mxCreateCellMatrix(1, vvWordMtx.size());
 		for (int i = 0; i < vvWordMtx.size(); ++i) {
--- a/MexFunc/CalcEntropy.cpp
+++ b/MexFunc/CalcEntropy.cpp
@ -2,10 +2,26 @@
 #include <mat.h>
 #include <iostream>
 #include <algorithm>
-#include <vector>
 #include <string>
 #include <unordered_set>
 #include <ctime>
+#include <vector>
+#include <queue>
+#include <memory>
+#include <thread>
+#include <mutex>
+#include <condition_variable>
+#include <future>
+#include <functional>
+#include <stdexcept>
+#include <unordered_map>
+#include <set>
+#include <fstream>
+#include <random>
+#include <cmath>
+#include <stdlib.h>
+#include <limits.h>
+#include <atomic>
 using std::cout;
 using std::endl;
 using namespace std;
@ -40,6 +56,91 @@ using namespace std;
 			dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI];	\
 		}                                                           \
 	}
+
+// Fixed-size pool of worker threads that execute queued tasks in FIFO order.
+// Tasks are submitted with enqueue(); the destructor sets the stop flag,
+// wakes every worker and joins them.
+class ThreadPool {
+public:
+	// Starts the given number of worker threads.
+	ThreadPool(size_t);
+	// Queues f(args...) for execution and returns a future for its result.
+	// Throws std::runtime_error if the pool has already been stopped.
+	template<class F, class... Args>
+	auto enqueue(F&& f, Args&&... args)
+		->std::future<typename std::result_of<F(Args...)>::type>;
+	// Signals stop and joins all worker threads.
+	~ThreadPool();
+private:
+	// need to keep track of threads so we can join them
+	std::vector< std::thread > workers;
+	// the task queue
+	std::queue< std::function<void()> > tasks;
+
+	// synchronization
+	std::mutex queue_mutex; // guards tasks and stop
+	std::condition_variable condition; // notified when a task is queued or stop is set
+	bool stop; // set by the destructor; workers exit once it is set and the queue is empty
+};
+
+// the constructor just launches some amount of workers
+// Each worker loops forever: it sleeps on the condition variable until a task
+// is available or stop is set, pops one task while holding the lock, and runs
+// it after releasing the lock. A worker returns only when stop is set AND the
+// queue is empty.
+inline ThreadPool::ThreadPool(size_t threads)
+	: stop(false)
+{
+	for (size_t i = 0;i < threads;++i)
+		workers.emplace_back(
+			[this]
+			{
+				for (;;)
+				{
+					std::function<void()> task;
+
+					{
+						// The lock is held only inside this inner scope.
+						std::unique_lock<std::mutex> lock(this->queue_mutex);
+						// The predicate form of wait() re-checks the condition after every wake-up.
+						this->condition.wait(lock,
+							[this] { return this->stop || !this->tasks.empty(); });
+						if (this->stop && this->tasks.empty())
+							return;
+						task = std::move(this->tasks.front());
+						this->tasks.pop();
+					}
+
+					// Run the task outside the lock so other workers can dequeue meanwhile.
+					task();
+				}
+			}
+			);
+}
+
+// add new work item to the pool
+// Binds f with args into a packaged_task, pushes a wrapper for it onto the
+// queue, wakes one worker and returns the future associated with the task.
+// Throws std::runtime_error if called after stop has been set.
+template<class F, class... Args>
+auto ThreadPool::enqueue(F && f, Args&&... args)
+-> std::future<typename std::result_of<F(Args...)>::type>
+{
+	using return_type = typename std::result_of<F(Args...)>::type;
+
+	// The task is held through a shared_ptr so the queued lambda below can be copied.
+	auto task = std::make_shared< std::packaged_task<return_type()> >(
+		std::bind(std::forward<F>(f), std::forward<Args>(args)...)
+		);
+
+	std::future<return_type> res = task->get_future();
+	{
+		std::unique_lock<std::mutex> lock(queue_mutex);
+
+		// don't allow enqueueing after stopping the pool
+		if (stop)
+			throw std::runtime_error("enqueue on stopped ThreadPool");
+
+		tasks.emplace([task]() { (*task)(); });
+	}
+	// Notify after the lock has been released.
+	condition.notify_one();
+	return res;
+}
+
+// the destructor joins all threads
+// Sets stop under the queue lock, wakes every worker and joins them. Workers
+// return only once the queue is empty, so tasks that are already queued still
+// run before the destructor finishes.
+inline ThreadPool::~ThreadPool()
+{
+	{
+		std::unique_lock<std::mutex> lock(queue_mutex);
+		stop = true;
+	}
+	condition.notify_all();
+	for (std::thread& worker : workers)
+		worker.join();
+}
+
 // 将二维索引转成一维的索引
 inline int Get1DIndex(int colNum, int row, int col) {
 	return row * colNum + col;
@ -98,6 +199,79 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
 	delete[]strBuf;
 }

+// Builds a 1 x N cell matrix in which cell k holds the array created by
+// mxCreateString from vStr[k]. The new cell matrix is returned to the caller.
+mxArray* writeToMatString1DCell(vector<string>& vStr) {
+	mxArray* pRowCell = mxCreateCellMatrix(1, vStr.size());
+	int cellIdx = 0;
+	for (const string& text : vStr) {
+		mxSetCell(pRowCell, cellIdx, mxCreateString(text.c_str()));
+		++cellIdx;
+	}
+	return pRowCell;
+}
+
+// Builds a 1 x N cell matrix in which cell k is itself the cell matrix
+// produced by writeToMatString1DCell for vvStr[k].
+mxArray* writeToMatString2DCell(vector<vector<string>>& vvStr) {
+	mxArray* pOuterCell = mxCreateCellMatrix(1, vvStr.size());
+	int rowIdx = 0;
+	for (vector<string>& rowStrings : vvStr) {
+		mxSetCell(pOuterCell, rowIdx, writeToMatString1DCell(rowStrings));
+		++rowIdx;
+	}
+	return pOuterCell;
+}
+
+// Creates a real double matrix of size rowNum x colNum and copies
+// rowNum * colNum values from data into it, in the order given. No transposing
+// is done here; mexFunction reorders its data before calling this function.
+// Returns the new mxArray, which can be used as a return value.
+mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
+	mxArray* pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
+	// Count elements in size_t: an int product can overflow for large matrices.
+	size_t len = 0;
+	if (rowNum > 0 && colNum > 0) {
+		len = static_cast<size_t>(rowNum) * static_cast<size_t>(colNum);
+	}
+	double* dst = (pWriteArray != NULL) ? mxGetPr(pWriteArray) : NULL;
+	// Skip the copy for an empty matrix or a missing buffer; memcpy must not be
+	// given a null pointer, even for a length of zero.
+	if (len > 0 && data != NULL && dst != NULL) {
+		memcpy((void*)dst, (const void*)data, sizeof(double) * len);
+	}
+	return pWriteArray;
+}
+
+/* Multi-threaded entropy computation */
+// Argument bundle handed to ThreadCalcEntropy for one word group.
+// It stores pointers only; the struct neither owns nor frees what they refer to.
+struct TPEntropy {
+	vector<string>* pvDs; // words of this group
+	vector<double>* pvFr; // frequency of each word in pvDs (rescaled in place by ThreadCalcEntropy)
+	vector<unordered_set<string>>* pvusAbsWord; // one word set per abstract
+	double* pHs; // output: start of this group's row in the result buffer, one value per abstract
+};
+
+// Thread-pool task: adds the entropy contribution of one word group to
+// param.pHs for every abstract.
+//
+//   param.pvDs         words of the group
+//   param.pvFr         frequency of each word; rescaled IN PLACE so that the
+//                      largest value becomes 0.368
+//   param.pvusAbsWord  one set of words per abstract
+//   param.pHs          output with one slot per abstract; for every group word
+//                      present in abstract i, -fr * log2(fr) is added to pHs[i]
+//
+// Membership is tested while accumulating, so no abstracts x words matrix is
+// allocated. An empty group or a zero maximum frequency returns without
+// touching the output, and a word whose frequency is not positive contributes
+// 0 (the usual 0 * log 0 = 0 convention) instead of NaN.
+void ThreadCalcEntropy(TPEntropy& param) {
+	vector<string>& vDs = *param.pvDs; // words of this group
+	vector<double>& vFr = *param.pvFr; // matching frequencies
+	vector<unordered_set<string>>& vusAbsWord = *param.pvusAbsWord;
+	double* hs = param.pHs;
+	const size_t numAbs = vusAbsWord.size();
+	// Use only positions that exist in both vectors.
+	const size_t numDsWord = std::min(vDs.size(), vFr.size());
+	if (hs == NULL || numAbs == 0 || numDsWord == 0) return;
+
+	// Rescale the frequencies so the highest one equals normalMax.
+	const double maxFr = *max_element(vFr.begin(), vFr.end());
+	if (maxFr == 0.0) return; // would divide by zero; nothing can contribute
+	const double normalMax = 0.368;
+	for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
+
+	// The term of a word is the same for every abstract, so compute it once.
+	vector<double> vTerm(numDsWord, 0.0);
+	for (size_t j = 0; j < numDsWord; ++j) {
+		if (vFr[j] > 0) vTerm[j] = -vFr[j] * log2(vFr[j]);
+	}
+
+	// Add the term of every group word that occurs in the abstract.
+	for (size_t i = 0; i < numAbs; ++i) {
+		const unordered_set<string>& absWords = vusAbsWord[i];
+		for (size_t j = 0; j < numDsWord; ++j) {
+			if (absWords.find(vDs[j]) != absWords.end()) {
+				hs[i] += vTerm[j];
+			}
+		}
+	}
+}
+
 /*
 输入：
 1. abs: 待感知的文献的摘要信息。
@ -106,23 +280,34 @@ void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
 1. hs: 信息熵，二维[len(知识颗粒)][len(文献)]
 */
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
-	//cout << "MexCalcEntropy" << endl;
-	//cout << nlhs << '\t' << nrhs << endl;
-	if (nrhs != 2) {
-		cout << "2 arguments should be given for this function!" << endl;
+	if (nrhs < 2) {
+		cout << "At least 2 arguments should be given for this function!" << endl;
 		return;
 	}
-	clock_t begin, finish;
-	begin = clock();
-	vector<vector<string> > vvDs; // 每个知识颗粒的ds矩阵（词汇矩阵）
-	vector<vector<double> > vvFr; // 词汇对应的频率
-	GetFrDs(prhs[1], vvDs, vvFr);
-
+	clock_t begin = clock(), mid, finish;
 	vector<string> vAbstract; // 读取abs1, 然后分割成一个一个的单词
 	GetAbstract(prhs[0], vAbstract);

+	vector<vector<string>> vvDs; // 每个知识颗粒的ds矩阵（词汇矩阵）
+	vector<vector<double>> vvFr; // 词汇对应的频率
+	GetFrDs(prhs[1], vvDs, vvFr);
+
+	int numThread = 1; // 线程数, 默认为1, 小于1时按1处理
+	if (nrhs > 2) {
+		double* pData = (double*)mxGetData(prhs[2]);
+		numThread = (int)pData[0];
+		if (numThread < 1) numThread = 1;
+	}
+
+	int flagPrint = 0; // 是否打印信息, 1打印简单信息，2打印详细信息
+	if (nrhs > 3) {
+		double* pData = (double*)mxGetData(prhs[3]);
+		flagPrint = (int)pData[0];
+	}
+	finish = clock();
+	if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 	/* 将摘要信息分割成一个一个的词汇 */
-	// begin = clock();
+	mid = clock();
 	unordered_set<char> usWordChars; // 能组成单词的字符，要不要考虑数字？原版matlab是提取了数字的
 	for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
 	for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
@ -154,82 +339,91 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 			vusAbsWord[i].insert(upWord);
 		}
 	}
-	// finish = clock();
-	// cout << "Split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	finish = clock();
+	if (flagPrint == 2) cout << "Split abstract time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;

 	// 存放结果，用一维数组存放二维数据
+	mid = clock();
 	vector<double> hs;
-	vector<double> hr;
+	// vector<double> hr;
 	const int numLiterature = vusAbsWord.size(); // pubmed 文件中包含的文献数量
 	const int numGroup = vvDs.size(); // ds包含的组数
 	hs.resize(numGroup * numLiterature);
-	hr.resize(numLiterature * numGroup);
-
-	for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
-		vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
-		vector<double>& vFr = vvFr[groupIdx]; // frequency
-		const int numWord = vDs.size(); // 这一组数据中包含的单词数量
-		vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
-		// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
-		for (int i = 0; i < numLiterature; ++i) {
-			for (int j = 0; j < numWord; ++j) {
-				if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
-					vX[i][j] = 1;
-				}
-			}
-		}
-
-		// 找词汇的最高频率
-		double maxFr = *max_element(vFr.begin(), vFr.end());
-		// 将fr的数值规范化到（0，0.368）之间
-		const double normalMax = 0.368;
-		for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
-		maxFr = normalMax;
-		// 对每个知识颗粒每一组数据，计算信息熵
-		for (int i = 0; i < numLiterature; ++i) {
-			for (int j = 0; j < numWord; ++j) {
-				if (vX[i][j] == 1) {
-					hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
-				}
-			}
-		}
-
-		// 找最高频词汇所在的索引位置
-		vector<int> vMaxPos;
-		int idx = 0;
-		for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
-			if (val == maxFr) vMaxPos.push_back(idx);
-			idx++;
-			});
-
-		for (int i = 0; i < numLiterature; ++i) {
-			int cumulateX = 0; // 计算在最高频词汇处，x值的累加结果
-			for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
-			if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
-				hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
-			}
-		}
+	// hr.resize(numLiterature * numGroup);
+	// 并行, 没有计算hr
+	{ // Scope the pool: leaving this block runs ~ThreadPool() exactly once, which waits for every queued task.
+		// The destructor must not be called explicitly on a local object; it would run a second time at scope exit.
+		ThreadPool thPool(numThread);
+		for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) {
+			// hs.data() + offset is valid even when hs is empty (no abstracts); indexing hs[...] would not be.
+			TPEntropy tp = { &vvDs[groupIdx], &vvFr[groupIdx], &vusAbsWord, hs.data() + static_cast<size_t>(groupIdx) * numLiterature };
+			thPool.enqueue(ThreadCalcEntropy, tp);
+		}
 	}
+
+//  // 串行
+//	for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // 遍历知识颗粒中的每一组
+//		vector<string>& vDs = vvDs[groupIdx]; // 这一组ds
+//		vector<double>& vFr = vvFr[groupIdx]; // frequency
+//		const int numWord = vDs.size(); // 这一组数据中包含的单词数量
+//		vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
+//		// 检查知识颗粒中的词语是否出现在pubmed摘要的词语中
+//		for (int i = 0; i < numLiterature; ++i) {
+//			for (int j = 0; j < numWord; ++j) {
+//				if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // 这一组单词中的j索引位置的单词在第i个文献中出现过
+//					vX[i][j] = 1;
+//				}
+//			}
+//		}
+//
+//		// 找词汇的最高频率
+//		double maxFr = *max_element(vFr.begin(), vFr.end());
+//		// 将fr的数值规范化到（0，0.368）之间
+//		const double normalMax = 0.368;
+//		for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
+//		maxFr = normalMax;
+//		// 对每个知识颗粒每一组数据，计算信息熵
+//		for (int i = 0; i < numLiterature; ++i) {
+//			for (int j = 0; j < numWord; ++j) {
+//				if (vX[i][j] == 1) {
+//					hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
+//				}
+//			}
+//		}
+//
+//		// 找最高频词汇所在的索引位置
+//		vector<int> vMaxPos;
+//		int idx = 0;
+//		for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
+//			if (val == maxFr) vMaxPos.push_back(idx);
+//			idx++;
+//			});
+//
+//		for (int i = 0; i < numLiterature; ++i) {
+//			int cumulateX = 0; // 计算在最高频词汇处，x值的累加结果
+//			for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
+//			if (cumulateX == vMaxPos.size()) { // 如果频率最高的词汇都出现在了文献中
+//				hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // 应该是表示知识颗粒的这一组数据跟这篇文献相关性比较高
+//			}
+//		}
+//	}
+	finish = clock();
+	if (flagPrint == 2) cout << "Calc entropy time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;

 	/* 将结果写入返回值 */
+	mid = clock();
 	if (nlhs > 0) {
 		int datasize = numGroup * numLiterature;
-		double* mtxData = new double[datasize];//待存储数据转为double格式
-		mxArray* pWriteArray = NULL;//matlab格式矩阵
-		//创建一个rowNum*colNum的矩阵  
-		pWriteArray = mxCreateDoubleMatrix(numGroup, numLiterature, mxREAL);
-		for (int i = 0; i < numGroup; i++) {
-			for (int j = 0; j < numLiterature; j++) {
-				mtxData[j * numGroup + i] = hs[i * numLiterature + j];
-			}
-		}
-		//把data的值赋给pWriteArray指针
-		memcpy((void*)(mxGetPr(pWriteArray)), (void*)mtxData, sizeof(double) * datasize);
-		plhs[0] = pWriteArray; // 赋值给返回值
-		delete[]mtxData;
+		vector<double> vData(datasize);
+		for (int i = 0; i < numGroup; i++) for (int j = 0; j < numLiterature; j++) 
+			vData[j * numGroup + i] = hs[i * numLiterature + j];
+		plhs[0] = writeToMatDouble(vData.data(), numGroup, numLiterature);
+	}
+	if (nlhs > 1) { // 将ws写入结果
+		plhs[1] = writeToMatString2DCell(vvWordMtx);
 	}
 	finish = clock();
-	// cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
+
+	finish = clock();
+	if(flagPrint) cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
 }

 /* 供main调试调用 */
--- a/MexFunc/CorrelationDist.cpp
+++ b/MexFunc/CorrelationDist.cpp
@ -18,8 +18,6 @@
 #include <functional>
 #include <stdexcept>

-// #include "CommonLib/kthread.h"
-// #include "CommonLib/thread_pool.h"
 using std::cout;
 using std::endl;
 using namespace std;
@ -160,7 +158,14 @@ void ThreadCalcDist(TPCorDist& param) {
 }

 /* 入口函数 */
-// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
+/*
+输入：
+1. x: 二维。
+[2]. numThread: 线程数。
+[3]. numGroup: 每次线程函数处理的数据量。
+输出：
+1. d: 相关距离
+*/
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	if (nrhs < 1) {
 		cout << "At least 1 arguments should be given for this function!" << endl;
@ -321,4 +326,9 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {

 	finish = clock();
 	cout << "Correlation Dist Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
+
+/* Debug entry point: lets a standalone main() call this file's mexFunction with the same arguments. */
+void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
+	mexFunction(nlhs, plhs, nrhs, prhs);
 }
--- a/MexFunc/MexFunc.vcxproj
+++ b/MexFunc/MexFunc.vcxproj
@ -119,7 +119,7 @@
    </Link>
  </ItemDefinitionGroup>
  <ItemGroup>
-    <ClCompile Include="AllEntropyMean.cpp" />
+    <ClCompile Include="IsWordInDic.cpp" />
    <ClCompile Include="main.cpp" />
  </ItemGroup>
  <ItemGroup>
--- a/MexFunc/MexFunc.vcxproj.filters
+++ b/MexFunc/MexFunc.vcxproj.filters
@ -18,7 +18,7 @@
    <ClCompile Include="main.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
-    <ClCompile Include="AllEntropyMean.cpp">
+    <ClCompile Include="IsWordInDic.cpp">
      <Filter>Source Files</Filter>
    </ClCompile>
  </ItemGroup>
--- a/MexFunc/SortDedup.cpp
+++ b/MexFunc/SortDedup.cpp
@ -59,32 +59,64 @@ bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
 }

 /* 入口函数 */
-// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
+/*
+输入：
+1. wd: 文献摘要，由二维cell组成的字符串数组
+[2]. 将字符串保存到文件路径
+[3]. flagPrint 是否输出信息
+输出：
+1. dic: 单词组成的一维cell，包含去重之后的文献摘要所有单词，大写，按字母序排序(只包含字母的单词，去掉数字等)
+*/
 void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 	if (nrhs < 1) {
 		cout << "At least 1 arguments should be given for this function!" << endl;
 		return;
 	}
-	clock_t begin = clock(), finish;
-
-	//set<string> sOrderedWord;
+	clock_t begin = clock(), mid, finish;

 	unordered_set<string> usStr;
 	ReadInsertWord(prhs[0], usStr);
-	usStr.insert("A");
-	usStr.insert("Z");
+	// usStr.insert("A");
+	// usStr.insert("Z");
+	string outputPath;
+	if (nrhs > 1) {
+		char* strBuf = new char[STRING_BUF_SIZE];
+		mxGetString(prhs[1], strBuf, STRING_BUF_SIZE);
+		outputPath = strBuf;
+		delete[]strBuf;
+	}

-	///* 排序 */
+	int flagPrint = 0; // 是否打印信息, 1打印简单信息，2打印详细信息
+	if (nrhs > 2) {
+		double* pData = (double*)mxGetData(prhs[2]);
+		flagPrint = (int)pData[0];
+	}
+
+	finish = clock();
+	if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+
+	/* 排序 */
+	mid = clock();
 	set<string> sOrderedWord;
 	for (auto& word : usStr) {
 		sOrderedWord.insert(word);
 	}
+	finish = clock();
+	if (flagPrint == 2) cout << "Sort and deduplicate time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;

-	//ofstream ofs("d:\\wd_dict.txt");
-	//for (auto& word : sOrderedWord) ofs << word << endl;
-	//ofs.close();
+	/* 将字符串保存到文件 */
+	if (! outputPath.empty()) {
+		cout << outputPath << endl;
+		ofstream ofs(outputPath);
+		for (auto& word : sOrderedWord) ofs << word << endl;
+		ofs.close();
+	}
+
+	sOrderedWord.insert("A");
+	sOrderedWord.insert("Z");

 	/* 写入结果 */
+	mid = clock();
 	if (nlhs > 0) {
 		int wordSize = 0;
 		for (auto& word : sOrderedWord) {
@ -98,12 +130,18 @@ void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
 			if (word[0] >= 'A' && word[0] <= 'Z') {
 				mxArray* mxStr = mxCreateString(word.c_str());
 				mxSetCell(pCell, i++, mxStr);
-				//ofs << word << endl;
 			}
 		}
 		plhs[0] = pCell; // 赋值给返回值
 	}
-	//ofs.close();
 	finish = clock();
-	cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+	if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
+
+	finish = clock();
+	if (flagPrint)cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
+}
+
+// Debug entry point for plain C++ callers: forwards all arguments to this file's mexFunction.
+void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
+	return mexFunction(nlhs, plhs, nrhs, prhs);
 }
--- a/MexFunc/main.cpp
+++ b/MexFunc/main.cpp
@ -7,45 +7,42 @@ using namespace std;

 int main(int argc, const char** argv)
 {
-
-    //string matFile = "D:\\x_large.mat";
-	//string matFile = "D:\\x.mat";
-	//string matFile = "D:\\Twirls\\wd_small.mat";
-	//string matFile = "D:\\Twirls\\wd.mat";
 	clock_t begin = clock(), finish;
-	//string wd2Mat = "D:\\wd2_5w.mat";
-	//string dicrMat = "D:\\dicr.mat";
-	//string wdMat = "D:\\wd.mat";
+	const int argReserveNum = 10;
+	mxArray* plhs[argReserveNum];
+	const mxArray* prhs[argReserveNum];

+	/* SortDedup */
+	int nlhs = 1, nrhs = 2;
+	MATFile* pwdMat = matOpen("D:\\tmp\\wd_small.mat", "r");
+	prhs[0] = matGetVariable(pwdMat, "wd");
+	prhs[1] = mxCreateString("D:\\Twirls\\runtime\\output_1.dat");
+	prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
+	*mxGetPr(prhs[2]) = 2;

-	//string dicMat = "D:\\G_dc_large.mat";
-	//string wdMat = "D:\\wd_large.mat";
-
-	//MATFile* pwdMat, *pwd2Mat, *pdicMat;
-	//mxArray* prhs[4];
-
-	//pwdMat = matOpen(wdMat.c_str(), "r");
-	// pwd2Mat = matOpen(wd2Mat.c_str(), "r");
-	//pdicMat = matOpen(dicMat.c_str(), "r");
-	// prhs[1] = mxCreateString("D:\\Twirls\\gat1\\literatures\\temp\\wd2s.txt");
-	// 	prhs[2] = matGetVariable(pdicrMat, "dicr");
+	/* CalcEntropy */
+	// int nlhs = 2, nrhs = 4;
+	// MATFile* pMatAbs = matOpen("D:\\tmp\\abs_189.mat", "r");
+	// MATFile* pMatG = matOpen("D:\\tmp\\G_189.mat", "r");
+	// prhs[0] = matGetVariable(pMatAbs, "abs");
+	// prhs[1] = matGetVariable(pMatG, "G");
+	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
+	// *mxGetPr(prhs[2]) = 12;
+	// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
+	// *mxGetPr(prhs[3]) = 2;

 	/* IsWordInDic  */
 	// MATFile* pwdMat, * pdicMat;
-	// mxArray* plhs[4];
-	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 2;
-	// pwdMat = matOpen("D:\\wd_large.mat", "r");
-	// pdicMat = matOpen("D:\\G_dc_large.mat", "r");
+	// pwdMat = matOpen("D:\\tmp\\wd_large.mat", "r");
+	// pdicMat = matOpen("D:\\tmp\\G_dc_large.mat", "r");
 	// prhs[0] = matGetVariable(pwdMat, "wd"); //获取.mat文件里面名为matrixName的矩阵
 	// prhs[1] = matGetVariable(pdicMat, "dc");

 	/* ClusterRandSim */
-	// mxArray* plhs[4];
-	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 4;
-	// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
-	// MATFile* pMatH = matOpen("D:\\h_large.mat", "r");
+	// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
+	// MATFile* pMatH = matOpen("D:\\tmp\\h_large.mat", "r");
 	// prhs[0] = matGetVariable(pMatX, "x");
 	// prhs[1] = matGetVariable(pMatH, "h3");
 	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
@ -55,11 +52,9 @@ int main(int argc, const char** argv)


 	/* AllClusterRandSim */
-	// mxArray* plhs[4];
-	// const mxArray* prhs[4];
 	// int nlhs = 2, nrhs = 4;
-	// MATFile* pMatX = matOpen("D:\\x_large.mat", "r");
-	// MATFile* pMatIx = matOpen("D:\\ix_large.mat", "r");
+	// MATFile* pMatX = matOpen("D:\\tmp\\x_large.mat", "r");
+	// MATFile* pMatIx = matOpen("D:\\tmp\\ix_large.mat", "r");
 	// prhs[0] = matGetVariable(pMatX, "x");
 	// prhs[1] = matGetVariable(pMatIx, "ix");
 	// prhs[2] = mxCreateDoubleMatrix(1, 1, mxREAL);
@ -68,19 +63,15 @@ int main(int argc, const char** argv)
 	// *mxGetPr(prhs[3]) = 10000;

 	/* AllEntropyMean */
-	mxArray* plhs[4];
-	const mxArray* prhs[4];
-	int nlhs = 2, nrhs = 4;
-	MATFile* pMatG = matOpen("D:\\G_large.mat", "r");
-	MATFile* pMatWs = matOpen("D:\\ws_large.mat", "r");
-	mxArray* pMxG = matGetVariable(pMatG, "G");
-	prhs[0] = mxGetField(pMxG, 0, "ds");
-	prhs[1] = mxGetField(pMxG, 0, "frr");
-	prhs[2] = matGetVariable(pMatWs, "ws");
-	prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
-	*mxGetPr(prhs[3]) = 12;
-
-
+	// int nlhs = 2, nrhs = 4;
+	// MATFile* pMatG = matOpen("D:\\tmp\\G_large.mat", "r");
+	// MATFile* pMatWs = matOpen("D:\\tmp\\ws_large.mat", "r");
+	// mxArray* pMxG = matGetVariable(pMatG, "G");
+	// prhs[0] = mxGetField(pMxG, 0, "ds");
+	// prhs[1] = mxGetField(pMxG, 0, "frr");
+	// prhs[2] = matGetVariable(pMatWs, "ws");
+	// prhs[3] = mxCreateDoubleMatrix(1, 1, mxREAL);
+	// *mxGetPr(prhs[3]) = 12;