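// MATLAB MEX source: reads words from a nested cell array, upper-cases,
// de-duplicates and sorts them, and returns the result as a cell row.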
#include <mex.h>
#include <mat.h>

#include <immintrin.h>
#include <zmmintrin.h>

#include <algorithm>
#include <cctype>
#include <condition_variable>
#include <ctime>
#include <fstream>
#include <functional>
#include <future>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <set>
#include <stdexcept>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>

using std::cout;
using std::endl;
using namespace std;

// Size, in bytes, of the scratch buffer that ReadInsertWord hands to
// mxGetString. A typed constant instead of a macro so it obeys scoping rules
// and shows up in the debugger; the value and type (int) are unchanged.
constexpr int STRING_BUF_SIZE = 204800;

// 读取字符串并转换成大写, 插入set
|
|
bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
|
|
mxArray* pCell = nullptr;
|
|
int rowNum, colNum;
|
|
char* strBuf = new char[STRING_BUF_SIZE];
|
|
|
|
rowNum = (int)mxGetM(pMxArray);
|
|
colNum = (int)mxGetN(pMxArray);
|
|
for (int i = 0; i < rowNum; ++i) {
|
|
for (int j = 0; j < colNum; ++j) {
|
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
|
int childRowNum = (int)mxGetM(pCell);
|
|
int childColNum = (int)mxGetN(pCell);
|
|
for (int ii = 0; ii < childRowNum; ii++) {
|
|
for (int jj = 0; jj < childColNum; jj++) {
|
|
mxArray* pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
|
|
if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
|
|
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
|
return false;
|
|
}
|
|
string str(strBuf);
|
|
transform(str.cbegin(), str.cend(), str.begin(), ::toupper); // 转成大写
|
|
sWord.insert(str);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
delete[]strBuf;
|
|
return true;
|
|
}
|
|
|
|
/* 入口函数 */
|
|
// void mexFunction(int nlhs, mxArray* plhs[], int nrhs, mxArray** prhs) {
|
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
|
if (nrhs < 1) {
|
|
cout << "At least 1 arguments should be given for this function!" << endl;
|
|
return;
|
|
}
|
|
clock_t begin = clock(), finish;
|
|
|
|
//set<string> sOrderedWord;
|
|
|
|
unordered_set<string> usStr;
|
|
ReadInsertWord(prhs[0], usStr);
|
|
usStr.insert("A");
|
|
usStr.insert("Z");
|
|
|
|
///* 排序 */
|
|
set<string> sOrderedWord;
|
|
for (auto& word : usStr) {
|
|
sOrderedWord.insert(word);
|
|
}
|
|
|
|
//ofstream ofs("d:\\wd_dict.txt");
|
|
//for (auto& word : sOrderedWord) ofs << word << endl;
|
|
//ofs.close();
|
|
|
|
/* 写入结果 */
|
|
if (nlhs > 0) {
|
|
int wordSize = 0;
|
|
for (auto& word : sOrderedWord) {
|
|
if (word[0] >= 'A' && word[0] <= 'Z') {
|
|
wordSize++;
|
|
}
|
|
}
|
|
mxArray* pCell = mxCreateCellMatrix(1, wordSize);
|
|
int i = 0;
|
|
for (auto& word : sOrderedWord) {
|
|
if (word[0] >= 'A' && word[0] <= 'Z') {
|
|
mxArray* mxStr = mxCreateString(word.c_str());
|
|
mxSetCell(pCell, i++, mxStr);
|
|
//ofs << word << endl;
|
|
}
|
|
}
|
|
plhs[0] = pCell; // 赋值给返回值
|
|
}
|
|
//ofs.close();
|
|
finish = clock();
|
|
cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
|
} |