// twirls/MexFunc/SortDedup.cpp
//
// 109 lines
// 2.7 KiB
// C++

#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <unordered_set>
#include <ctime>
#include <immintrin.h>
#include <zmmintrin.h>
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <unordered_set>
#include <set>
#include <fstream>
// The using-directive below already makes cout and endl visible, so the two
// using-declarations are redundant with it.
using std::cout;
using std::endl;
using namespace std;
// Number of chars (204800 = 200 * 1024) in the temporary buffer that
// ReadInsertWord passes to mxGetString; this bounds the length of a single
// string that can be read from the input.
#define STRING_BUF_SIZE 204800
// 读取字符串并转换成大写, 插入set
bool ReadInsertWord(const mxArray* pMxArray, unordered_set<string> &sWord) {
mxArray* pCell = nullptr;
int rowNum, colNum;
char* strBuf = new char[STRING_BUF_SIZE];
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i);
int childRowNum = (int)mxGetM(pCell);
int childColNum = (int)mxGetN(pCell);
for (int ii = 0; ii < childRowNum; ii++) {
for (int jj = 0; jj < childColNum; jj++) {
mxArray* pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
return false;
}
string str(strBuf);
transform(str.cbegin(), str.cend(), str.begin(), ::toupper); // 转成大写
sWord.insert(str);
}
}
}
}
delete[]strBuf;
return true;
}
/* Entry point */
// MEX gateway: de-duplicates and sorts the strings of a nested cell array.
//
// Parameters:
//   nlhs, plhs - number of requested outputs and the output array. When at
//                least one output is requested, plhs[0] receives a 1-by-N cell
//                array of strings.
//   nrhs, prhs - number of inputs and the input array. prhs[0] is handed to
//                ReadInsertWord, which upper-cases every string it reads.
//
// The result holds each distinct upper-cased word once, in the ascending order
// of std::set<std::string>, restricted to words whose first character lies in
// 'A'..'Z'. The words "A" and "Z" are always part of the result.
//
// If no input is given, or ReadInsertWord reports a failure, a message is
// printed and the function returns without assigning any output, so a failed
// read can no longer produce a silently truncated word list.
// The elapsed processor time (clock()) is printed on success.
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
    if (nrhs < 1) {
        std::cout << "At least 1 arguments should be given for this function!" << std::endl;
        return;
    }
    const clock_t begin = clock();

    // De-duplicate first; the hash set makes repeated words cheap to drop.
    std::unordered_set<std::string> usStr;
    if (!ReadInsertWord(prhs[0], usStr)) {
        std::cout << "Failed to read the input words, no result is produced!" << std::endl;
        return;
    }
    usStr.insert("A");
    usStr.insert("Z");

    /* Sort */
    const std::set<std::string> sOrderedWord(usStr.begin(), usStr.end());

    /* Write the result */
    if (nlhs > 0) {
        // Filter once and remember the survivors, so the predicate is not
        // duplicated between a counting pass and a writing pass.
        std::vector<const std::string*> kept;
        kept.reserve(sOrderedWord.size());
        for (const auto& word : sOrderedWord) {
            if (!word.empty() && word[0] >= 'A' && word[0] <= 'Z') {
                kept.push_back(&word);
            }
        }

        mxArray* pCell = mxCreateCellMatrix(1, kept.size());
        for (std::size_t i = 0; i < kept.size(); ++i) {
            mxSetCell(pCell, i, mxCreateString(kept[i]->c_str()));
        }
        plhs[0] = pCell;  // hand the cell array back to the caller
    }

    const clock_t finish = clock();
    std::cout << "Deduplicate and Sort word Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << std::endl;
}