twirls/MexFunc/WordSplit.cpp

101 lines
3.4 KiB
C++
Raw Normal View History

2023-10-05 10:38:21 +08:00
#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <vector>
#include <string>
#include <unordered_set>
#include <ctime>
using std::cout;
using std::endl;
using namespace std;
#define STRING_BUF_SIZE 204800
/* <20><>ȡabs */
void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
int rowNum = (int)mxGetM(pMxAbs);
int colNum = (int)mxGetN(pMxAbs);
char *strBuf = new char[STRING_BUF_SIZE];
vAbs.resize(rowNum * colNum);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
mxArray* pCell = mxGetCell(pMxAbs, j * rowNum + i);
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
delete[]strBuf;
return;
}
vAbs[i * colNum + j] = strBuf;
}
}
delete[]strBuf;
}
/*
nlhs<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ(Number Left - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
plhs<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD>(Point Left - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
nrhs<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ(Number Right - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD>ұ<EFBFBD>
prhs<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD>(Point Right - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD>ұߡ<EFBFBD>Ҫע<EFBFBD><EFBFBD>prhs<EFBFBD><EFBFBD>const<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ܸı<EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݡ<EFBFBD>
*/
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
//cout << "WordSplit" << endl;
//cout << nlhs << '\t' << nrhs << endl;
if (nrhs != 1) {
cout << "1 arguments should be given for this function!" << endl;
return;
}
/* <20><>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD>ժҪ<D5AA><D2AA>Ϣ */
vector<string> vAbstract; // <20><>ȡabs1, Ȼ<><C8BB><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>ĵ<EFBFBD><C4B5><EFBFBD>
GetAbstract(prhs[0], vAbstract);
/* <20><>ժҪ<D5AA><D2AA>Ϣ<EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>Ĵʻ<C4B4> */
clock_t begin, finish;
begin = clock();
unordered_set<char> usWordChars; // <20><><EFBFBD><EFBFBD><EFBFBD>ɵ<EFBFBD><C9B5>ʵ<EFBFBD><CAB5>ַ<EFBFBD><D6B7><EFBFBD>Ҫ<EFBFBD><D2AA>Ҫ<EFBFBD><D2AA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֣<EFBFBD>ԭ<EFBFBD><D4AD>matlab<61><62><EFBFBD><EFBFBD>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD>
for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
for (int i = 48; i <= 57; i++) usWordChars.insert(char(i)); // 0 - 9
usWordChars.insert('/'); usWordChars.insert('+'); usWordChars.insert('-');
vector<vector<string> > vvWordMtx(vAbstract.size()); // <20><>ʼ<EFBFBD><CABC>СΪ<D0A1><CEAA><EFBFBD>µĸ<C2B5><C4B8><EFBFBD>
for (int i = 0; i < vAbstract.size(); i++) {
auto& strAbs = vAbstract[i];
// <20><><EFBFBD><EFBFBD>ժҪ<D5AA>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD>ÿһ<C3BF><D2BB><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>ȡ<EFBFBD><C8A1>ÿһ<C3BF><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
vector<string>& vWord = vvWordMtx[i];
if (strAbs.size() == 0) continue; // ժҪ<D5AA><D2AA>ϢΪ<CFA2>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><E3B2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
int wordStartPos = 0;
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
wordStartPos++;
for (int curPos = wordStartPos + 1; curPos < strAbs.size(); ++curPos) {
if (usWordChars.find(strAbs[curPos]) == usWordChars.end()) { // <20>ҵ<EFBFBD><D2B5>˷ָ<CBB7><D6B8><EFBFBD>
vWord.push_back(strAbs.substr(wordStartPos, curPos - wordStartPos));
wordStartPos = curPos + 1; // <20><><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼλ<CABC><CEBB>
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
wordStartPos++;
curPos = wordStartPos; // ѭ<><D1AD><EFBFBD><EFBFBD><EFBFBD>Զ<EFBFBD><D4B6><EFBFBD>1
}
}
}
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><EFBFBD>ز<EFBFBD><D8B2><EFBFBD> */
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><EFBFBD><EBB7B5>ֵ */
if (nlhs > 0) {
mxArray* pCellMtx = mxCreateCellMatrix(1, vvWordMtx.size());
for (int i = 0; i < vvWordMtx.size(); ++i) {
mxArray* pChildCellMtx = mxCreateCellMatrix(1, vvWordMtx[i].size());
for (int j = 0; j < vvWordMtx[i].size(); ++j) {
mxArray* mxStr = mxCreateString(vvWordMtx[i][j].c_str());
mxSetCell(pChildCellMtx, j, mxStr);
}
mxSetCell(pCellMtx, i, pChildCellMtx);
}
plhs[0] = pCellMtx; // <20><>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
}
finish = clock();
//cout << "split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}