101 lines
3.4 KiB
C++
101 lines
3.4 KiB
C++
|
|
#include <mex.h>
|
|||
|
|
#include <mat.h>
|
|||
|
|
#include <iostream>
|
|||
|
|
#include <algorithm>
|
|||
|
|
#include <vector>
|
|||
|
|
#include <string>
|
|||
|
|
#include <unordered_set>
|
|||
|
|
#include <ctime>
|
|||
|
|
using std::cout;
|
|||
|
|
using std::endl;
|
|||
|
|
using namespace std;
|
|||
|
|
|
|||
|
|
// Capacity, in chars, of the scratch buffer that GetAbstract allocates and
// passes to mxGetString as the buffer length when copying out each cell's text.
#define STRING_BUF_SIZE 204800
|
|||
|
|
|
|||
|
|
/* <20><>ȡabs */
|
|||
|
|
void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
|
|||
|
|
int rowNum = (int)mxGetM(pMxAbs);
|
|||
|
|
int colNum = (int)mxGetN(pMxAbs);
|
|||
|
|
char *strBuf = new char[STRING_BUF_SIZE];
|
|||
|
|
|
|||
|
|
vAbs.resize(rowNum * colNum);
|
|||
|
|
for (int i = 0; i < rowNum; ++i) {
|
|||
|
|
for (int j = 0; j < colNum; ++j) {
|
|||
|
|
mxArray* pCell = mxGetCell(pMxAbs, j * rowNum + i);
|
|||
|
|
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
|||
|
|
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
|||
|
|
delete[]strBuf;
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
vAbs[i * colNum + j] = strBuf;
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
delete[]strBuf;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/*
|
|||
|
|
nlhs<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ(Number Left - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
plhs<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD>(Point Left - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
nrhs<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŀ(Number Right - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD>ұ<EFBFBD>
|
|||
|
|
prhs<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD>(Point Right - hand side)<EFBFBD><EFBFBD><EFBFBD>Ⱥ<EFBFBD><EFBFBD>ұߡ<EFBFBD>Ҫע<EFBFBD><EFBFBD>prhs<EFBFBD><EFBFBD>const<EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>飬<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ܸı<EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ݡ<EFBFBD>
|
|||
|
|
*/
|
|||
|
|
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
|
|||
|
|
//cout << "WordSplit" << endl;
|
|||
|
|
//cout << nlhs << '\t' << nrhs << endl;
|
|||
|
|
if (nrhs != 1) {
|
|||
|
|
cout << "1 arguments should be given for this function!" << endl;
|
|||
|
|
return;
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* <20><>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD>ժҪ<D5AA><D2AA>Ϣ */
|
|||
|
|
vector<string> vAbstract; // <20><>ȡabs1, Ȼ<><C8BB><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>ĵ<EFBFBD><C4B5><EFBFBD>
|
|||
|
|
GetAbstract(prhs[0], vAbstract);
|
|||
|
|
|
|||
|
|
/* <20><>ժҪ<D5AA><D2AA>Ϣ<EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>Ĵʻ<C4B4> */
|
|||
|
|
clock_t begin, finish;
|
|||
|
|
begin = clock();
|
|||
|
|
unordered_set<char> usWordChars; // <20><><EFBFBD><EFBFBD><EFBFBD>ɵ<EFBFBD><C9B5>ʵ<EFBFBD><CAB5>ַ<EFBFBD><D6B7><EFBFBD>Ҫ<EFBFBD><D2AA>Ҫ<EFBFBD><D2AA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֣<EFBFBD>ԭ<EFBFBD><D4AD>matlab<61><62><EFBFBD><EFBFBD>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD>
|
|||
|
|
for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
|
|||
|
|
for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
|
|||
|
|
for (int i = 48; i <= 57; i++) usWordChars.insert(char(i)); // 0 - 9
|
|||
|
|
usWordChars.insert('/'); usWordChars.insert('+'); usWordChars.insert('-');
|
|||
|
|
vector<vector<string> > vvWordMtx(vAbstract.size()); // <20><>ʼ<EFBFBD><CABC>СΪ<D0A1><CEAA><EFBFBD>µĸ<C2B5><C4B8><EFBFBD>
|
|||
|
|
for (int i = 0; i < vAbstract.size(); i++) {
|
|||
|
|
auto& strAbs = vAbstract[i];
|
|||
|
|
// <20><><EFBFBD><EFBFBD>ժҪ<D5AA>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD>ÿһ<C3BF><D2BB><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>ȡ<EFBFBD><C8A1>ÿһ<C3BF><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
vector<string>& vWord = vvWordMtx[i];
|
|||
|
|
if (strAbs.size() == 0) continue; // ժҪ<D5AA><D2AA>ϢΪ<CFA2>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD>㲻<EFBFBD><E3B2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|||
|
|
int wordStartPos = 0;
|
|||
|
|
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
|
|||
|
|
wordStartPos++;
|
|||
|
|
for (int curPos = wordStartPos + 1; curPos < strAbs.size(); ++curPos) {
|
|||
|
|
if (usWordChars.find(strAbs[curPos]) == usWordChars.end()) { // <20>ҵ<EFBFBD><D2B5>˷ָ<CBB7><D6B8><EFBFBD>
|
|||
|
|
vWord.push_back(strAbs.substr(wordStartPos, curPos - wordStartPos));
|
|||
|
|
wordStartPos = curPos + 1; // <20><><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼλ<CABC><CEBB>
|
|||
|
|
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
|
|||
|
|
wordStartPos++;
|
|||
|
|
curPos = wordStartPos; // ѭ<><D1AD><EFBFBD><EFBFBD><EFBFBD>Զ<EFBFBD><D4B6><EFBFBD>1
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD>뷵<EFBFBD>ز<EFBFBD><D8B2><EFBFBD> */
|
|||
|
|
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD>뷵<EFBFBD><EBB7B5>ֵ */
|
|||
|
|
if (nlhs > 0) {
|
|||
|
|
mxArray* pCellMtx = mxCreateCellMatrix(1, vvWordMtx.size());
|
|||
|
|
for (int i = 0; i < vvWordMtx.size(); ++i) {
|
|||
|
|
mxArray* pChildCellMtx = mxCreateCellMatrix(1, vvWordMtx[i].size());
|
|||
|
|
for (int j = 0; j < vvWordMtx[i].size(); ++j) {
|
|||
|
|
mxArray* mxStr = mxCreateString(vvWordMtx[i][j].c_str());
|
|||
|
|
mxSetCell(pChildCellMtx, j, mxStr);
|
|||
|
|
}
|
|||
|
|
mxSetCell(pCellMtx, i, pChildCellMtx);
|
|||
|
|
}
|
|||
|
|
plhs[0] = pCellMtx; // <20><>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
|
|||
|
|
}
|
|||
|
|
|
|||
|
|
|
|||
|
|
finish = clock();
|
|||
|
|
//cout << "split abstract time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
|
|||
|
|
|
|||
|
|
}
|