// twirls/MexFunc/CalcEntropy.cpp
// (repository view header: 432 lines, 14 KiB, C++; revision dated 2023-10-05 10:38:21 +08:00)
#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <string>
#include <unordered_set>
#include <ctime>
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <unordered_map>
#include <set>
#include <fstream>
#include <random>
#include <cmath>
#include <stdlib.h>
#include <limits.h>
#include <atomic>
// (history-view timestamp: 2023-10-05 10:38:21 +08:00)
// Bring the standard-library names used throughout this file into scope.
using std::cout;
using std::endl;
using namespace std;
// Size of the temporary character buffer handed to mxGetString when a string
// is read out of a cell.
#define STRING_BUF_SIZE 204800
/*
 * OUTER_FOR_BEGIN / OUTER_FOR_END open and close a double loop over every
 * element of the cell array `pMxArray`, visiting it row by row.
 * The enclosing scope must already declare `rowNum`, `colNum` and `pMxArray`.
 * Inside the loop body the macro provides:
 *   i, j                      - row / column of the current cell
 *   pCell                     - the current cell (linear index j * rowNum + i)
 *   childRowNum, childColNum  - dimensions of pCell
 */
#define OUTER_FOR_BEGIN \
rowNum = (int)mxGetM(pMxArray); \
colNum = (int)mxGetN(pMxArray); \
for (int i = 0; i < rowNum; ++i) { \
for (int j = 0; j < colNum; ++j) { \
mxArray* pCell = mxGetCell(pMxArray, j * rowNum + i); \
int childRowNum = (int)mxGetM(pCell); \
int childColNum = (int)mxGetN(pCell);
#define OUTER_FOR_END \
} \
}
/*
 * INNTER_FOR_BEGIN / INNTER_FOR_END open and close a double loop over every
 * element of `pCell`; they are meant to be used inside an OUTER_FOR block.
 * Inside the loop body the macro provides ii, jj (row / column) and
 * pChildCell (the element at linear index jj * childRowNum + ii).
 */
#define INNTER_FOR_BEGIN \
for (int ii = 0; ii < childRowNum; ii++) { \
for (int jj = 0; jj < childColNum; jj++) { \
mxArray *pChildCell = mxGetCell(pCell, jj * childRowNum + ii);
#define INNTER_FOR_END \
} \
}
// Copies a rowNum-by-colNum matrix from column-major order (src, the layout
// the MATLAB data is indexed with here) into row-major order (dst).
#define TRANS_ROW_COL(dst, src, rowNum, colNum) \
for (int rowI = 0; rowI < rowNum; ++rowI) { \
for (int colJ = 0; colJ < colNum; ++colJ) { \
dst[rowI * colNum + colJ] = src[colJ * rowNum + rowI]; \
} \
}
// Fixed-size pool of worker threads that execute queued tasks in FIFO order.
class ThreadPool {
public:
    // Starts the given number of worker threads.
    ThreadPool(size_t);
    // Queues f(args...) for execution and returns a future for its result.
    template<class F, class... Args>
    auto enqueue(F&& f, Args&&... args)
        ->std::future<typename std::result_of<F(Args...)>::type>;
    // Lets the workers drain the queue, then joins them.
    ~ThreadPool();
private:
    // Body executed by every worker thread.
    void runWorker();

    // worker threads, kept so the destructor can join them
    std::vector< std::thread > workers;
    // pending tasks
    std::queue< std::function<void()> > tasks;
    // protects `tasks` and `stop`
    std::mutex queue_mutex;
    // signalled when a task is queued or the pool is stopping
    std::condition_variable condition;
    bool stop;
};

// Spawns `threads` workers, each running runWorker().
inline ThreadPool::ThreadPool(size_t threads)
    : stop(false)
{
    for (size_t spawned = 0; spawned != threads; ++spawned)
        workers.emplace_back(&ThreadPool::runWorker, this);
}

// Repeatedly takes the oldest task off the queue and runs it; returns once
// the pool is stopping and no task is left.
inline void ThreadPool::runWorker()
{
    while (true)
    {
        std::function<void()> job;
        {
            std::unique_lock<std::mutex> guard(queue_mutex);
            while (!stop && tasks.empty())
                condition.wait(guard);
            // The wait only ends with an empty queue when `stop` is set.
            if (tasks.empty())
                return;
            job = std::move(tasks.front());
            tasks.pop();
        }
        // Run outside the lock so other workers can fetch tasks meanwhile.
        job();
    }
}

// Wraps the call in a packaged_task, queues it and wakes one worker.
// Throws std::runtime_error when the pool has already been stopped.
template<class F, class... Args>
auto ThreadPool::enqueue(F && f, Args&&... args)
-> std::future<typename std::result_of<F(Args...)>::type>
{
    typedef typename std::result_of<F(Args...)>::type result_t;
    typedef std::packaged_task<result_t()> packaged_t;

    // shared_ptr because std::function requires a copyable callable
    std::shared_ptr<packaged_t> job = std::make_shared<packaged_t>(
        std::bind(std::forward<F>(f), std::forward<Args>(args)...));
    std::future<result_t> pending = job->get_future();
    {
        std::lock_guard<std::mutex> guard(queue_mutex);
        if (stop)
            throw std::runtime_error("enqueue on stopped ThreadPool");
        tasks.emplace([job]() { (*job)(); });
    }
    condition.notify_one();
    return pending;
}

// Flags the pool as stopping, wakes every worker and joins them all.
inline ThreadPool::~ThreadPool()
{
    {
        std::lock_guard<std::mutex> guard(queue_mutex);
        stop = true;
    }
    condition.notify_all();
    for (std::vector<std::thread>::iterator it = workers.begin(); it != workers.end(); ++it)
        it->join();
}
// (history-view timestamp: 2023-10-05 10:38:21 +08:00)
// Converts a (row, col) position of a row-major matrix that has colNum
// columns into the matching offset of the flat array.
inline int Get1DIndex(int colNum, int row, int col) {
    const int rowOffset = colNum * row;
    return rowOffset + col;
}
// <20><>ȡG<C8A1><EFBFBD><E1B9B9><EFBFBD>е<EFBFBD>ds<64><73>fr
void GetFrDs(const mxArray* pMxParent, vector<vector<string> >& vvDs, vector<vector<double> >& vvFr) {
// <20><>ȡds<64>ַ<EFBFBD><D6B7><EFBFBD>
int rowNum, colNum;
char *strBuf = new char[STRING_BUF_SIZE];
mxArray* pMxArray = mxGetField(pMxParent, 0, "ds"); // ds
OUTER_FOR_BEGIN
vvDs.push_back(vector<string>());
vvDs.back().resize(childRowNum * childColNum);
INNTER_FOR_BEGIN
if (mxGetString(pChildCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
delete[]strBuf;
return;
}
vvDs.back()[ii * childColNum + jj] = strBuf;
auto& lastStr = vvDs.back()[ii * childColNum + jj];
transform(lastStr.begin(), lastStr.end(), lastStr.begin(), ::toupper); // ת<>ɴ<EFBFBD>д
INNTER_FOR_END
OUTER_FOR_END
// <20><>ȡfr<66><72>ֵ
pMxArray = mxGetField(pMxParent, 0, "fr"); // fr
OUTER_FOR_BEGIN
vvFr.push_back(vector<double>());
vvFr.back().resize(childRowNum * childColNum);
double* pVal = (double*)mxGetData(pCell); //<2F><>ȡָ<C8A1><D6B8>
TRANS_ROW_COL(vvFr.back(), pVal, childRowNum, childColNum); // <20><><EFBFBD>д洢<D0B4><E6B4A2>ʽת<CABD><D7AA>
OUTER_FOR_END
delete[]strBuf;
}
/* <20><>ȡabs */
void GetAbstract(const mxArray* pMxAbs, vector<string>& vAbs) {
int rowNum = (int)mxGetM(pMxAbs);
int colNum = (int)mxGetN(pMxAbs);
char *strBuf = new char[STRING_BUF_SIZE];
vAbs.resize(rowNum * colNum);
for (int i = 0; i < rowNum; ++i) {
for (int j = 0; j < colNum; ++j) {
mxArray* pCell = mxGetCell(pMxAbs, j * rowNum + i);
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
delete[]strBuf;
return;
}
vAbs[i * colNum + j] = strBuf;
}
}
delete[]strBuf;
}
// Creates a 1-by-N cell array (N = vStr.size()) whose j-th cell holds a
// MATLAB string built from vStr[j]; the new array is returned.
mxArray* writeToMatString1DCell(vector<string>& vStr) {
    const size_t count = vStr.size();
    mxArray* const pRow = mxCreateCellMatrix(1, count);
    size_t slot = 0;
    for (vector<string>::const_iterator it = vStr.begin(); it != vStr.end(); ++it, ++slot) {
        mxSetCell(pRow, slot, mxCreateString(it->c_str()));
    }
    return pRow;
}
// Creates a 1-by-N cell array (N = vvStr.size()) whose i-th cell is itself
// the 1-D string cell array produced by writeToMatString1DCell(vvStr[i]).
mxArray* writeToMatString2DCell(vector<vector<string>>& vvStr) {
    const size_t groupCount = vvStr.size();
    mxArray* const pOuter = mxCreateCellMatrix(1, groupCount);
    size_t slot = 0;
    while (slot < groupCount) {
        mxSetCell(pOuter, slot, writeToMatString1DCell(vvStr[slot]));
        ++slot;
    }
    return pOuter;
}
// Creates a real rowNum-by-colNum double matrix and fills it with the first
// rowNum * colNum values of `data`, copied verbatim (no reordering is done,
// so `data` must already be in the order the matrix storage expects).
// The new matrix is returned, e.g. to be used as a MEX output value.
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
    mxArray* const pMatrix = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
    const int elemCount = rowNum * colNum;
    double* const pDest = mxGetPr(pMatrix);
    std::copy(data, data + elemCount, pDest);
    return pMatrix;
}
/* Arguments of one entropy task: one word group evaluated against all abstracts. */
struct TPEntropy {
    std::vector<std::string>* pvDs;                          // words of the group
    std::vector<double>* pvFr;                               // frequency of each word; rescaled in place by the task
    std::vector<std::unordered_set<std::string>>* pvusAbsWord; // word set of every abstract
    double* pHs;                                             // one accumulator per abstract, updated in place
};

/*
 * Computes the entropy contribution of one word group for every abstract.
 *
 * The frequencies are first rescaled in place so that the largest one becomes
 * 0.368 (presumably chosen as ~1/e, the point where -p*log2(p) is largest).
 * Then, for each abstract i, -fr[j] * log2(fr[j]) is added to param.pHs[i]
 * for every word j of the group that occurs in the abstract's word set.
 *
 * Degenerate input is handled instead of invoking undefined behaviour or
 * producing NaN:
 *   - an empty frequency list, or a largest frequency that is not positive,
 *     leaves pHs (and the frequencies) untouched;
 *   - words without a frequency entry are ignored;
 *   - frequencies that are not positive contribute nothing.
 */
void ThreadCalcEntropy(TPEntropy& param) {
    const std::vector<std::string>& vDs = *param.pvDs;
    std::vector<double>& vFr = *param.pvFr;
    const std::vector<std::unordered_set<std::string>>& vusAbsWord = *param.pvusAbsWord;
    double* hs = param.pHs;

    // max_element on an empty range returns end(), which must not be dereferenced.
    if (vFr.empty()) return;
    const double maxFr = *std::max_element(vFr.begin(), vFr.end());
    // Rescaling by a zero, negative or NaN maximum is meaningless.
    if (!(maxFr > 0.0)) return;

    // Rescale the frequencies into (.., 0.368].
    const double normalMax = 0.368;
    for (double& frVal : vFr) frVal = frVal * normalMax / maxFr;

    // Each word's term does not depend on the abstract: compute it once.
    const std::size_t numWord = std::min(vDs.size(), vFr.size());
    std::vector<double> vTerm(numWord, 0.0);
    for (std::size_t j = 0; j < numWord; ++j) {
        if (vFr[j] > 0.0) vTerm[j] = -(vFr[j] * std::log2(vFr[j]));
    }

    // Add the terms of all words that occur in each abstract.
    const std::size_t numAbs = vusAbsWord.size();
    for (std::size_t i = 0; i < numAbs; ++i) {
        const std::unordered_set<std::string>& absWords = vusAbsWord[i];
        if (absWords.empty()) continue;
        double acc = hs[i];
        for (std::size_t j = 0; j < numWord; ++j) {
            if (vTerm[j] != 0.0 && absWords.find(vDs[j]) != absWords.end()) {
                acc += vTerm[j];
            }
        }
        hs[i] = acc;
    }
}
// (history-view timestamp: 2023-10-05 10:38:21 +08:00)
/*
<EFBFBD><EFBFBD><EFBFBD>
1. abs: <EFBFBD><EFBFBD><EFBFBD><EFBFBD>֪<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>׵<EFBFBD>ժҪ<EFBFBD><EFBFBD>Ϣ<EFBFBD><EFBFBD>
2. G: ֪ʶ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ó<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD><EFBFBD><EFBFBD>ȴ<EFBFBD>ds<EFBFBD>Լ<EFBFBD><EFBFBD><EFBFBD>Ӧ<EFBFBD><EFBFBD>Ƶ<EFBFBD><EFBFBD>fr<EFBFBD><EFBFBD>
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
1. hs: <EFBFBD><EFBFBD>Ϣ<EFBFBD>أ<EFBFBD><EFBFBD><EFBFBD>ά[len(֪ʶ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>)][len(<EFBFBD><EFBFBD><EFBFBD><EFBFBD>)]
2023-10-05 10:38:21 +08:00
*/
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
if (nrhs < 2) {
cout << "At least 2 arguments should be given for this function!" << endl;
2023-10-05 10:38:21 +08:00
return;
}
clock_t begin = clock(), mid, finish;
2023-10-05 10:38:21 +08:00
vector<string> vAbstract; // <20><>ȡabs1, Ȼ<><C8BB><EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>ĵ<EFBFBD><C4B5><EFBFBD>
GetAbstract(prhs[0], vAbstract);
vector<vector<string>> vvDs; // ÿ<><C3BF>֪ʶ<D6AA><CAB6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ds<64><73><EFBFBD>󣨴ʻ<F3A3A8B4><CABB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
vector<vector<double>> vvFr; // <20>ʻ<EFBFBD><CABB><EFBFBD>Ӧ<EFBFBD><D3A6>Ƶ<EFBFBD><C6B5>
GetFrDs(prhs[1], vvDs, vvFr);
int numThread = 1; // <20>Ƿ<EFBFBD><C7B7><EFBFBD>ӡ<EFBFBD><D3A1>Ϣ, 1<><31>ӡ<EFBFBD><D3A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϣ<EFBFBD><CFA2>2<EFBFBD><32>ӡ<EFBFBD><D3A1>ϸ<EFBFBD><CFB8>Ϣ
if (nrhs > 2) {
double* pData = (double*)mxGetData(prhs[2]);
numThread = (int)pData[0];
if (numThread < 1) numThread = 1;
}
int flagPrint = 0; // <20>Ƿ<EFBFBD><C7B7><EFBFBD>ӡ<EFBFBD><D3A1>Ϣ, 1<><31>ӡ<EFBFBD><D3A1><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϣ<EFBFBD><CFA2>2<EFBFBD><32>ӡ<EFBFBD><D3A1>ϸ<EFBFBD><CFB8>Ϣ
if (nrhs > 3) {
double* pData = (double*)mxGetData(prhs[3]);
flagPrint = (int)pData[0];
}
finish = clock();
if (flagPrint == 2) cout << "Load data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
2023-10-05 10:38:21 +08:00
/* <20><>ժҪ<D5AA><D2AA>Ϣ<EFBFBD>ָ<EFBFBD><D6B8><EFBFBD>һ<EFBFBD><D2BB>һ<EFBFBD><D2BB><EFBFBD>Ĵʻ<C4B4> */
mid = clock();
2023-10-05 10:38:21 +08:00
unordered_set<char> usWordChars; // <20><><EFBFBD><EFBFBD><EFBFBD>ɵ<EFBFBD><C9B5>ʵ<EFBFBD><CAB5>ַ<EFBFBD><D6B7><EFBFBD>Ҫ<EFBFBD><D2AA>Ҫ<EFBFBD><D2AA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֣<EFBFBD>ԭ<EFBFBD><D4AD>matlab<61><62><EFBFBD><EFBFBD>ȡ<EFBFBD><C8A1><EFBFBD><EFBFBD><EFBFBD>ֵ<EFBFBD>
for (int i = 65; i <= 90; i++) usWordChars.insert(char(i)); // A - Z
for (int i = 97; i <= 122; i++) usWordChars.insert(char(i)); // a - z
for (int i = 48; i <= 57; i++) usWordChars.insert(char(i)); // 0 - 9
usWordChars.insert('/'); usWordChars.insert('+'); usWordChars.insert('-');
vector<vector<string> > vvWordMtx(vAbstract.size()); // <20><>ʼ<EFBFBD><CABC>СΪ<D0A1><CEAA><EFBFBD>µĸ<C2B5><C4B8><EFBFBD>
vector<unordered_set<string> > vusAbsWord(vAbstract.size()); // <20><>ÿƪ<C3BF><C6AA><EFBFBD><EFBFBD>ժҪ<D5AA>ĵ<EFBFBD><C4B5>ʷ<EFBFBD><CAB7><EFBFBD>hash<73><68>
for (int i = 0; i < vAbstract.size(); i++) {
auto& strAbs = vAbstract[i];
// <20><><EFBFBD><EFBFBD>ժҪ<D5AA>ַ<EFBFBD><D6B7><EFBFBD><EFBFBD><EFBFBD>ÿһ<C3BF><D2BB><EFBFBD>ַ<EFBFBD><D6B7><EFBFBD>ȡ<EFBFBD><C8A1>ÿһ<C3BF><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
vector<string>& vWord = vvWordMtx[i];
if (strAbs.size() == 0) continue; // ժҪ<D5AA><D2AA>ϢΪ<CFA2>գ<EFBFBD><D5A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><E3B2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
int wordStartPos = 0;
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
wordStartPos++;
for (int curPos = wordStartPos + 1; curPos < strAbs.size(); ++curPos) {
if (usWordChars.find(strAbs[curPos]) == usWordChars.end()) { // <20>ҵ<EFBFBD><D2B5>˷ָ<CBB7><D6B8><EFBFBD>
vWord.push_back(strAbs.substr(wordStartPos, curPos - wordStartPos));
wordStartPos = curPos + 1; // <20><><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ʼλ<CABC><CEBB>
while (wordStartPos < strAbs.size() && usWordChars.find(strAbs[wordStartPos]) == usWordChars.end())
wordStartPos++;
curPos = wordStartPos; // ѭ<><D1AD><EFBFBD><EFBFBD><EFBFBD>Զ<EFBFBD><D4B6><EFBFBD>1
}
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ժҪ֮<D2AA><D6AE><EFBFBD><EFBFBD>ÿ<EFBFBD><C3BF><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>hash<73><68>
for (auto& word : vWord) {
string upWord(word);
transform(upWord.begin(), upWord.end(), upWord.begin(), ::toupper);
vusAbsWord[i].insert(upWord);
}
}
finish = clock();
if (flagPrint == 2) cout << "Split abstract time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
2023-10-05 10:38:21 +08:00
// <20><><EFBFBD>Ž<EFBFBD><C5BD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һά<D2BB><CEAC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ŷ<EFBFBD>ά<EFBFBD><CEAC><EFBFBD><EFBFBD>
mid = clock();
2023-10-05 10:38:21 +08:00
vector<double> hs;
// vector<double> hr;
2023-10-05 10:38:21 +08:00
const int numLiterature = vusAbsWord.size(); // pubmed <20>ļ<EFBFBD><C4BC>а<EFBFBD><D0B0><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
const int numGroup = vvDs.size(); // ds<64><73><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
hs.resize(numGroup * numLiterature);
// hr.resize(numLiterature * numGroup);
// <20><><EFBFBD><EFBFBD>, û<>м<EFBFBD><D0BC><EFBFBD>hr
ThreadPool thPool(numThread);
for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) {
TPEntropy tp = { &vvDs[groupIdx], &vvFr[groupIdx], &vusAbsWord, &hs[groupIdx * numLiterature] };
thPool.enqueue(ThreadCalcEntropy, tp);
2023-10-05 10:38:21 +08:00
}
thPool.~ThreadPool();
// // <20><><EFBFBD><EFBFBD>
// for (int groupIdx = 0; groupIdx < numGroup; ++groupIdx) { // <20><><EFBFBD><EFBFBD>֪ʶ<D6AA><CAB6><EFBFBD><EFBFBD><EFBFBD>е<EFBFBD>ÿһ<C3BF><D2BB>
// vector<string>& vDs = vvDs[groupIdx]; // <20><>һ<EFBFBD><D2BB>ds
// vector<double>& vFr = vvFr[groupIdx]; // frequency
// const int numWord = vDs.size(); // <20><>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>а<EFBFBD><D0B0><EFBFBD><EFBFBD>ĵ<EFBFBD><C4B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// vector<vector<int> > vX(numLiterature, vector<int>(numWord, 0));
// // <20><><EFBFBD><EFBFBD>֪ʶ<D6AA><CAB6><EFBFBD><EFBFBD><EFBFBD>еĴ<D0B5><C4B4><EFBFBD><EFBFBD>Ƿ<EFBFBD><C7B7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>pubmedժҪ<D5AA>Ĵ<EFBFBD><C4B4><EFBFBD><EFBFBD><EFBFBD>
// for (int i = 0; i < numLiterature; ++i) {
// for (int j = 0; j < numWord; ++j) {
// if (vusAbsWord[i].find(vDs[j]) != vusAbsWord[i].end()) { // <20><>һ<EFBFBD><EFBFBD><E9B5A5><EFBFBD>е<EFBFBD>j<EFBFBD><6A><EFBFBD><EFBFBD>λ<EFBFBD>õĵ<C3B5><C4B5><EFBFBD><EFBFBD>ڵ<EFBFBD>i<EFBFBD><69><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>г<EFBFBD><D0B3>ֹ<EFBFBD>
// vX[i][j] = 1;
// }
// }
// }
//
// // <20>Ҵʻ<D2B4><CABB><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD><C6B5>
// double maxFr = *max_element(vFr.begin(), vFr.end());
// // <20><>fr<66><72><EFBFBD><EFBFBD>ֵ<EFBFBD><EFBFBD><E6B7B6><EFBFBD><EFBFBD><EFBFBD><EFBFBD>0<EFBFBD><30>0.368<EFBFBD><EFBFBD>֮<EFBFBD><EFBFBD>
// const double normalMax = 0.368;
// for (auto& frVal : vFr) frVal = frVal * normalMax / maxFr;
// maxFr = normalMax;
// // <20><>ÿ<EFBFBD><C3BF>֪ʶ<D6AA><CAB6><EFBFBD><EFBFBD>ÿһ<C3BF><D2BB><EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD><DDA3><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϣ<EFBFBD><CFA2>
// for (int i = 0; i < numLiterature; ++i) {
// for (int j = 0; j < numWord; ++j) {
// if (vX[i][j] == 1) {
// hs[Get1DIndex(numLiterature, groupIdx, i)] -= vFr[j] * log2(vFr[j]);
// }
// }
// }
//
// // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD>ʻ<EFBFBD><CABB><EFBFBD><EFBFBD>ڵ<EFBFBD><DAB5><EFBFBD><EFBFBD><EFBFBD>λ<EFBFBD><CEBB>
// vector<int> vMaxPos;
// int idx = 0;
// for_each(vFr.begin(), vFr.end(), [&idx, maxFr, &vMaxPos](double val) {
// if (val == maxFr) vMaxPos.push_back(idx);
// idx++;
// });
//
// for (int i = 0; i < numLiterature; ++i) {
// int cumulateX = 0; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ƶ<EFBFBD>ʻ<CABB><E3B4A6><78><D6B5><EFBFBD>ۼӽ<DBBC><D3BD><EFBFBD>
// for (int j = 0; j < vMaxPos.size(); ++j) cumulateX += vX[i][vMaxPos[j]];
// if (cumulateX == vMaxPos.size()) { // <20><><EFBFBD><EFBFBD>Ƶ<EFBFBD><C6B5><EFBFBD><EFBFBD><EFBFBD>ߵĴʻ<CABB><E3B6BC><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// hr[Get1DIndex(numGroup, i, groupIdx)] = 1; // Ӧ<><D3A6><EFBFBD>DZ<EFBFBD>ʾ֪ʶ<D6AA><CAB6><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><D2BB><EFBFBD><EFBFBD><EFBFBD>ݸ<EFBFBD><DDB8><EFBFBD>ƪ<EFBFBD><C6AA><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ԱȽϸ<C8BD>
// }
// }
// }
finish = clock();
if (flagPrint == 2) cout << "Calc entropy time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
2023-10-05 10:38:21 +08:00
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><EFBFBD><EBB7B5>ֵ */
mid = clock();
2023-10-05 10:38:21 +08:00
if (nlhs > 0) {
int datasize = numGroup * numLiterature;
vector<double> vData(datasize);
for (int i = 0; i < numGroup; i++) for (int j = 0; j < numLiterature; j++)
vData[j * numGroup + i] = hs[i * numLiterature + j];
plhs[0] = writeToMatDouble(vData.data(), numGroup, numLiterature);
}
if (nlhs > 1) { // <20><>wsд<73><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
plhs[1] = writeToMatString2DCell(vvWordMtx);
2023-10-05 10:38:21 +08:00
}
finish = clock();
if (flagPrint == 2) cout << "Write back data time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
finish = clock();
if(flagPrint) cout << "CalcEntropy Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
2023-10-05 10:38:21 +08:00
}
/* <20><>main<69><6E><EFBFBD>Ե<EFBFBD><D4B5><EFBFBD> */
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
2023-10-05 10:38:21 +08:00
mexFunction(nlhs, plhs, nrhs, prhs);
}