twirls/MexFunc/ClusterRandSim.cpp

262 lines
7.0 KiB
C++
Raw Permalink Normal View History

#include <mex.h>
#include <mat.h>
#include <iostream>
#include <algorithm>
#include <string>
#include <unordered_set>
#include <ctime>
#include <vector>
#include <queue>
#include <memory>
#include <thread>
#include <mutex>
#include <condition_variable>
#include <future>
#include <functional>
#include <stdexcept>
#include <unordered_map>
#include <set>
#include <fstream>
#include <random>
#include <cmath>
#include <stdlib.h>
#include <limits.h>
#include <atomic>
using std::cout;
using std::endl;
using namespace std;
#define STRING_BUF_SIZE 204800
/* <20><>ȡһάdouble<6C><65><EFBFBD><EFBFBD> */
void Read1DDouble(const mxArray* pMxArray, vector<double>& vDat) {
int rowNum, colNum;
double* matData;
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
// cout << rowNum << " " << colNum << endl;
matData = (double*)mxGetData(pMxArray); //<2F><>ȡָ<C8A1><D6B8>
vDat.resize(rowNum * colNum);
for (int i = 0; i < vDat.size(); ++i) vDat[i] = matData[i];
}
/* <20><>ȡ<EFBFBD><C8A1>άdouble<6C><65><EFBFBD><EFBFBD> */
void Read2DDouble(const mxArray* pMxArray, vector<vector<double>>& vvDat) {
int rowNum, colNum;
double* matData;
rowNum = (int)mxGetM(pMxArray);
colNum = (int)mxGetN(pMxArray);
vvDat.resize(rowNum);
matData = (double*)mxGetData(pMxArray); //<2F><>ȡָ<C8A1><D6B8>
for (int i = 0; i < rowNum; ++i) {
vvDat[i].resize(colNum);
for (int j = 0; j < colNum; ++j) {
vvDat[i][j] = matData[j * rowNum + i];
}
}
}
// <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>д<EFBFBD><D0B4>mxArray, <20><>Ϊ<EFBFBD><CEAA><EFBFBD><EFBFBD><EFBFBD>ķ<EFBFBD><C4B7><EFBFBD>ֵ
mxArray* writeToMatDouble(const double* data, int rowNum, int colNum) {
mxArray* pWriteArray = NULL;//matlab<61><62>ʽ<EFBFBD><CABD><EFBFBD><EFBFBD>
int len = rowNum * colNum;
//<2F><><EFBFBD><EFBFBD>һ<EFBFBD><D2BB>rowNum*colNum<75>ľ<EFBFBD><C4BE><EFBFBD>
pWriteArray = mxCreateDoubleMatrix(rowNum, colNum, mxREAL);
//<2F><>data<74><61>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD>pWriteArrayָ<79><D6B8>
memcpy((void*)(mxGetPr(pWriteArray)), (void*)data, sizeof(double) * len);
return pWriteArray; // <20><>ֵ<EFBFBD><D6B5><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
}
#define START_IDX(tid, threadNum, arrLen) (arrLen) * (tid) / (threadNum)
#define END_IDX(tid, threadNum, arrLen) (arrLen) * (tid + 1) / (threadNum)
// <20>̲߳<DFB3><CCB2><EFBFBD>
struct TPRandSim {
vector<vector<double>>* pvvTr;
vector<int>* pvRandPos;
vector<double>* pvH;
vector<vector<double>>* pvvX;
int numPositive;
int tid;
int numThread;
};
// <20><><EFBFBD>߳<EFBFBD><DFB3><EFBFBD><EFBFBD>ں<EFBFBD><DABA><EFBFBD>
void ThreadRandSim(TPRandSim param) {
vector < vector<double>>& vvTr = *param.pvvTr;
vector<int>& vRandPos = *param.pvRandPos;
vector<vector<double>>& vvX = *param.pvvX;
vector<double>& vH = *param.pvH;
int numPositive = param.numPositive;
int rowNum = vvX.size();
int colNum = vvX[0].size();
int tid = param.tid;
int numThread = param.numThread;
clock_t begin = clock(), finish;
/* <20><><EFBFBD><EFBFBD>ģ<EFBFBD><C4A3> */
std::random_device rd;
int startIdx = START_IDX(tid, numThread, vvTr.size());
int endIdx = END_IDX(tid, numThread, vvTr.size());
// cout << tid << '\t' << numThread << '\t' << startIdx << '\t' << endIdx << endl;
for (int idx = startIdx; idx < endIdx; ++idx) {
auto& vTr = vvTr[idx];
std::shuffle(vRandPos.begin(), vRandPos.end(), std::default_random_engine(rd()));
for (int i = 0; i < rowNum; ++i) {
int hRowIdx = vRandPos[i]; // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>֮<EFBFBD><D6AE><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
if (vH[hRowIdx] == 1) {
for (int j = 0; j < colNum; ++j) {
vTr[j] += vvX[i][j];
}
}
}
for (auto& val : vTr) val /= numPositive;
}
finish = clock();
cout << "Thread Random simulation time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}
/* <20><><EFBFBD>ں<EFBFBD><DABA><EFBFBD> */
/*
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵ
<EFBFBD><EFBFBD><EFBFBD>
1. x <EFBFBD><EFBFBD>ά<EFBFBD><EFBFBD><EFBFBD>ݣ<EFBFBD>double<EFBFBD><EFBFBD><EFBFBD>ͣ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD>ֵ<EFBFBD>ȣ<EFBFBD>ÿ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>г<EFBFBD><EFBFBD>ֵĴ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>5<EFBFBD><EFBFBD>
2. h <EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ϊ<EFBFBD><EFBFBD><EFBFBD>׸<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ֵΪ1<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ڸ<EFBFBD>֪ʶ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ӧ<EFBFBD><EFBFBD><EFBFBD>ǣ<EFBFBD><EFBFBD><EFBFBD>Ϊ0<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
3. numThread
4. loopNum
<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
vs z score,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ָ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>һά
ps <EFBFBD><EFBFBD>vs<EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD>
*/
void mexFunction(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
if (nrhs < 2) {
cout << "At least 2 arguments should be given for this function!" << endl;
return;
}
clock_t begin = clock(), mid, finish;
vector<double> vH;
vector<vector<double>> vvX;
Read2DDouble(prhs[0], vvX);
Read1DDouble(prhs[1], vH);
int rowNum = vvX.size();
int colNum = vvX[0].size();
// cout << vH.size() << '\t' << vvX.size() << endl;
int numThread = 1;
int loopNum = 1000;
if (nrhs > 2) {
double* pNumThread = (double*)mxGetData(prhs[2]);
numThread = (int)pNumThread[0];
if (numThread < 1) numThread = 1;
}
if (nrhs > 3) {
double* pLoopNum = (double*)mxGetData(prhs[3]);
loopNum = (int)pLoopNum[0];
if (loopNum < 1000) loopNum = 1000;
}
finish = clock();
cout << "Load Data time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
// cout << numThread << '\t' << loopNum << endl;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ģ<EFBFBD><C4A3> */
mid = clock();
vector<double> vTs(colNum); // <20><>ʼ<EFBFBD><CABC><EFBFBD>ݣ<EFBFBD><DDA3><EFBFBD>¼vH<76><48>labelΪ1<CEAA><31><EFBFBD>е<EFBFBD><D0B5>о<EFBFBD>ֵ
int numPositive = 0;
for (int i = 0; i < rowNum; ++i) {
if (vH[i] == 1) {
++numPositive;
for (int j = 0; j < colNum; ++j) {
vTs[j] += vvX[i][j];
}
}
}
for (auto& val : vTs) val /= numPositive;
vector<vector<double>> vvTr(loopNum, vector<double>(colNum, 0)); // ģ<><C4A3><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
vector<vector<int>> vvRandPos(numThread, vector<int>(rowNum));
for (int i = 0; i < rowNum; ++i) {
for (auto& vRandPos : vvRandPos) {
vRandPos[i] = i;
}
}
vector<std::thread> vT;
for (int i = 0; i < numThread; ++i) {
TPRandSim tParam = { &vvTr, &vvRandPos[i], &vH, &vvX, numPositive, i, numThread };
vT.push_back(std::thread(ThreadRandSim, tParam));
}
for (auto& t : vT) t.join();
finish = clock();
cout << "Random simulation time: " << (double)(finish - mid) / CLOCKS_PER_SEC << " s" << endl;
/* <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
vector<double> vVs(colNum);
vector<double> vPs(colNum);
// <20><><EFBFBD>м<EFBFBD><D0BC><EFBFBD>ƽ<EFBFBD><C6BD>ֵ
vector<double> vMean(colNum);
vector<double> vStd(colNum);
for (int i = 0; i < vvTr.size(); ++i) {
for (int j = 0; j < vvTr[i].size(); ++j) {
vMean[j] += vvTr[i][j];
}
}
for (auto& val : vMean) { val /= loopNum; } // <20><>ֵ
for (int i = 0; i < vvTr.size(); ++i) {
for (int j = 0; j < vvTr[i].size(); ++j) {
const double diff = vvTr[i][j] - vMean[j];
vStd[j] += diff * diff;
}
}
for (auto& val : vStd) { val = sqrt(val / (loopNum - 1)); } // <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
// <20><><EFBFBD><EFBFBD>vs
for (int i = 0; i < vVs.size(); ++i) {
vVs[i] = (vTs[i] - vMean[i]) / vStd[i];
}
// <20><><EFBFBD><EFBFBD>ps
vector<double> vSumGreater(colNum);
vector<double> vSumLess(colNum);
for (int i = 0; i < loopNum; ++i) {
for (int j = 0; j < colNum; ++j) {
if (vvTr[i][j] >= vTs[j]) vSumGreater[j] ++;
if (vvTr[i][j] <= vTs[j]) vSumLess[j] ++;
}
}
for (auto& val : vSumGreater) val /= loopNum;
for (auto& val : vSumLess) val /= loopNum;
for (int i = 0; i < colNum; ++i) {
vPs[i] = min(vSumGreater[i], vSumLess[i]);
}
// ofstream ofs("d:\\result_2.txt");
// for (int i = 0; i < colNum; ++i) {
// ofs << vVs[i] << '\t' << vPs[i] << endl;
// }
// ofs.close();
/* д<><D0B4><EFBFBD><EFBFBD><EFBFBD><EFBFBD> */
if (nlhs > 0) { // vs
plhs[0] = writeToMatDouble(vVs.data(), 1, vVs.size());
}
if (nlhs > 1) { // ps
plhs[1] = writeToMatDouble(vPs.data(), 1, vPs.size());
}
finish = clock();
cout << "Cluster Random simulation Total time: " << (double)(finish - begin) / CLOCKS_PER_SEC << " s" << endl;
}
// <20><>c++<2B><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
void mexFunctionWrap(int nlhs, mxArray* plhs[], int nrhs, const mxArray* prhs[]) {
return mexFunction(nlhs, plhs, nrhs, prhs);
}