twirls/RandSim/rand_sim.cpp

185 lines
4.8 KiB
C++
Raw Permalink Normal View History

#include <iostream>
#include <fstream>
#include <algorithm>
#include <random>
#include <unordered_map>
#include <omp.h>
#include <time.h>
#include <string>
#ifdef _WIN32
#include <io.h>
#include <process.h>
#define F_OK 0
#else
#include <unistd.h>
#endif
#include "rand_sim.h"
using namespace std;
/**
 * Reads a text file into a table of lower-cased, space-separated tokens.
 *
 * Every line of the file appends one row to vecVecStr (rows already present
 * are kept). A row holds the non-empty tokens of its line; only the space
 * character separates tokens, so a blank line produces an empty row.
 *
 * If the file cannot be opened, "error" is printed to stdout and vecVecStr is
 * left unchanged.
 *
 * @param vecVecStr Output table that receives one row per line read.
 * @param cf        Path of the file to read.
 */
void txtReaderVvs(std::vector<std::vector<std::string>>& vecVecStr, std::string cf) {
    std::ifstream input(cf.c_str(), std::ios::in);
    if (!input.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    // ::tolower has undefined behaviour for negative arguments, which is what
    // a plain char holds for non-ASCII bytes; go through unsigned char first.
    const auto lowerChar = [](char ch) {
        return static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
    };

    std::string lineInfo;
    while (std::getline(input, lineInfo)) {
        std::vector<std::string> tokens;
        std::string token;
        for (const char ch : lineInfo) {
            if (ch != ' ') {
                token += lowerChar(ch);
                continue;
            }
            // A space ends the current token; runs of spaces add nothing.
            if (!token.empty()) {
                tokens.push_back(token);
                token.clear();
            }
        }
        // The last token of a line is not followed by a space.
        if (!token.empty()) {
            tokens.push_back(token);
        }
        vecVecStr.push_back(tokens);
    }
}
/**
 * Writes a table to a text file, one row per line.
 *
 * Each element is streamed with operator<< and followed by a single space
 * (so non-empty lines end with a trailing space); each row is terminated by a
 * newline. An existing file is overwritten.
 *
 * If the file cannot be opened, "error" is printed to stdout and nothing is
 * written.
 *
 * @param vecVecStr Table to write; it is only read.
 * @param cf        Path of the file to create or overwrite.
 */
template <class AUTO>
void txtWriterVvs(std::vector<std::vector<AUTO>>& vecVecStr, std::string cf) {
    std::ofstream os(cf.c_str(), std::ios::out);
    if (!os.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }
    // Iterate by reference: copying every row and element is pure overhead.
    for (const auto& row : vecVecStr) {
        for (const auto& item : row)
            os << item << " ";
        // '\n' instead of endl: the stream is flushed once when it is closed.
        os << '\n';
    }
}
/**
 * Reads a text file line by line, lower-casing each line.
 *
 * Every line (including empty ones) is appended to vecStr as one string;
 * entries already present in vecStr are kept.
 *
 * If the file cannot be opened, "error" is printed to stdout and vecStr is
 * left unchanged.
 *
 * @param vecStr Output list that receives one entry per line read.
 * @param cf     Path of the file to read.
 */
void txtReaderVs(std::vector<std::string>& vecStr, std::string cf) {
    std::ifstream input(cf.c_str(), std::ios::in);
    if (!input.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    std::string lineInfo;
    while (std::getline(input, lineInfo)) {
        // ::tolower has undefined behaviour for negative arguments, which is
        // what a plain char holds for non-ASCII bytes; convert via unsigned char.
        std::transform(lineInfo.begin(), lineInfo.end(), lineInfo.begin(),
                       [](char ch) {
                           return static_cast<char>(::tolower(static_cast<unsigned char>(ch)));
                       });
        vecStr.push_back(lineInfo);
    }
}
/**
 * Writes a list of strings to a text file, one string per line.
 *
 * An existing file is overwritten. If the file cannot be opened, "error" is
 * printed to stdout and nothing is written.
 *
 * @param vecStr Strings to write; the list is only read.
 * @param cf     Path of the file to create or overwrite.
 */
void txtWriterVs(std::vector<std::string>& vecStr, std::string cf) {
    std::ofstream os(cf.c_str(), std::ios::out);
    if (!os.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }
    // By reference to avoid copying each string; '\n' instead of endl so the
    // stream is flushed once on close rather than after every line.
    for (const std::string& line : vecStr) {
        os << line << '\n';
    }
}
/**
 * RandSim entry point.
 *
 * Usage: <program> <dicr path> <wd2 path> <wd1s path> <Zr path> <loop num>
 *
 * dicr is read as one dictionary entry per line; wd2 and wd1s are read as rows
 * of space-separated words. For each of the <loop num> iterations the rows of
 * wd2 are shuffled and the first wd1s.size() shuffled rows are taken (wd1s
 * contributes only its row count). For every dictionary entry, the number of
 * taken rows that contain it is counted; iteration i's counts form line i of
 * the Zr output file, in dictionary order.
 *
 * @return 0 on success; -1 when the argument count is wrong, an input file is
 *         missing, the loop num is not a positive integer, or wd1s has more
 *         rows than wd2.
 */
int main(int argc, char** argv) {
    if (argc != 6) {
        std::cout << "Unexpected count of input arguments! Please input paths of dicr, wd2, wd1s, Zr and loop num! eg: F:\\myWork\\dicr.txt or 1000" << std::endl;
        return -1;
    }
    const std::string dicrName = argv[1];
    const std::string wd2Name = argv[2];
    const std::string wd1sName = argv[3];
    const std::string ZrName = argv[4];

    // atoi yields 0 for non-numeric text, and a negative count would turn into
    // an enormous unsigned size when Zr is allocated, so reject both up front.
    const int loopNum = atoi(argv[5]);
    if (loopNum <= 0) {
        std::cout << "Invalid loop num! Please input a positive integer!" << std::endl;
        return -1;
    }

    if (access(dicrName.c_str(), F_OK) == -1) {
        std::cout << "Invalid dicr file path! Please input a right path!" << std::endl;
        return -1;
    }
    if (access(wd2Name.c_str(), F_OK) == -1) {
        std::cout << "Invalid wd2 file path! Please input a right path!" << std::endl;
        return -1;
    }
    if (access(wd1sName.c_str(), F_OK) == -1) {
        std::cout << "Invalid wd1s file path! Please input a right path!" << std::endl;
        return -1;
    }

    // input : dicr, wd2, wd1s
    std::vector<std::string> dicr;
    std::vector<std::vector<std::string>> wd2;
    std::vector<std::vector<std::string>> wd1s;
    txtReaderVs(dicr, dicrName);
    txtReaderVvs(wd2, wd2Name);
    txtReaderVvs(wd1s, wd1sName);

    // Each iteration takes wd1s.size() distinct rows out of wd2; with fewer
    // wd2 rows than that the row lookup below would run out of bounds.
    const std::size_t sampleRows = wd1s.size();
    if (sampleRows > wd2.size()) {
        std::cout << "wd1s has more rows than wd2! Cannot draw that many distinct rows from wd2!" << std::endl;
        return -1;
    }

    // Dictionary entry -> column index in Zr (for duplicates the last one wins).
    std::unordered_map<std::string, std::size_t> dicrHashMap;
    for (std::size_t i = 0; i < dicr.size(); ++i)
        dicrHashMap[dicr[i]] = i;

    // output : Zr, one row of per-entry counts for every iteration
    std::vector<std::vector<int>> Zr(static_cast<std::size_t>(loopNum),
                                     std::vector<int>(dicr.size(), 0));

    // Row indices of wd2; reshuffled in place at the start of every iteration.
    std::vector<std::size_t> randNums(wd2.size());
    for (std::size_t i = 0; i < randNums.size(); ++i)
        randNums[i] = i;

    // lastCountedRow[e] holds the stamp of the last sampled row in which entry
    // e was counted, so an entry repeated inside one row is counted only once
    // without allocating and scanning a dictionary-sized flag array per row.
    std::vector<std::size_t> lastCountedRow(dicr.size(), 0);
    std::size_t rowStamp = 0;

    // Seed one engine for the whole run instead of building a random_device
    // and a fresh engine on every iteration.
    std::random_device rd;
    std::default_random_engine engine(rd());

    const clock_t start = clock();
    for (int i = 0; i < loopNum; ++i) {
        std::shuffle(randNums.begin(), randNums.end(), engine);
        std::vector<int>& counts = Zr[i];
        for (std::size_t j = 0; j < sampleRows; ++j) {
            ++rowStamp;  // unique, non-zero stamp for this sampled row
            for (const std::string& word : wd2[randNums[j]]) {
                const auto hit = dicrHashMap.find(word);
                if (hit == dicrHashMap.end())
                    continue;
                const std::size_t entry = hit->second;
                if (lastCountedRow[entry] != rowStamp) {
                    lastCountedRow[entry] = rowStamp;
                    ++counts[entry];
                }
            }
        }
    }
    const clock_t end = clock();
    const double endtime = static_cast<double>(end - start) / CLOCKS_PER_SEC;
    std::cout<<"RandSim Total time:"<<endtime << " s" << std::endl;

    txtWriterVvs(Zr, ZrName);
    return 0;
}