185 lines
4.7 KiB
C++
185 lines
4.7 KiB
C++
#include <iostream>
|
|
#include <fstream>
|
|
#include <algorithm>
|
|
#include <random>
|
|
#include <unordered_map>
|
|
#include <omp.h>
|
|
#include <time.h>
|
|
#include <string>
|
|
#ifdef _WIN32
|
|
#include <io.h>
|
|
#include <process.h>
|
|
#define F_OK 0
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
#include "rand_sim.h"
|
|
using namespace std;
|
|
|
|
// Reads a text file into a table of lower-cased tokens.
//
// Each line of the file becomes one row appended to `vecVecStr`. A row holds
// the line's tokens, split on the space character ' ' only (tabs are NOT
// separators); runs of spaces produce no empty tokens. A blank line yields an
// empty row, so row indices keep matching line numbers.
//
// Parameters:
//   vecVecStr - output table; rows are appended, existing rows are kept.
//   cf        - path of the file to read.
//
// If the file cannot be opened, "error" is printed to stdout and `vecVecStr`
// is left untouched.
void txtReaderVvs(std::vector<std::vector<std::string>>& vecVecStr, std::string cf) {
    std::ifstream input(cf.c_str(), std::ios::in);
    if (!input.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    std::string line;
    while (std::getline(input, line)) {
        // Files written with CRLF line endings keep a trailing '\r' after
        // getline; drop it so the last token of a line compares equal to the
        // same word elsewhere.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }

        // Build the row in place to avoid copying it into the table afterwards.
        vecVecStr.emplace_back();
        std::vector<std::string>& row = vecVecStr.back();

        std::string::size_type pos = 0;
        while (pos < line.size()) {
            std::string::size_type stop = line.find(' ', pos);
            if (stop == std::string::npos) {
                stop = line.size();
            }
            if (stop > pos) {
                row.emplace_back(line, pos, stop - pos);
                std::string& token = row.back();
                // tolower is only defined for values representable as
                // unsigned char (or EOF); go through unsigned char so bytes
                // >= 0x80 are not passed as negative ints.
                std::transform(token.begin(), token.end(), token.begin(),
                               [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
            }
            pos = stop + 1;  // step over the separator
        }
    }
}
|
|
// Writes a table to a text file, one row per line.
//
// Every element is streamed with operator<< and followed by a single space
// (so non-empty lines end with a trailing space); every row is terminated by
// a newline. An existing file at `cf` is overwritten.
//
// Parameters:
//   vecVecStr - table to write; it is only read, never modified.
//   cf        - path of the output file.
//
// If the file cannot be opened, "error" is printed to stdout and nothing is
// written.
template <class AUTO>
void txtWriterVvs(std::vector<std::vector<AUTO>>& vecVecStr, std::string cf) {
    std::ofstream os(cf.c_str(), std::ios::out);
    if (!os.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    // Iterate by const reference: copying each row (and each element) is
    // needless work for large tables.
    for (const auto& row : vecVecStr) {
        for (const auto& item : row) {
            os << item << ' ';
        }
        // '\n' instead of endl: no flush per line; the stream flushes when it
        // is closed at the end of the function.
        os << '\n';
    }
}
|
|
|
|
// Reads a text file into a list of lower-cased lines.
//
// Each line of the file is converted to lower case and appended to `vecStr`
// as a single entry (the line is not tokenized; inner spaces are kept).
//
// Parameters:
//   vecStr - output list; entries are appended, existing entries are kept.
//   cf     - path of the file to read.
//
// If the file cannot be opened, "error" is printed to stdout and `vecStr` is
// left untouched.
void txtReaderVs(std::vector<std::string>& vecStr, std::string cf) {
    std::ifstream input(cf.c_str(), std::ios::in);
    if (!input.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    std::string line;
    while (std::getline(input, line)) {
        // Files written with CRLF line endings keep a trailing '\r' after
        // getline; drop it so the entry compares equal to the bare word.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }
        // tolower is only defined for values representable as unsigned char
        // (or EOF); go through unsigned char so bytes >= 0x80 are not passed
        // as negative ints.
        std::transform(line.begin(), line.end(), line.begin(),
                       [](unsigned char ch) { return static_cast<char>(::tolower(ch)); });
        vecStr.push_back(line);
    }
}
|
|
// Writes a list of strings to a text file, one string per line.
//
// An existing file at `cf` is overwritten.
//
// Parameters:
//   vecStr - strings to write; the list is only read, never modified.
//   cf     - path of the output file.
//
// If the file cannot be opened, "error" is printed to stdout and nothing is
// written.
void txtWriterVs(std::vector<std::string>& vecStr, std::string cf) {
    std::ofstream os(cf.c_str(), std::ios::out);
    if (!os.is_open()) {
        std::cout << "error" << std::endl;
        return;
    }

    // const reference avoids copying every string; '\n' avoids a flush per
    // line (the stream flushes when it is closed at the end of the function).
    for (const std::string& entry : vecStr) {
        os << entry << '\n';
    }
}
|
|
|
|
/* run this program using the console pauser or add your own getch, system("pause") or input loop */
|
|
int main(int argc, char** argv) {
|
|
//input : dicr, wd2, wd1s;
|
|
vector<string> dicr;
|
|
vector<vector<string>> wd2;
|
|
vector<vector<string>> wd1s;
|
|
|
|
string dicrName = "F:\\myWork\\20191129\\dicr.txt";
|
|
string wd2Name = "F:\\myWork\\20191129\\wd2.txt";
|
|
string wd1sName = "F:\\myWork\\20191129\\wd1s.txt";
|
|
string ZrName = "F:\\myWork\\20191129\\Zr.txt";
|
|
int loopNum;
|
|
|
|
if (argc != 6) {
|
|
cout << "Unexpected count of input arguments! Please input paths of dicr, wd2, wd1s, Zr and loop num! eg: F:\\myWork\\dicr.txt or 1000" << endl;
|
|
return -1;
|
|
}
|
|
|
|
dicrName = argv[1];
|
|
wd2Name = argv[2];
|
|
wd1sName = argv[3];
|
|
ZrName = argv[4];
|
|
loopNum = atoi(argv[5]);
|
|
|
|
if (access(dicrName.c_str(), F_OK) == -1) {
|
|
cout << "Invalid dicr file path! Please input a right path!" << endl;
|
|
return -1;
|
|
}
|
|
|
|
if (access(wd2Name.c_str(), F_OK) == -1) {
|
|
cout << "Invalid wd2 file path! Please input a right path!" << endl;
|
|
return -1;
|
|
}
|
|
|
|
if (access(wd1sName.c_str(), F_OK) == -1) {
|
|
cout << "Invalid wd1s file path! Please input a right path!" << endl;
|
|
return -1;
|
|
}
|
|
|
|
txtReaderVs(dicr, dicrName);
|
|
txtReaderVvs(wd2, wd2Name);
|
|
txtReaderVvs(wd1s, wd1sName);
|
|
// cout<<"/************* Input file read finished! ***************/"<<endl;
|
|
//cout<<dicr.size()<<" "<<wd2.size()<<" "<<wd1s.size()<<endl;
|
|
//for(int i=0;i<50;++i)
|
|
// cout<<wd1s[i].size()<<endl;
|
|
unordered_map<string, int> dicrHashMap;
|
|
for (int i = 0;i < dicr.size();++i)
|
|
dicrHashMap[dicr[i]] = i;
|
|
//output : Zr;
|
|
vector<vector<int>> Zr(loopNum, vector<int>(dicr.size(), 0));
|
|
vector<int> randNums(wd2.size(), 0);
|
|
for (int i = 0; i < wd2.size(); ++i) {
|
|
randNums[i] = i;
|
|
}
|
|
//cout<<"12345"<<endl;
|
|
clock_t start, end;
|
|
start = clock();
|
|
//cout<<dicr.size()<<endl;
|
|
//cout<<wd2.size()<<" "<<wd2[0].size()<<endl;
|
|
//cout<<wd1s.size()<<" "<<wd1s[0].size()<<endl;
|
|
//#pragma omp parallel for
|
|
for (int i = 0; i < loopNum; ++i) {
|
|
clock_t begin = clock();
|
|
//#pragma omp atomic
|
|
/*vector<int> randNums(wd2.size(),0);
|
|
for(int i = 0; i < wd2.size(); ++i){
|
|
randNums[i] = i;
|
|
}*/
|
|
std::random_device rd;
|
|
std::shuffle(randNums.begin(), randNums.end(), std::default_random_engine(rd()));
|
|
//cout<<i<<endl;
|
|
for (int j = 0; j < wd1s.size(); ++j) {
|
|
vector<string> wd2rCell = wd2[randNums[j]];
|
|
//cout<<"step 1"<<endl;
|
|
vector<int> f(dicr.size(), 0);
|
|
//cout<<"step 2"<<endl;
|
|
for (int k = 0;k < wd2rCell.size();++k) {
|
|
string tmp = wd2rCell[k];
|
|
//cout<<tmp<<" "<<k<<endl;
|
|
if (dicrHashMap.find(tmp) != dicrHashMap.end()) {
|
|
//cout<<dicrHashMap[tmp]<<endl;
|
|
f[dicrHashMap[tmp]] = 1;
|
|
}
|
|
}
|
|
//cout<<"step 3"<<endl;
|
|
for (int k = 0;k < dicr.size();++k) {
|
|
if (f[k] == 1)
|
|
Zr[i][k]++;
|
|
}
|
|
//cout<<"step 4"<<endl;
|
|
}
|
|
clock_t finish = clock();
|
|
double loopTime = (double)(finish - begin) / CLOCKS_PER_SEC;
|
|
//cout<<"Loop : "<<i<<" Time consuming :"<<loopTime<<"s"<<endl;
|
|
}
|
|
end = clock();
|
|
double endtime = (double)(end - start) / CLOCKS_PER_SEC;
|
|
//cout<<"Total time:"<<endtime<<endl;
|
|
txtWriterVvs(Zr, ZrName);
|
|
return 0;
|
|
}
|