完成 PubMed txt 文件的处理;将 cerr 改成 cout,因为这样在 MATLAB 里调用时,错误信息可以显示在命令行窗口中

This commit is contained in:
zzh 2023-09-19 23:36:04 +08:00
parent efac589b58
commit ae3fbe017d
4 changed files with 43 additions and 15 deletions

View File

@ -26,7 +26,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) {
cerr << "filePath is error!" << endl;
cout << "filePath is error!" << endl;
return false;
}
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
@ -38,7 +38,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i);
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
cerr << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
return false;
}
vStr[i * colNum + j] = strBuf;
@ -57,7 +57,7 @@ T* ReadMtxDouble(const string& filePath, const string& mtxName, int* pRowNum, in
double* matData;
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) {
cerr << "filePath is error!" << endl;
cout << "filePath is error!" << endl;
return nullptr;
}
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
@ -90,7 +90,7 @@ bool SaveMtxDouble(T* src, MATFile* pMatFile, string matrixName, int rowNum, int
// memset(mtxData, 0, datasize * sizeof(double));
if (pMatFile == nullptr)
{
cerr << "mat file pointer is error!" << endl;
cout << "mat file pointer is error!" << endl;
return false;
}
for (int i = 0; i < rowNum; i++)

View File

@ -17,7 +17,7 @@ int main(int argc, const char** argv) {
clock_t begin, finish;
begin = clock();
if (argc < 2) {
cerr << "This program take at least 1 arguments(CMD; [Options])!" << endl;
cout << "This program take at least 1 arguments(CMD; [Options])!" << endl;
return 1;
}

View File

@ -19,24 +19,52 @@ using namespace std;
/* 将结果写入mat文件 */
/* 将数据写入mat文件中用给定的名称命名 */
bool SavePubmed(const string& matPath,
const vector<string> &vTgName,
const vector<unordered_map<string, string> >& vumPaperTagVal)
bool SavePubmed(const string& matPath,
const vector<string>& vTgName,
vector<unordered_map<string, string> >& vumPaperTagVal)
{
MATFile* pMatFile = matOpen(matPath.c_str(), "r"); //打开.mat文件
MATFile* pMatFile = matOpen(matPath.c_str(), "w"); //打开.mat文件
if (pMatFile == nullptr) {
cerr << "filePath is error! " << matPath << endl;
cout << "filePath is error! " << matPath << endl;
return false;
}
vector<const char*> vTgChars;
for (auto strTg : vTgName) {
for (auto& strTg : vTgName) {
vTgChars.push_back(strTg.c_str());
}
// 创建结构体数据
mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data());
// 创建cell matrix
unordered_map<string, mxArray*> ummxCellMtx;
for (auto & tgName : vTgName) {
ummxCellMtx[tgName] = mxCreateCellMatrix(1, vumPaperTagVal.size());
}
// 遍历每一篇文章
for (int i = 0; i < vumPaperTagVal.size(); ++i) {
auto& umTagVal = vumPaperTagVal[i];
// 遍历文章的每一个tag
for (auto& tgName : vTgName) {
mxArray* mxStr = mxCreateString(umTagVal[tgName].c_str());
mxArray* pMxArr = ummxCellMtx[tgName];
mxSetCell(pMxArr, i, mxStr);
}
}
// 将cell matrix赋值给struct matrix
for (auto& tgName : vTgName) {
mxArray* pMxArr = ummxCellMtx[tgName];
mxSetField(mxStruct, 0, tgName.c_str(), pMxArr);
}
// 将结构体写入mat并命名为Tx
matPutVariable(pMatFile, "Tx", mxStruct);
// 将abstract信息写入mat并命名为abs1
matPutVariable(pMatFile, "abs1", ummxCellMtx["AB"]);
matClose(pMatFile);
return true;
}
@ -129,7 +157,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
vumPaperTagVal.push_back(umTagContent);
}
cout << "文件个数:" << vumPaperTagVal.size() << endl;
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
/* 去除没有摘要的文章 */
const string abstractTag = "AB";
@ -147,7 +175,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
const string pmidTag = "PMID";
for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) {
if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) {
cout << "duplicate " << (*itr)[pmidTag] << endl;
// cout << "duplicate " << (*itr)[pmidTag] << endl;
itr = vumPaperTagVal.erase(itr);
}
else {
@ -172,7 +200,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
testOfs << vumPaperTagVal[i][abstractTag] << endl;
}
testOfs.close();
cout << "文件个数:" << vumPaperTagVal.size() << endl;
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
// for (auto num : vPaperStartIdx) cout << num << endl;
ifsPubmedTxt.close();

View File

@ -183,7 +183,7 @@ void ThreadProcessData(const ThreadParam& param) {
int main(int argc, const char** argv) {
if (argc != 5) {
cerr << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
cout << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
return 1;
}
string parrentDir(argv[1]); // 知识颗粒的父目录名称