完成 PubMed txt 文件的处理;将 cerr 改成 cout,这样在 MATLAB 里调用时,错误信息可以显示在命令行窗口中。

This commit is contained in:
zzh 2023-09-19 23:36:04 +08:00
parent efac589b58
commit ae3fbe017d
4 changed files with 43 additions and 15 deletions

View File

@ -26,7 +26,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件 pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) { if (pMatFile == nullptr) {
cerr << "filePath is error!" << endl; cout << "filePath is error!" << endl;
return false; return false;
} }
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵 pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
@ -38,7 +38,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
for (int j = 0; j < colNum; ++j) { for (int j = 0; j < colNum; ++j) {
pCell = mxGetCell(pMxArray, j * rowNum + i); pCell = mxGetCell(pMxArray, j * rowNum + i);
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) { if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
cerr << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl; cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
return false; return false;
} }
vStr[i * colNum + j] = strBuf; vStr[i * colNum + j] = strBuf;
@ -57,7 +57,7 @@ T* ReadMtxDouble(const string& filePath, const string& mtxName, int* pRowNum, in
double* matData; double* matData;
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件 pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
if (pMatFile == nullptr) { if (pMatFile == nullptr) {
cerr << "filePath is error!" << endl; cout << "filePath is error!" << endl;
return nullptr; return nullptr;
} }
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵 pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
@ -90,7 +90,7 @@ bool SaveMtxDouble(T* src, MATFile* pMatFile, string matrixName, int rowNum, int
// memset(mtxData, 0, datasize * sizeof(double)); // memset(mtxData, 0, datasize * sizeof(double));
if (pMatFile == nullptr) if (pMatFile == nullptr)
{ {
cerr << "mat file pointer is error!" << endl; cout << "mat file pointer is error!" << endl;
return false; return false;
} }
for (int i = 0; i < rowNum; i++) for (int i = 0; i < rowNum; i++)

View File

@ -17,7 +17,7 @@ int main(int argc, const char** argv) {
clock_t begin, finish; clock_t begin, finish;
begin = clock(); begin = clock();
if (argc < 2) { if (argc < 2) {
cerr << "This program take at least 1 arguments(CMD; [Options])!" << endl; cout << "This program take at least 1 arguments(CMD; [Options])!" << endl;
return 1; return 1;
} }

View File

@ -19,24 +19,52 @@ using namespace std;
/* 将结果写入mat文件 */ /* 将结果写入mat文件 */
/* 将数据写入mat文件中用给定的名称命名 */ /* 将数据写入mat文件中用给定的名称命名 */
bool SavePubmed(const string& matPath, bool SavePubmed(const string& matPath,
const vector<string> &vTgName, const vector<string>& vTgName,
const vector<unordered_map<string, string> >& vumPaperTagVal) vector<unordered_map<string, string> >& vumPaperTagVal)
{ {
MATFile* pMatFile = matOpen(matPath.c_str(), "r"); //打开.mat文件 MATFile* pMatFile = matOpen(matPath.c_str(), "w"); //打开.mat文件
if (pMatFile == nullptr) { if (pMatFile == nullptr) {
cerr << "filePath is error! " << matPath << endl; cout << "filePath is error! " << matPath << endl;
return false; return false;
} }
vector<const char*> vTgChars; vector<const char*> vTgChars;
for (auto strTg : vTgName) { for (auto& strTg : vTgName) {
vTgChars.push_back(strTg.c_str()); vTgChars.push_back(strTg.c_str());
} }
// 创建结构体数据 // 创建结构体数据
mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data()); mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data());
// 创建cell matrix
unordered_map<string, mxArray*> ummxCellMtx;
for (auto & tgName : vTgName) {
ummxCellMtx[tgName] = mxCreateCellMatrix(1, vumPaperTagVal.size());
}
// 遍历每一篇文章
for (int i = 0; i < vumPaperTagVal.size(); ++i) {
auto& umTagVal = vumPaperTagVal[i];
// 遍历文章的每一个tag
for (auto& tgName : vTgName) {
mxArray* mxStr = mxCreateString(umTagVal[tgName].c_str());
mxArray* pMxArr = ummxCellMtx[tgName];
mxSetCell(pMxArr, i, mxStr);
}
}
// 将cell matrix赋值给struct matrix
for (auto& tgName : vTgName) {
mxArray* pMxArr = ummxCellMtx[tgName];
mxSetField(mxStruct, 0, tgName.c_str(), pMxArr);
}
// 将结构体写入mat并命名为Tx
matPutVariable(pMatFile, "Tx", mxStruct);
// 将abstract信息写入mat并命名为abs1
matPutVariable(pMatFile, "abs1", ummxCellMtx["AB"]);
matClose(pMatFile);
return true; return true;
} }
@ -129,7 +157,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
vumPaperTagVal.push_back(umTagContent); vumPaperTagVal.push_back(umTagContent);
} }
cout << "文件个数:" << vumPaperTagVal.size() << endl; // cout << "文件个数:" << vumPaperTagVal.size() << endl;
/* 去除没有摘要的文章 */ /* 去除没有摘要的文章 */
const string abstractTag = "AB"; const string abstractTag = "AB";
@ -147,7 +175,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
const string pmidTag = "PMID"; const string pmidTag = "PMID";
for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) { for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) {
if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) { if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) {
cout << "duplicate " << (*itr)[pmidTag] << endl; // out << "duplicate " << (*itr)[pmidTag] << endl;
itr = vumPaperTagVal.erase(itr); itr = vumPaperTagVal.erase(itr);
} }
else { else {
@ -172,7 +200,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
testOfs << vumPaperTagVal[i][abstractTag] << endl; testOfs << vumPaperTagVal[i][abstractTag] << endl;
} }
testOfs.close(); testOfs.close();
cout << "文件个数:" << vumPaperTagVal.size() << endl; // cout << "文件个数:" << vumPaperTagVal.size() << endl;
// for (auto num : vPaperStartIdx) cout << num << endl; // for (auto num : vPaperStartIdx) cout << num << endl;
ifsPubmedTxt.close(); ifsPubmedTxt.close();

View File

@ -183,7 +183,7 @@ void ThreadProcessData(const ThreadParam& param) {
int main(int argc, const char** argv) { int main(int argc, const char** argv) {
if (argc != 5) { if (argc != 5) {
cerr << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl; cout << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
return 1; return 1;
} }
string parrentDir(argv[1]); // 知识颗粒的父目录名称 string parrentDir(argv[1]); // 知识颗粒的父目录名称