完成pubmed txt文件的处理,将cerr改成cout,因为这样在matlab里调用的时候,可以在命令行窗口显示错误信息
This commit is contained in:
parent
efac589b58
commit
ae3fbe017d
|
|
@ -26,7 +26,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
|
|||
|
||||
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
||||
if (pMatFile == nullptr) {
|
||||
cerr << "filePath is error!" << endl;
|
||||
cout << "filePath is error!" << endl;
|
||||
return false;
|
||||
}
|
||||
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
||||
|
|
@ -38,7 +38,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
|
|||
for (int j = 0; j < colNum; ++j) {
|
||||
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
||||
cerr << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
||||
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
||||
return false;
|
||||
}
|
||||
vStr[i * colNum + j] = strBuf;
|
||||
|
|
@ -57,7 +57,7 @@ T* ReadMtxDouble(const string& filePath, const string& mtxName, int* pRowNum, in
|
|||
double* matData;
|
||||
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
||||
if (pMatFile == nullptr) {
|
||||
cerr << "filePath is error!" << endl;
|
||||
cout << "filePath is error!" << endl;
|
||||
return nullptr;
|
||||
}
|
||||
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
||||
|
|
@ -90,7 +90,7 @@ bool SaveMtxDouble(T* src, MATFile* pMatFile, string matrixName, int rowNum, int
|
|||
// memset(mtxData, 0, datasize * sizeof(double));
|
||||
if (pMatFile == nullptr)
|
||||
{
|
||||
cerr << "mat file pointer is error!" << endl;
|
||||
cout << "mat file pointer is error!" << endl;
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < rowNum; i++)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ int main(int argc, const char** argv) {
|
|||
clock_t begin, finish;
|
||||
begin = clock();
|
||||
if (argc < 2) {
|
||||
cerr << "This program take at least 1 arguments(CMD; [Options])!" << endl;
|
||||
cout << "This program take at least 1 arguments(CMD; [Options])!" << endl;
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,24 +19,52 @@ using namespace std;
|
|||
|
||||
/* 将结果写入mat文件 */
|
||||
/* 将数据写入mat文件中,用给定的名称命名 */
|
||||
bool SavePubmed(const string& matPath,
|
||||
const vector<string> &vTgName,
|
||||
const vector<unordered_map<string, string> >& vumPaperTagVal)
|
||||
bool SavePubmed(const string& matPath,
|
||||
const vector<string>& vTgName,
|
||||
vector<unordered_map<string, string> >& vumPaperTagVal)
|
||||
{
|
||||
MATFile* pMatFile = matOpen(matPath.c_str(), "r"); //打开.mat文件
|
||||
MATFile* pMatFile = matOpen(matPath.c_str(), "w"); //打开.mat文件
|
||||
if (pMatFile == nullptr) {
|
||||
cerr << "filePath is error! " << matPath << endl;
|
||||
cout << "filePath is error! " << matPath << endl;
|
||||
return false;
|
||||
}
|
||||
|
||||
vector<const char*> vTgChars;
|
||||
for (auto strTg : vTgName) {
|
||||
for (auto& strTg : vTgName) {
|
||||
vTgChars.push_back(strTg.c_str());
|
||||
}
|
||||
|
||||
// 创建结构体数据
|
||||
mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data());
|
||||
// 创建cell matrix
|
||||
unordered_map<string, mxArray*> ummxCellMtx;
|
||||
for (auto & tgName : vTgName) {
|
||||
ummxCellMtx[tgName] = mxCreateCellMatrix(1, vumPaperTagVal.size());
|
||||
}
|
||||
|
||||
// 遍历每一篇文章
|
||||
for (int i = 0; i < vumPaperTagVal.size(); ++i) {
|
||||
auto& umTagVal = vumPaperTagVal[i];
|
||||
// 遍历文章的每一个tag
|
||||
for (auto& tgName : vTgName) {
|
||||
mxArray* mxStr = mxCreateString(umTagVal[tgName].c_str());
|
||||
mxArray* pMxArr = ummxCellMtx[tgName];
|
||||
mxSetCell(pMxArr, i, mxStr);
|
||||
}
|
||||
}
|
||||
|
||||
// 将cell matrix赋值给struct matrix
|
||||
for (auto& tgName : vTgName) {
|
||||
mxArray* pMxArr = ummxCellMtx[tgName];
|
||||
mxSetField(mxStruct, 0, tgName.c_str(), pMxArr);
|
||||
}
|
||||
|
||||
// 将结构体写入mat,并命名为Tx
|
||||
matPutVariable(pMatFile, "Tx", mxStruct);
|
||||
// 将abstract信息写入mat,并命名为abs1
|
||||
matPutVariable(pMatFile, "abs1", ummxCellMtx["AB"]);
|
||||
|
||||
matClose(pMatFile);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
@ -129,7 +157,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
|||
vumPaperTagVal.push_back(umTagContent);
|
||||
}
|
||||
|
||||
cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||
|
||||
/* 去除没有摘要的文章 */
|
||||
const string abstractTag = "AB";
|
||||
|
|
@ -147,7 +175,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
|||
const string pmidTag = "PMID";
|
||||
for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) {
|
||||
if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) {
|
||||
cout << "duplicate " << (*itr)[pmidTag] << endl;
|
||||
// cout << "duplicate " << (*itr)[pmidTag] << endl;
|
||||
itr = vumPaperTagVal.erase(itr);
|
||||
}
|
||||
else {
|
||||
|
|
@ -172,7 +200,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
|||
testOfs << vumPaperTagVal[i][abstractTag] << endl;
|
||||
}
|
||||
testOfs.close();
|
||||
cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||
// for (auto num : vPaperStartIdx) cout << num << endl;
|
||||
ifsPubmedTxt.close();
|
||||
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ void ThreadProcessData(const ThreadParam& param) {
|
|||
int main(int argc, const char** argv) {
|
||||
|
||||
if (argc != 5) {
|
||||
cerr << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
|
||||
cout << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
|
||||
return 1;
|
||||
}
|
||||
string parrentDir(argv[1]); // 知识颗粒的父目录名称
|
||||
|
|
|
|||
Loading…
Reference in New Issue