完成pubmed txt文件的处理,将cerr改成cout,因为这样在matlab里调用的时候,可以在命令行窗口显示错误信息
This commit is contained in:
parent
efac589b58
commit
ae3fbe017d
|
|
@ -26,7 +26,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
|
||||||
|
|
||||||
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
||||||
if (pMatFile == nullptr) {
|
if (pMatFile == nullptr) {
|
||||||
cerr << "filePath is error!" << endl;
|
cout << "filePath is error!" << endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
||||||
|
|
@ -38,7 +38,7 @@ bool ReadMtxString(const string& filePath, const string& mtxName,
|
||||||
for (int j = 0; j < colNum; ++j) {
|
for (int j = 0; j < colNum; ++j) {
|
||||||
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
pCell = mxGetCell(pMxArray, j * rowNum + i);
|
||||||
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
if (mxGetString(pCell, strBuf, STRING_BUF_SIZE) != 0) {
|
||||||
cerr << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
cout << "String is too large to fit in the buffer! " << i + 1 << '\t' << j + 1 << endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
vStr[i * colNum + j] = strBuf;
|
vStr[i * colNum + j] = strBuf;
|
||||||
|
|
@ -57,7 +57,7 @@ T* ReadMtxDouble(const string& filePath, const string& mtxName, int* pRowNum, in
|
||||||
double* matData;
|
double* matData;
|
||||||
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
pMatFile = matOpen(filePath.c_str(), "r"); //打开.mat文件
|
||||||
if (pMatFile == nullptr) {
|
if (pMatFile == nullptr) {
|
||||||
cerr << "filePath is error!" << endl;
|
cout << "filePath is error!" << endl;
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
pMxArray = matGetVariable(pMatFile, mtxName.c_str()); //获取.mat文件里面名为matrixName的矩阵
|
||||||
|
|
@ -90,7 +90,7 @@ bool SaveMtxDouble(T* src, MATFile* pMatFile, string matrixName, int rowNum, int
|
||||||
// memset(mtxData, 0, datasize * sizeof(double));
|
// memset(mtxData, 0, datasize * sizeof(double));
|
||||||
if (pMatFile == nullptr)
|
if (pMatFile == nullptr)
|
||||||
{
|
{
|
||||||
cerr << "mat file pointer is error!" << endl;
|
cout << "mat file pointer is error!" << endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < rowNum; i++)
|
for (int i = 0; i < rowNum; i++)
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,7 @@ int main(int argc, const char** argv) {
|
||||||
clock_t begin, finish;
|
clock_t begin, finish;
|
||||||
begin = clock();
|
begin = clock();
|
||||||
if (argc < 2) {
|
if (argc < 2) {
|
||||||
cerr << "This program take at least 1 arguments(CMD; [Options])!" << endl;
|
cout << "This program take at least 1 arguments(CMD; [Options])!" << endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -19,24 +19,52 @@ using namespace std;
|
||||||
|
|
||||||
/* 将结果写入mat文件 */
|
/* 将结果写入mat文件 */
|
||||||
/* 将数据写入mat文件中,用给定的名称命名 */
|
/* 将数据写入mat文件中,用给定的名称命名 */
|
||||||
bool SavePubmed(const string& matPath,
|
bool SavePubmed(const string& matPath,
|
||||||
const vector<string> &vTgName,
|
const vector<string>& vTgName,
|
||||||
const vector<unordered_map<string, string> >& vumPaperTagVal)
|
vector<unordered_map<string, string> >& vumPaperTagVal)
|
||||||
{
|
{
|
||||||
MATFile* pMatFile = matOpen(matPath.c_str(), "r"); //打开.mat文件
|
MATFile* pMatFile = matOpen(matPath.c_str(), "w"); //打开.mat文件
|
||||||
if (pMatFile == nullptr) {
|
if (pMatFile == nullptr) {
|
||||||
cerr << "filePath is error! " << matPath << endl;
|
cout << "filePath is error! " << matPath << endl;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
vector<const char*> vTgChars;
|
vector<const char*> vTgChars;
|
||||||
for (auto strTg : vTgName) {
|
for (auto& strTg : vTgName) {
|
||||||
vTgChars.push_back(strTg.c_str());
|
vTgChars.push_back(strTg.c_str());
|
||||||
}
|
}
|
||||||
|
|
||||||
// 创建结构体数据
|
// 创建结构体数据
|
||||||
mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data());
|
mxArray* mxStruct = mxCreateStructMatrix(1, 1, vTgName.size(), vTgChars.data());
|
||||||
|
// 创建cell matrix
|
||||||
|
unordered_map<string, mxArray*> ummxCellMtx;
|
||||||
|
for (auto & tgName : vTgName) {
|
||||||
|
ummxCellMtx[tgName] = mxCreateCellMatrix(1, vumPaperTagVal.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
// 遍历每一篇文章
|
||||||
|
for (int i = 0; i < vumPaperTagVal.size(); ++i) {
|
||||||
|
auto& umTagVal = vumPaperTagVal[i];
|
||||||
|
// 遍历文章的每一个tag
|
||||||
|
for (auto& tgName : vTgName) {
|
||||||
|
mxArray* mxStr = mxCreateString(umTagVal[tgName].c_str());
|
||||||
|
mxArray* pMxArr = ummxCellMtx[tgName];
|
||||||
|
mxSetCell(pMxArr, i, mxStr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将cell matrix赋值给struct matrix
|
||||||
|
for (auto& tgName : vTgName) {
|
||||||
|
mxArray* pMxArr = ummxCellMtx[tgName];
|
||||||
|
mxSetField(mxStruct, 0, tgName.c_str(), pMxArr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// 将结构体写入mat,并命名为Tx
|
||||||
|
matPutVariable(pMatFile, "Tx", mxStruct);
|
||||||
|
// 将abstract信息写入mat,并命名为abs1
|
||||||
|
matPutVariable(pMatFile, "abs1", ummxCellMtx["AB"]);
|
||||||
|
|
||||||
|
matClose(pMatFile);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -129,7 +157,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
||||||
vumPaperTagVal.push_back(umTagContent);
|
vumPaperTagVal.push_back(umTagContent);
|
||||||
}
|
}
|
||||||
|
|
||||||
cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||||
|
|
||||||
/* 去除没有摘要的文章 */
|
/* 去除没有摘要的文章 */
|
||||||
const string abstractTag = "AB";
|
const string abstractTag = "AB";
|
||||||
|
|
@ -147,7 +175,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
||||||
const string pmidTag = "PMID";
|
const string pmidTag = "PMID";
|
||||||
for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) {
|
for (auto itr = vumPaperTagVal.begin(); itr != vumPaperTagVal.end(); ) {
|
||||||
if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) {
|
if (umPMID.find((*itr)[pmidTag]) != umPMID.end()) {
|
||||||
cout << "duplicate " << (*itr)[pmidTag] << endl;
|
// cout << "duplicate " << (*itr)[pmidTag] << endl;
|
||||||
itr = vumPaperTagVal.erase(itr);
|
itr = vumPaperTagVal.erase(itr);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
|
@ -172,7 +200,7 @@ void ProcessPubmedTxt(int argc, const char** argv) {
|
||||||
testOfs << vumPaperTagVal[i][abstractTag] << endl;
|
testOfs << vumPaperTagVal[i][abstractTag] << endl;
|
||||||
}
|
}
|
||||||
testOfs.close();
|
testOfs.close();
|
||||||
cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
// cout << "文件个数:" << vumPaperTagVal.size() << endl;
|
||||||
// for (auto num : vPaperStartIdx) cout << num << endl;
|
// for (auto num : vPaperStartIdx) cout << num << endl;
|
||||||
ifsPubmedTxt.close();
|
ifsPubmedTxt.close();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -183,7 +183,7 @@ void ThreadProcessData(const ThreadParam& param) {
|
||||||
int main(int argc, const char** argv) {
|
int main(int argc, const char** argv) {
|
||||||
|
|
||||||
if (argc != 5) {
|
if (argc != 5) {
|
||||||
cerr << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
|
cout << "This program should take 4 arguments(1.parrent Dir; 2. mat file suffix; 3. out mat filename; 4. thread number)!" << endl;
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
string parrentDir(argv[1]); // 知识颗粒的父目录名称
|
string parrentDir(argv[1]); // 知识颗粒的父目录名称
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue