// main.cpp
#include <cstdio>
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>
#include <boost/regex.hpp>
#include <json/json.h>
// File-scope using-directive: code in this file refers to std names unqualified.
using namespace std;

// Path of the DBLP paper JSON input; the converted text is written to this
// path with ".out" appended.
const char* DBLP_FILENAME("test.json");

// Path reserved for the co-authorship dataset. In the visible code it is only
// referenced from a commented-out open() call.
const char* DBLP_COAUTHOR_FILENAME("tmp_dblp_coauthorship.json");

// Written after the paper key on every output line.
const string COLUMN_DELIMITER("||");

// Written inside the author loop to separate author names on an output line.
const string AUTHOR_DELIMITER("&&");
int main() {
//init
Json::Value root;
Json::Reader reader;
ifstream dblp_paper, dblp_coauthor;
ofstream dblp_paper_out, dblp_coauthor_out;
try {
//1. dblp paper dataset
dblp_paper.open(DBLP_FILENAME);
dblp_paper_out.open((string(DBLP_FILENAME)+string(".out")).c_str());
if (!dblp_paper || !dblp_paper_out) {
throw exception("dblp paper file error");
}
printf("* complete open\n");
if (!reader.parse(dblp_paper, root)) {
throw exception("parse error");
}
printf("* complete parse\n");
Json::Value row;
Json::Value coauthors;
std::string paper_key;
std::vector<string> coauthor_list;
int year;
int count = 1;
for (auto it=root.begin();
it!=root.end();
++it) {
//Àüó¸®
row.clear();
coauthors.clear();
coauthor_list.clear();
//print
if (count%1000000 == 0) {
printf("* [%d]\n", count);
}
//row ´ÜÀ§·Î read
row = *it;
paper_key = row[0].asString();
coauthors = row[1];
for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) {
coauthor_list.push_back(coit->asString());
}
year = ((row[2].isNull())?-1:row[2].asInt());
//write
dblp_paper_out << paper_key << COLUMN_DELIMITER;
for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end(); ++auit) {
dblp_paper_out << (*auit);
if () {
dblp_paper_out << AUTHOR_DELIMITER;
}
}
dblp_paper_out << endl;
//ÈÄó¸®
++count;
}
printf("* complete convert dblp paper\n");
//2. dblp coauthorship dataset
//dblp_coauthor.open(DBLP_COAUTHOR_FILENAME);
}
catch (const exception& e) {
cerr << "Error: " << e.what() << endl;
return -1;
}
//release
dblp_paper.close();
dblp_coauthor.close();
dblp_paper_out.close();
dblp_coauthor_out.close();
return 0;
}