조성현

complete dblp paper v1

...@@ -11,7 +11,7 @@ ...@@ -11,7 +11,7 @@
11 11
12 using namespace std; 12 using namespace std;
13 13
14 -const char* DBLP_FILENAME = "test.json"; 14 +const char* DBLP_FILENAME = "dblp.json";
15 const char* DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json"; 15 const char* DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json";
16 const string COLUMN_DELIMITER = "||"; 16 const string COLUMN_DELIMITER = "||";
17 const string AUTHOR_DELIMITER = "&&"; 17 const string AUTHOR_DELIMITER = "&&";
...@@ -22,6 +22,7 @@ int main() { ...@@ -22,6 +22,7 @@ int main() {
22 Json::Reader reader; 22 Json::Reader reader;
23 ifstream dblp_paper, dblp_coauthor; 23 ifstream dblp_paper, dblp_coauthor;
24 ofstream dblp_paper_out, dblp_coauthor_out; 24 ofstream dblp_paper_out, dblp_coauthor_out;
25 + boost::regex paper_reg{"(conf|journals).*"};
25 26
26 try { 27 try {
27 //1. dblp paper dataset 28 //1. dblp paper dataset
...@@ -49,6 +50,7 @@ int main() { ...@@ -49,6 +50,7 @@ int main() {
49 for (auto it=root.begin(); 50 for (auto it=root.begin();
50 it!=root.end(); 51 it!=root.end();
51 ++it) { 52 ++it) {
53 +
52 //preprocessing 54 //preprocessing
53 row.clear(); 55 row.clear();
54 coauthors.clear(); 56 coauthors.clear();
...@@ -62,21 +64,35 @@ int main() { ...@@ -62,21 +64,35 @@ int main() {
62 //read one row at a time 64 //read one row at a time
63 row = *it; 65 row = *it;
64 paper_key = row[0].asString(); 66 paper_key = row[0].asString();
65 - coauthors = row[1]; 67 +
66 - for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) { 68 + //check whether it is paper
67 - coauthor_list.push_back(coit->asString()); 69 + if (boost::regex_match(paper_key, paper_reg)) {
68 - } 70 + coauthors = row[1];
69 - year = ((row[2].isNull())?-1:row[2].asInt()); 71 + for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) {
70 - 72 + coauthor_list.push_back(coit->asString());
71 - //write
72 - dblp_paper_out << paper_key << COLUMN_DELIMITER;
73 - for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end(); ++auit) {
74 - dblp_paper_out << (*auit);
75 - if () {
76 - dblp_paper_out << AUTHOR_DELIMITER;
77 } 73 }
74 + year = ((row[2].isNull())?-1:row[2].asInt());
75 +
76 + //write
77 + dblp_paper_out << paper_key << COLUMN_DELIMITER;
78 + if (coauthor_list.size() > 0) {
79 + for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end();) {
80 + dblp_paper_out << (*auit);
81 + ++auit;
82 + if (auit != coauthor_list.end()) {
83 + dblp_paper_out << AUTHOR_DELIMITER;
84 + }
85 + }
86 + } else {
87 + //empty
88 + throw exception("paper without author");
89 + }
90 + dblp_paper_out << COLUMN_DELIMITER
91 + << year
92 + << endl;
93 + } else {
94 + //not paper
78 } 95 }
79 - dblp_paper_out << endl;
80 96
81 //postprocessing 97 //postprocessing
82 ++count; 98 ++count;
...@@ -87,17 +103,17 @@ int main() { ...@@ -87,17 +103,17 @@ int main() {
87 //2. dblp coauthorship dataset 103 //2. dblp coauthorship dataset
88 //dblp_coauthor.open(DBLP_COAUTHOR_FILENAME); 104 //dblp_coauthor.open(DBLP_COAUTHOR_FILENAME);
89 105
90 -
91 } 106 }
92 catch (const exception& e) { 107 catch (const exception& e) {
93 cerr << "Error: " << e.what() << endl; 108 cerr << "Error: " << e.what() << endl;
94 return -1; 109 return -1;
95 } 110 }
96 111
112 +
97 //release 113 //release
98 - dblp_paper.close(); 114 + if (dblp_paper) dblp_paper.close();
99 - dblp_coauthor.close(); 115 + if (dblp_coauthor) dblp_coauthor.close();
100 - dblp_paper_out.close(); 116 + if (dblp_paper_out) dblp_paper_out.close();
101 - dblp_coauthor_out.close(); 117 + if (dblp_coauthor_out) dblp_coauthor_out.close();
102 return 0; 118 return 0;
103 } 119 }
...\ No newline at end of file ...\ No newline at end of file
......