Showing
1 changed file
with
35 additions
and
19 deletions
... | @@ -11,7 +11,7 @@ | ... | @@ -11,7 +11,7 @@ |
11 | 11 | ||
12 | using namespace std; | 12 | using namespace std; |
13 | 13 | ||
14 | -const char* DBLP_FILENAME = "test.json"; | 14 | +const char* DBLP_FILENAME = "dblp.json"; |
15 | const char* DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json"; | 15 | const char* DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json"; |
16 | const string COLUMN_DELIMITER = "||"; | 16 | const string COLUMN_DELIMITER = "||"; |
17 | const string AUTHOR_DELIMITER = "&&"; | 17 | const string AUTHOR_DELIMITER = "&&"; |
... | @@ -22,6 +22,7 @@ int main() { | ... | @@ -22,6 +22,7 @@ int main() { |
22 | Json::Reader reader; | 22 | Json::Reader reader; |
23 | ifstream dblp_paper, dblp_coauthor; | 23 | ifstream dblp_paper, dblp_coauthor; |
24 | ofstream dblp_paper_out, dblp_coauthor_out; | 24 | ofstream dblp_paper_out, dblp_coauthor_out; |
25 | + boost::regex paper_reg{"(conf|journals).*"}; | ||
25 | 26 | ||
26 | try { | 27 | try { |
27 | //1. dblp paper dataset | 28 | //1. dblp paper dataset |
... | @@ -49,6 +50,7 @@ int main() { | ... | @@ -49,6 +50,7 @@ int main() { |
49 | for (auto it=root.begin(); | 50 | for (auto it=root.begin(); |
50 | it!=root.end(); | 51 | it!=root.end(); |
51 | ++it) { | 52 | ++it) { |
53 | + | ||
52 | //전처리 | 54 | //전처리 |
53 | row.clear(); | 55 | row.clear(); |
54 | coauthors.clear(); | 56 | coauthors.clear(); |
... | @@ -62,21 +64,35 @@ int main() { | ... | @@ -62,21 +64,35 @@ int main() { |
62 | //row 단위로 read | 64 | //row 단위로 read |
63 | row = *it; | 65 | row = *it; |
64 | paper_key = row[0].asString(); | 66 | paper_key = row[0].asString(); |
65 | - coauthors = row[1]; | 67 | + |
66 | - for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) { | 68 | + //check whether it is paper |
67 | - coauthor_list.push_back(coit->asString()); | 69 | + if (boost::regex_match(paper_key, paper_reg)) { |
68 | - } | 70 | + coauthors = row[1]; |
69 | - year = ((row[2].isNull())?-1:row[2].asInt()); | 71 | + for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) { |
70 | - | 72 | + coauthor_list.push_back(coit->asString()); |
71 | - //write | ||
72 | - dblp_paper_out << paper_key << COLUMN_DELIMITER; | ||
73 | - for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end(); ++auit) { | ||
74 | - dblp_paper_out << (*auit); | ||
75 | - if () { | ||
76 | - dblp_paper_out << AUTHOR_DELIMITER; | ||
77 | } | 73 | } |
74 | + year = ((row[2].isNull())?-1:row[2].asInt()); | ||
75 | + | ||
76 | + //write | ||
77 | + dblp_paper_out << paper_key << COLUMN_DELIMITER; | ||
78 | + if (coauthor_list.size() > 0) { | ||
79 | + for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end();) { | ||
80 | + dblp_paper_out << (*auit); | ||
81 | + ++auit; | ||
82 | + if (auit != coauthor_list.end()) { | ||
83 | + dblp_paper_out << AUTHOR_DELIMITER; | ||
84 | + } | ||
85 | + } | ||
86 | + } else { | ||
87 | + //empty | ||
88 | + throw exception("paper without author"); | ||
89 | + } | ||
90 | + dblp_paper_out << COLUMN_DELIMITER | ||
91 | + << year | ||
92 | + << endl; | ||
93 | + } else { | ||
94 | + //not paper | ||
78 | } | 95 | } |
79 | - dblp_paper_out << endl; | ||
80 | 96 | ||
81 | //후처리 | 97 | //후처리 |
82 | ++count; | 98 | ++count; |
... | @@ -87,17 +103,17 @@ int main() { | ... | @@ -87,17 +103,17 @@ int main() { |
87 | //2. dblp coauthorship dataset | 103 | //2. dblp coauthorship dataset |
88 | //dblp_coauthor.open(DBLP_COAUTHOR_FILENAME); | 104 | //dblp_coauthor.open(DBLP_COAUTHOR_FILENAME); |
89 | 105 | ||
90 | - | ||
91 | } | 106 | } |
92 | catch (const exception& e) { | 107 | catch (const exception& e) { |
93 | cerr << "Error: " << e.what() << endl; | 108 | cerr << "Error: " << e.what() << endl; |
94 | return -1; | 109 | return -1; |
95 | } | 110 | } |
96 | 111 | ||
112 | + | ||
97 | //release | 113 | //release |
98 | - dblp_paper.close(); | 114 | + if (dblp_paper) dblp_paper.close(); |
99 | - dblp_coauthor.close(); | 115 | + if (dblp_coauthor) dblp_coauthor.close(); |
100 | - dblp_paper_out.close(); | 116 | + if (dblp_paper_out) dblp_paper_out.close(); |
101 | - dblp_coauthor_out.close(); | 117 | + if (dblp_coauthor_out) dblp_coauthor_out.close(); |
102 | return 0; | 118 | return 0; |
103 | } | 119 | } |
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment