Showing 4 changed files with 317 additions and 0 deletions
Parser.sln
0 → 100644
1 | + | ||
2 | +Microsoft Visual Studio Solution File, Format Version 12.00 | ||
3 | +# Visual Studio 14 | ||
4 | +VisualStudioVersion = 14.0.25420.1 | ||
5 | +MinimumVisualStudioVersion = 10.0.40219.1 | ||
6 | +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "dblpJsonParser", "dblpJsonParser\dblpJsonParser.vcxproj", "{D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}" | ||
7 | +EndProject | ||
8 | +Global | ||
9 | + GlobalSection(SolutionConfigurationPlatforms) = preSolution | ||
10 | + Debug|x64 = Debug|x64 | ||
11 | + Debug|x86 = Debug|x86 | ||
12 | + Release|x64 = Release|x64 | ||
13 | + Release|x86 = Release|x86 | ||
14 | + EndGlobalSection | ||
15 | + GlobalSection(ProjectConfigurationPlatforms) = postSolution | ||
16 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Debug|x64.ActiveCfg = Debug|x64 | ||
17 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Debug|x64.Build.0 = Debug|x64 | ||
18 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Debug|x86.ActiveCfg = Debug|Win32 | ||
19 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Debug|x86.Build.0 = Debug|Win32 | ||
20 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Release|x64.ActiveCfg = Release|x64 | ||
21 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Release|x64.Build.0 = Release|x64 | ||
22 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Release|x86.ActiveCfg = Release|Win32 | ||
23 | + {D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}.Release|x86.Build.0 = Release|Win32 | ||
24 | + EndGlobalSection | ||
25 | + GlobalSection(SolutionProperties) = preSolution | ||
26 | + HideSolutionNode = FALSE | ||
27 | + EndGlobalSection | ||
28 | +EndGlobal |
dblpJsonParser/dblpJsonParser.vcxproj
0 → 100644
1 | +<?xml version="1.0" encoding="utf-8"?> | ||
2 | +<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
3 | + <ItemGroup Label="ProjectConfigurations"> | ||
4 | + <ProjectConfiguration Include="Debug|Win32"> | ||
5 | + <Configuration>Debug</Configuration> | ||
6 | + <Platform>Win32</Platform> | ||
7 | + </ProjectConfiguration> | ||
8 | + <ProjectConfiguration Include="Release|Win32"> | ||
9 | + <Configuration>Release</Configuration> | ||
10 | + <Platform>Win32</Platform> | ||
11 | + </ProjectConfiguration> | ||
12 | + <ProjectConfiguration Include="Debug|x64"> | ||
13 | + <Configuration>Debug</Configuration> | ||
14 | + <Platform>x64</Platform> | ||
15 | + </ProjectConfiguration> | ||
16 | + <ProjectConfiguration Include="Release|x64"> | ||
17 | + <Configuration>Release</Configuration> | ||
18 | + <Platform>x64</Platform> | ||
19 | + </ProjectConfiguration> | ||
20 | + </ItemGroup> | ||
21 | + <PropertyGroup Label="Globals"> | ||
22 | + <ProjectGuid>{D0FA588C-B5C0-422F-8CA1-4FD40054BD1C}</ProjectGuid> | ||
23 | + <Keyword>Win32Proj</Keyword> | ||
24 | + <RootNamespace>dblpJsonParser</RootNamespace> | ||
25 | + <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion> | ||
26 | + </PropertyGroup> | ||
27 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> | ||
28 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> | ||
29 | + <ConfigurationType>Application</ConfigurationType> | ||
30 | + <UseDebugLibraries>true</UseDebugLibraries> | ||
31 | + <PlatformToolset>v140</PlatformToolset> | ||
32 | + <CharacterSet>Unicode</CharacterSet> | ||
33 | + </PropertyGroup> | ||
34 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration"> | ||
35 | + <ConfigurationType>Application</ConfigurationType> | ||
36 | + <UseDebugLibraries>false</UseDebugLibraries> | ||
37 | + <PlatformToolset>v140</PlatformToolset> | ||
38 | + <WholeProgramOptimization>true</WholeProgramOptimization> | ||
39 | + <CharacterSet>Unicode</CharacterSet> | ||
40 | + </PropertyGroup> | ||
41 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration"> | ||
42 | + <ConfigurationType>Application</ConfigurationType> | ||
43 | + <UseDebugLibraries>true</UseDebugLibraries> | ||
44 | + <PlatformToolset>v140</PlatformToolset> | ||
45 | + <CharacterSet>Unicode</CharacterSet> | ||
46 | + </PropertyGroup> | ||
47 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration"> | ||
48 | + <ConfigurationType>Application</ConfigurationType> | ||
49 | + <UseDebugLibraries>false</UseDebugLibraries> | ||
50 | + <PlatformToolset>v140</PlatformToolset> | ||
51 | + <WholeProgramOptimization>true</WholeProgramOptimization> | ||
52 | + <CharacterSet>Unicode</CharacterSet> | ||
53 | + </PropertyGroup> | ||
54 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" /> | ||
55 | + <ImportGroup Label="ExtensionSettings"> | ||
56 | + </ImportGroup> | ||
57 | + <ImportGroup Label="Shared"> | ||
58 | + </ImportGroup> | ||
59 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
60 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
61 | + </ImportGroup> | ||
62 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
63 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
64 | + </ImportGroup> | ||
65 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
66 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
67 | + </ImportGroup> | ||
68 | + <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
69 | + <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" /> | ||
70 | + </ImportGroup> | ||
71 | + <PropertyGroup Label="UserMacros" /> | ||
72 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
73 | + <LinkIncremental>true</LinkIncremental> | ||
74 | + <IncludePath>C:\boost\boost_1_62_0;C:\JsonCpp\jsoncpp-master\include;$(IncludePath)</IncludePath> | ||
75 | + <LibraryPath>C:\boost\boost_1_62_0\stage\lib;C:\JsonCpp\jsoncpp-master\build\vs71\debug\lib_json;$(LibraryPath)</LibraryPath> | ||
76 | + </PropertyGroup> | ||
77 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
78 | + <LinkIncremental>true</LinkIncremental> | ||
79 | + <IncludePath>C:\boost\boost_1_62_0;C:\JsonCpp\jsoncpp-master\include;$(IncludePath)</IncludePath> | ||
80 | + <LibraryPath>C:\boost\boost_1_62_0\stage\lib;C:\JsonCpp\jsoncpp-master\build\vs71\debug\lib_json;$(LibraryPath)</LibraryPath> | ||
81 | + </PropertyGroup> | ||
82 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
83 | + <LinkIncremental>false</LinkIncremental> | ||
84 | + </PropertyGroup> | ||
85 | + <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
86 | + <LinkIncremental>false</LinkIncremental> | ||
87 | + </PropertyGroup> | ||
88 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> | ||
89 | + <ClCompile> | ||
90 | + <PrecompiledHeader> | ||
91 | + </PrecompiledHeader> | ||
92 | + <WarningLevel>Level3</WarningLevel> | ||
93 | + <Optimization>Disabled</Optimization> | ||
94 | + <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
95 | + <SDLCheck>true</SDLCheck> | ||
96 | + <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
97 | + </ClCompile> | ||
98 | + <Link> | ||
99 | + <SubSystem>Console</SubSystem> | ||
100 | + <GenerateDebugInformation>true</GenerateDebugInformation> | ||
101 | + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | ||
102 | + <AdditionalDependencies>json_vc71_libmtd.lib;%(AdditionalDependencies)</AdditionalDependencies> | ||
103 | + </Link> | ||
104 | + </ItemDefinitionGroup> | ||
105 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> | ||
106 | + <ClCompile> | ||
107 | + <PrecompiledHeader> | ||
108 | + </PrecompiledHeader> | ||
109 | + <WarningLevel>Level3</WarningLevel> | ||
110 | + <Optimization>Disabled</Optimization> | ||
111 | + <PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
112 | + <SDLCheck>true</SDLCheck> | ||
113 | + <AdditionalIncludeDirectories>%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories> | ||
114 | + </ClCompile> | ||
115 | + <Link> | ||
116 | + <SubSystem>Console</SubSystem> | ||
117 | + <GenerateDebugInformation>true</GenerateDebugInformation> | ||
118 | + <AdditionalLibraryDirectories>%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories> | ||
119 | + <AdditionalDependencies>json_vc71_libmtd.lib;%(AdditionalDependencies)</AdditionalDependencies> | ||
120 | + </Link> | ||
121 | + </ItemDefinitionGroup> | ||
122 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> | ||
123 | + <ClCompile> | ||
124 | + <WarningLevel>Level3</WarningLevel> | ||
125 | + <PrecompiledHeader> | ||
126 | + </PrecompiledHeader> | ||
127 | + <Optimization>MaxSpeed</Optimization> | ||
128 | + <FunctionLevelLinking>true</FunctionLevelLinking> | ||
129 | + <IntrinsicFunctions>true</IntrinsicFunctions> | ||
130 | + <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
131 | + <SDLCheck>true</SDLCheck> | ||
132 | + </ClCompile> | ||
133 | + <Link> | ||
134 | + <SubSystem>Console</SubSystem> | ||
135 | + <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
136 | + <OptimizeReferences>true</OptimizeReferences> | ||
137 | + <GenerateDebugInformation>true</GenerateDebugInformation> | ||
138 | + </Link> | ||
139 | + </ItemDefinitionGroup> | ||
140 | + <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> | ||
141 | + <ClCompile> | ||
142 | + <WarningLevel>Level3</WarningLevel> | ||
143 | + <PrecompiledHeader> | ||
144 | + </PrecompiledHeader> | ||
145 | + <Optimization>MaxSpeed</Optimization> | ||
146 | + <FunctionLevelLinking>true</FunctionLevelLinking> | ||
147 | + <IntrinsicFunctions>true</IntrinsicFunctions> | ||
148 | + <PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | ||
149 | + <SDLCheck>true</SDLCheck> | ||
150 | + </ClCompile> | ||
151 | + <Link> | ||
152 | + <SubSystem>Console</SubSystem> | ||
153 | + <EnableCOMDATFolding>true</EnableCOMDATFolding> | ||
154 | + <OptimizeReferences>true</OptimizeReferences> | ||
155 | + <GenerateDebugInformation>true</GenerateDebugInformation> | ||
156 | + </Link> | ||
157 | + </ItemDefinitionGroup> | ||
158 | + <ItemGroup> | ||
159 | + <ClCompile Include="main.cpp" /> | ||
160 | + </ItemGroup> | ||
161 | + <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> | ||
162 | + <ImportGroup Label="ExtensionTargets"> | ||
163 | + </ImportGroup> | ||
164 | +</Project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
1 | +<?xml version="1.0" encoding="utf-8"?> | ||
2 | +<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> | ||
3 | + <ItemGroup> | ||
4 | + <Filter Include="소스 파일"> | ||
5 | + <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier> | ||
6 | + <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions> | ||
7 | + </Filter> | ||
8 | + <Filter Include="헤더 파일"> | ||
9 | + <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier> | ||
10 | + <Extensions>h;hh;hpp;hxx;hm;inl;inc;xsd</Extensions> | ||
11 | + </Filter> | ||
12 | + <Filter Include="리소스 파일"> | ||
13 | + <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier> | ||
14 | + <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions> | ||
15 | + </Filter> | ||
16 | + </ItemGroup> | ||
17 | + <ItemGroup> | ||
18 | + <ClCompile Include="main.cpp"> | ||
19 | + <Filter>소스 파일</Filter> | ||
20 | + </ClCompile> | ||
21 | + </ItemGroup> | ||
22 | +</Project> | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
dblpJsonParser/main.cpp
0 → 100644
#include <cstdio>
#include <exception>
#include <fstream>
#include <iostream>
#include <stdexcept>
#include <string>
#include <vector>

#include <boost/regex.hpp>

#include <json/json.h>

// NOTE: the rest of this file uses unqualified standard-library names, so the
// using-directive is kept for compatibility.
using namespace std;

// Input file holding the DBLP paper dataset (JSON). The converted output is
// written next to it with ".out" appended to this name.
const char* const DBLP_FILENAME = "test.json";
// Input file for the DBLP co-authorship dataset (conversion not implemented yet).
const char* const DBLP_COAUTHOR_FILENAME = "tmp_dblp_coauthorship.json";
// Separator written between columns of one output row.
const std::string COLUMN_DELIMITER = "||";
// Separator written between author names inside the author column.
const std::string AUTHOR_DELIMITER = "&&";

19 | +int main() { | ||
20 | + //init | ||
21 | + Json::Value root; | ||
22 | + Json::Reader reader; | ||
23 | + ifstream dblp_paper, dblp_coauthor; | ||
24 | + ofstream dblp_paper_out, dblp_coauthor_out; | ||
25 | + | ||
26 | + try { | ||
27 | + //1. dblp paper dataset | ||
28 | + dblp_paper.open(DBLP_FILENAME); | ||
29 | + dblp_paper_out.open((string(DBLP_FILENAME)+string(".out")).c_str()); | ||
30 | + if (!dblp_paper || !dblp_paper_out) { | ||
31 | + throw exception("dblp paper file error"); | ||
32 | + } | ||
33 | + printf("* complete open\n"); | ||
34 | + | ||
35 | + if (!reader.parse(dblp_paper, root)) { | ||
36 | + throw exception("parse error"); | ||
37 | + } | ||
38 | + printf("* complete parse\n"); | ||
39 | + | ||
40 | + | ||
41 | + Json::Value row; | ||
42 | + Json::Value coauthors; | ||
43 | + | ||
44 | + std::string paper_key; | ||
45 | + std::vector<string> coauthor_list; | ||
46 | + int year; | ||
47 | + | ||
48 | + int count = 1; | ||
49 | + for (auto it=root.begin(); | ||
50 | + it!=root.end(); | ||
51 | + ++it) { | ||
52 | + //Àüó¸® | ||
53 | + row.clear(); | ||
54 | + coauthors.clear(); | ||
55 | + coauthor_list.clear(); | ||
56 | + | ||
57 | |||
58 | + if (count%1000000 == 0) { | ||
59 | + printf("* [%d]\n", count); | ||
60 | + } | ||
61 | + | ||
62 | + //row ´ÜÀ§·Î read | ||
63 | + row = *it; | ||
64 | + paper_key = row[0].asString(); | ||
65 | + coauthors = row[1]; | ||
66 | + for (auto coit=coauthors.begin(); coit!=coauthors.end(); ++coit) { | ||
67 | + coauthor_list.push_back(coit->asString()); | ||
68 | + } | ||
69 | + year = ((row[2].isNull())?-1:row[2].asInt()); | ||
70 | + | ||
71 | + //write | ||
72 | + dblp_paper_out << paper_key << COLUMN_DELIMITER; | ||
73 | + for (auto auit=coauthor_list.begin(); auit!=coauthor_list.end(); ++auit) { | ||
74 | + dblp_paper_out << (*auit); | ||
75 | + if () { | ||
76 | + dblp_paper_out << AUTHOR_DELIMITER; | ||
77 | + } | ||
78 | + } | ||
79 | + dblp_paper_out << endl; | ||
80 | + | ||
81 | + //ÈÄó¸® | ||
82 | + ++count; | ||
83 | + } | ||
84 | + printf("* complete convert dblp paper\n"); | ||
85 | + | ||
86 | + | ||
87 | + //2. dblp coauthorship dataset | ||
88 | + //dblp_coauthor.open(DBLP_COAUTHOR_FILENAME); | ||
89 | + | ||
90 | + | ||
91 | + } | ||
92 | + catch (const exception& e) { | ||
93 | + cerr << "Error: " << e.what() << endl; | ||
94 | + return -1; | ||
95 | + } | ||
96 | + | ||
97 | + //release | ||
98 | + dblp_paper.close(); | ||
99 | + dblp_coauthor.close(); | ||
100 | + dblp_paper_out.close(); | ||
101 | + dblp_coauthor_out.close(); | ||
102 | + return 0; | ||
103 | +} | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
-
Please register or login to post a comment