sourceCode -checkpoint.ipynb
27.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
{
"cells": [
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [],
"source": [
"# -*- coding: utf-8 -*-\n",
"# Load the plot text and split it into lines for the tokenising cells below.\n",
"# NOTE(review): the old '#CP949' remark did not match the UTF-8 open() call - confirm the file's encoding.\n",
"\n",
"path = '/Users/yangyoonji/Documents/dataCapstone/data/source_code/test_plot/난설.txt'\n",
"\n",
"# The with-block closes the handle even when read() raises (e.g. on a decode error).\n",
"with open(path, 'r', encoding='UTF-8') as file:\n",
"    All = file.read()\n",
"\n",
"lines = All.split('\\n')"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"# Quick look at the second line of the loaded text.\n",
"second_line = lines[1]\n",
"print(second_line)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 26/26 [00:00<00:00, 162.62it/s]\n"
]
}
],
"source": [
"from konlpy.tag import Okt\n",
"from tqdm import tqdm\n",
"\n",
"# allMorphs collects the pairs returned by pos(); tokens collects the results of phrases().\n",
"allMorphs = []\n",
"tokens = []\n",
"\n",
"# Okt is used because phrases() is called below; KoNLPy's Komoran wrapper\n",
"# offers morphs/nouns/pos only, so the previous Komoran() failed on that call.\n",
"spliter = Okt()\n",
"for line in tqdm(lines):\n",
"    allMorphs.extend(spliter.pos(line))\n",
"    tokens.extend(spliter.phrases(line))"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('조선시대', 'Noun'), ('최고', 'Noun'), ('의', 'Josa'), ('여류', 'Noun'), ('시인', 'Noun'), ('으로', 'Josa'), ('당시', 'Noun'), ('명나라', 'Noun'), ('의', 'Josa'), ('사신', 'Noun'), ('‘', 'Foreign'), ('주', 'Modifier'), ('지번', 'Noun'), ('’', 'Punctuation'), ('에게서', 'Josa'), ('“', 'Foreign'), ('난', 'Noun'), ('설헌', 'Noun'), ('의', 'Josa'), ('시', 'Noun'), ('는', 'Josa'), ('속', 'Noun'), ('된', 'Verb'), ('세상', 'Noun'), ('바깥', 'Noun'), ('에', 'Josa'), ('있는', 'Adjective'), ('것', 'Noun'), ('같다', 'Adjective'), ('.', 'Punctuation'), ('그', 'Noun'), ('시구', 'Noun'), ('는', 'Josa'), ('모두', 'Noun'), ('주옥', 'Noun'), ('같다', 'Adjective'), ('.', 'Punctuation'), ('”', 'Foreign'), ('라는', 'Josa'), ('극찬', 'Noun'), ('을', 'Josa'), ('받고', 'Verb'), (',', 'Punctuation'), ('일본', 'Noun'), ('에까지', 'Josa'), ('그', 'Noun'), ('명성', 'Noun'), ('을', 'Josa'), ('떨쳤던', 'Verb'), ('‘', 'Foreign'), ('허난설헌', 'Noun'), ('(', 'Punctuation'), ('許蘭雪軒', 'Foreign'), ('/', 'Punctuation'), ('본명', 'Noun'), ('_', 'Punctuation'), ('허초희', 'Noun'), ('(', 'Punctuation'), ('許楚姬', 'Foreign'), (')', 'Punctuation'), ('/', 'Punctuation'), ('1563', 'Number'), ('~', 'Punctuation'), ('1589', 'Number'), (')’', 'Punctuation'), ('의', 'Noun'), ('시', 'Noun'), ('(', 'Punctuation'), ('詩', 'Foreign'), (')', 'Punctuation'), ('가', 'Verb'), ('뮤지컬', 'Noun'), ('로', 'Josa'), ('다시', 'Noun'), ('태어난다', 'Verb'), ('.', 'Punctuation'), ('공연', 'Noun'), ('제작사', 'Noun'), ('㈜', 'Foreign'), ('콘텐츠', 'Noun'), ('플래닝', 'Noun'), ('(', 'Punctuation'), ('대표', 'Noun'), ('_', 'Punctuation'), ('노재환', 'Noun'), (')', 'Punctuation'), ('은', 'Noun'), ('오는', 'Verb'), ('7월', 'Number'), ('대학로', 'Noun'), ('콘텐츠', 'Noun'), ('그', 'Determiner'), ('라운드', 'Noun'), ('극장', 'Noun'), ('에서', 'Josa'), ('창작', 'Noun'), ('뮤지컬', 'Noun'), ('<', 'Punctuation'), ('난설', 'Noun'), ('(', 'Punctuation'), ('극작', 'Noun'), ('_', 'Punctuation'), ('옥', 'Noun'), ('경선', 'Noun'), (',', 'Punctuation'), ('작곡', 'Noun'), ('_', 'Punctuation'), ('다미', 'Noun'), ('로', 'Josa'), (',', 'Punctuation'), ('연출', 'Noun'), ('_', 'Punctuation'), 
('이', 'Determiner'), ('기쁨', 'Noun'), (')>', 'Punctuation'), ('의', 'Noun'), ('초연', 'Noun'), ('을', 'Josa'), ('개막', 'Noun'), ('한다고', 'Verb'), ('밝혔다', 'Verb'), ('.', 'Punctuation'), ('뮤지컬', 'Noun'), ('<', 'Punctuation'), ('난설', 'Noun'), ('>', 'Punctuation'), ('은', 'Noun'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('남동생', 'Noun'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('이', 'Noun'), ('역모', 'Noun'), ('죄', 'Noun'), ('로', 'Josa'), ('처형', 'Noun'), ('되기', 'Verb'), ('전날', 'Noun'), ('밤', 'Noun'), ('에', 'Josa'), ('떠올리는', 'Verb'), ('그리웠던', 'Adjective'), ('기억', 'Noun'), ('으로부터', 'Josa'), ('시작', 'Noun'), ('된다', 'Verb'), ('.', 'Punctuation'), ('8', 'Number'), ('세', 'Noun'), ('때', 'Noun'), ('부터', 'Josa'), ('시를', 'Adjective'), ('짓기', 'Verb'), ('시작', 'Noun'), ('해', 'Verb'), ('조선', 'Noun'), ('최고', 'Noun'), ('의', 'Josa'), ('천', 'Modifier'), ('재시', 'Noun'), ('인', 'Suffix'), ('으로', 'Josa'), ('남아있는', 'Verb'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('(', 'Punctuation'), ('허난설헌', 'Noun'), (')’', 'Punctuation'), ('와', 'Verb'), ('그녀', 'Noun'), ('의', 'Josa'), ('시를', 'Adjective'), ('사랑', 'Noun'), ('하는', 'Verb'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('남동생', 'Noun'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’,', 'Punctuation'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('와', 'Verb'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('스승', 'Noun'), ('인', 'Josa'), ('‘', 'Foreign'), ('이', 'Determiner'), ('달', 'Noun'), ('’', 'Punctuation'), ('은', 'Noun'), ('각자', 'Noun'), ('의', 'Josa'), ('삶', 'Noun'), ('의', 'Josa'), ('소용돌이', 'Noun'), ('속', 'Noun'), ('에서', 'Josa'), ('희망', 'Noun'), ('을', 'Josa'), ('이야기', 'Noun'), ('하고', 'Josa'), ('세상', 'Noun'), ('을', 'Josa'), ('바라보는', 'Verb'), ('상반', 'Noun'), ('된', 'Verb'), ('시선', 'Noun'), ('으로', 'Josa'), ('때로는', 'Noun'), ('싸우기도', 'Verb'), ('하며', 'Verb'), ('문장가', 'Noun'), ('들', 'Suffix'), ('로서', 'Noun'), ('의', 'Josa'), ('우정', 'Noun'), ('을', 'Josa'), ('쌓는다', 
'Verb'), ('.', 'Punctuation'), ('이', 'Noun'), ('뮤지컬', 'Noun'), ('은', 'Josa'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('가', 'Verb'), ('인생', 'Noun'), ('을', 'Josa'), ('돌아보는', 'Verb'), ('방식', 'Noun'), ('또는', 'Adverb'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('일생', 'Noun'), ('을', 'Josa'), ('재', 'Noun'), ('조명하', 'Noun'), ('는', 'Josa'), ('방식', 'Noun'), ('을', 'Josa'), ('따르지', 'Verb'), ('않는다', 'Verb'), ('.', 'Punctuation'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('이', 'Noun'), ('가까이', 'Noun'), ('에서', 'Josa'), ('바라봤기', 'Verb'), ('때문', 'Noun'), ('에', 'Josa'), ('이해', 'Noun'), ('할', 'Verb'), ('수', 'Noun'), ('있는', 'Adjective'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('시', 'Noun'), (',', 'Punctuation'), ('스승', 'Noun'), ('‘', 'Foreign'), ('이', 'Determiner'), ('달', 'Noun'), ('’', 'Punctuation'), ('과의', 'Josa'), ('대화', 'Noun'), ('를', 'Josa'), ('통해', 'Noun'), ('구축', 'Noun'), ('된', 'Verb'), ('세계관', 'Noun'), ('을', 'Josa'), ('통한', 'Noun'), ('허초희', 'Noun'), ('의', 'Josa'), ('시', 'Noun'), ('의', 'Josa'), ('세계', 'Noun'), ('등', 'Noun'), ('을', 'Josa'), ('허균', 'Noun'), ('’', 'Punctuation'), ('과', 'Noun'), ('‘', 'Foreign'), ('이', 'Determiner'), ('달', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('관점', 'Noun'), ('의', 'Josa'), ('대립', 'Noun'), ('으로', 'Josa'), ('표현', 'Noun'), ('해', 'Noun'), ('낸', 'Verb'), ('것', 'Noun'), ('이', 'Josa'), ('이번', 'Noun'), ('작품', 'Noun'), ('의', 'Josa'), ('특징', 'Noun'), ('이다', 'Josa'), ('.', 'Punctuation'), ('작품', 'Noun'), ('을', 'Josa'), ('집필', 'Noun'), ('하기', 'Verb'), ('전', 'Noun'), ('수', 'Modifier'), ('개', 'Noun'), ('월간', 'Noun'), ('『', 'Foreign'), ('허난설헌', 'Noun'), ('집', 'Noun'), ('(', 'Punctuation'), ('許蘭雪軒集', 'Foreign'), (')', 'Punctuation'), ('』', 'Foreign'), ('을', 'Josa'), ('연구', 'Noun'), ('한', 'Josa'), ('작가', 'Noun'), ('‘', 'Foreign'), ('옥', 'Noun'), ('경선', 'Noun'), ('’', 'Punctuation'), ('은', 'Noun'), ('‘‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 
'Noun'), ('방', 'Noun'), ('안', 'Noun'), ('을', 'Josa'), ('가득', 'Noun'), ('채웠던', 'Verb'), ('시들이', 'Verb'), ('결국', 'Adverb'), ('그녀', 'Noun'), ('본인', 'Noun'), ('의', 'Josa'), ('의지', 'Noun'), ('에', 'Josa'), ('의해', 'Adjective'), ('소실', 'Noun'), ('되었지만', 'Verb'), (',', 'Punctuation'), ('잊혀', 'Verb'), ('지기', 'Noun'), ('를', 'Josa'), ('바랐던', 'Verb'), ('그녀', 'Noun'), ('의', 'Josa'), ('의지', 'Noun'), ('를', 'Josa'), ('누구', 'Noun'), ('보다도', 'Josa'), ('잘', 'Verb'), ('알', 'Noun'), ('고', 'Josa'), ('있었을', 'Adjective'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('에', 'Josa'), ('의해', 'Adjective'), ('『', 'Foreign'), ('허난설헌', 'Noun'), ('집', 'Noun'), ('』', 'Foreign'), ('이라는', 'Josa'), ('시집', 'Noun'), ('으로', 'Josa'), ('만들어져', 'Verb'), ('세상', 'Noun'), ('의', 'Josa'), ('극찬', 'Noun'), ('을', 'Josa'), ('받게', 'Verb'), ('한', 'Verb'), ('에너지', 'Noun'), ('의', 'Josa'), ('근원', 'Noun'), ('이', 'Josa'), ('무엇', 'Noun'), ('이었을까', 'Verb'), ('’', 'Punctuation'), ('에', 'Josa'), ('대한', 'Noun'), ('고민', 'Noun'), ('을', 'Josa'), ('거듭', 'Noun'), ('했다', 'Verb'), ('.', 'Punctuation'), ('그', 'Noun'), ('에', 'Josa'), ('따라', 'Verb'), ('아름다운', 'Adjective'), ('시', 'Noun'), ('(', 'Punctuation'), ('詩', 'Foreign'), (')', 'Punctuation'), ('구절', 'Noun'), ('들', 'Suffix'), ('속', 'Noun'), ('에서', 'Josa'), ('넘치는', 'Adjective'), ('기개', 'Noun'), ('와', 'Josa'), ('힘', 'Noun'), ('은', 'Josa'), ('그녀', 'Noun'), ('의', 'Josa'), ('동생', 'Noun'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('과', 'Noun'), ('스승', 'Noun'), ('인', 'Josa'), ('‘', 'Foreign'), ('이', 'Determiner'), ('달', 'Noun'), ('’', 'Punctuation'), ('개개인', 'Noun'), ('을', 'Josa'), ('넘어', 'Verb'), ('동시', 'Noun'), ('대', 'Suffix'), ('또는', 'Adverb'), ('후세', 'Noun'), ('의', 'Josa'), ('사람', 'Noun'), ('들', 'Suffix'), ('의', 'Josa'), ('마음', 'Noun'), ('에도', 'Josa'), ('큰', 'Verb'), ('울림', 'Noun'), ('을', 'Josa'), ('줄', 'Noun'), ('수', 'Noun'), ('있을', 'Adjective'), ('것', 'Noun'), ('이라는', 'Josa'), ('결론', 'Noun'), ('이', 'Josa'), ('이', 'Noun'), ('작품', 'Noun'), ('을', 'Josa'), ('탄생', 
'Noun'), ('하게', 'Verb'), ('했다', 'Verb'), ('.', 'Punctuation'), ('실제', 'Noun'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('글', 'Noun'), ('들', 'Suffix'), ('에', 'Josa'), ('큰', 'Verb'), ('감명', 'Noun'), ('을', 'Josa'), ('받은', 'Verb'), ('작가', 'Noun'), ('는', 'Josa'), ('5', 'Number'), ('편의', 'Noun'), ('시', 'Noun'), ('(', 'Punctuation'), ('견흥', 'Noun'), ('(', 'Punctuation'), ('遣興', 'Foreign'), ('),', 'Punctuation'), ('상봉', 'Noun'), ('행', 'Noun'), ('(', 'Punctuation'), ('相逢行', 'Foreign'), ('),', 'Punctuation'), ('가객', 'Noun'), ('사', 'Noun'), ('(', 'Punctuation'), ('賈客詞', 'Foreign'), ('),', 'Punctuation'), ('죽지사', 'Verb'), ('(', 'Punctuation'), ('竹枝詞', 'Foreign'), ('),', 'Punctuation'), ('유선', 'Noun'), ('사', 'Noun'), ('(', 'Punctuation'), ('遊仙詞', 'Foreign'), ('))', 'Punctuation'), ('와', 'Verb'), ('허난설헌', 'Noun'), ('집의', 'Noun'), ('유일한', 'Adjective'), ('산문', 'Noun'), ('(', 'Punctuation'), ('광', 'Noun'), ('한', 'Determiner'), ('전', 'Modifier'), ('백옥루', 'Noun'), ('상', 'Suffix'), ('량문', 'Noun'), ('(', 'Punctuation'), ('廣寒殿白玉樓上樑文', 'Foreign'), ('))', 'Punctuation'), ('을', 'Josa'), ('노랫말', 'Noun'), ('에', 'Josa'), ('활용', 'Noun'), ('하기도', 'Verb'), ('했다', 'Verb'), ('.', 'Punctuation'), ('여기', 'Noun'), ('에', 'Josa'), ('작곡가', 'Noun'), ('다', 'Adverb'), ('미로', 'Noun'), ('의', 'Josa'), ('아름다운', 'Adjective'), ('선율', 'Noun'), ('이', 'Josa'), ('더해져', 'Adjective'), ('음악', 'Noun'), ('이', 'Josa'), ('완성', 'Noun'), ('되었으며', 'Verb'), ('최근', 'Noun'), ('다양한', 'Adjective'), ('작업', 'Noun'), ('으로', 'Josa'), ('주목', 'Noun'), ('받으며', 'Verb'), ('2019년', 'Number'), ('부활', 'Noun'), ('한', 'Josa'), ('백상예술대상', 'Noun'), ('의', 'Josa'), ('연극', 'Noun'), ('부문', 'Noun'), ('시상', 'Noun'), ('인', 'Josa'), ('젊은', 'Adjective'), ('연극인', 'Noun'), ('상', 'Suffix'), ('에', 'Josa'), ('노미네이트', 'Noun'), ('되었던', 'Verb'), ('연출가', 'Noun'), ('이', 'Determiner'), ('기쁨', 'Noun'), ('이', 'Josa'), ('합세', 'Noun'), ('를', 'Josa'), ('하여', 'Verb'), ('센세이션', 'Noun'), ('을', 'Josa'), ('일으킬', 'Verb'), ('새로운', 'Adjective'), 
('작품', 'Noun'), ('의', 'Josa'), ('탄생', 'Noun'), ('을', 'Josa'), ('예고', 'Noun'), ('한', 'Josa'), ('다', 'Adverb'), ('.', 'Punctuation'), ('\\xa0', 'Foreign'), ('정제', 'Noun'), ('된', 'Verb'), ('문장', 'Noun'), ('을', 'Josa'), ('쓰지만', 'Verb'), ('자신', 'Noun'), ('의', 'Josa'), ('감정', 'Noun'), ('을', 'Josa'), ('숨기', 'Noun'), ('지', 'Josa'), ('않고', 'Verb'), ('느끼는', 'Verb'), ('것', 'Noun'), ('을', 'Josa'), ('그대로', 'Noun'), ('표현', 'Noun'), ('하는', 'Verb'), ('맑은', 'Noun'), ('사람', 'Noun'), ('으로', 'Josa'), (',', 'Punctuation'), ('자신', 'Noun'), ('을', 'Josa'), ('향', 'Noun'), ('해', 'Verb'), ('굳게', 'Adjective'), ('닫혀', 'Verb'), ('있는', 'Adjective'), ('세상', 'Noun'), ('의', 'Josa'), ('문', 'Noun'), ('을', 'Josa'), ('오직', 'Noun'), ('가진', 'Verb'), ('붓', 'Noun'), ('하나로', 'Noun'), ('열', 'Modifier'), ('고자', 'Noun'), ('한', 'Verb'), ('천재', 'Noun'), ('시인', 'Noun'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('역은', 'Adjective'), ('뮤지컬', 'Noun'), ('배우', 'Noun'), ('‘', 'Foreign'), ('정인지', 'Noun'), ('’', 'Punctuation'), ('와', 'Verb'), ('‘', 'Foreign'), ('하현', 'Noun'), ('지', 'Josa'), ('’', 'Punctuation'), ('가', 'Verb'), ('맡았다', 'Verb'), ('.', 'Punctuation'), ('배우', 'Noun'), ('‘', 'Foreign'), ('유현', 'Noun'), ('석', 'Noun'), ('’', 'Punctuation'), ('과', 'Noun'), ('‘', 'Foreign'), ('백', 'Modifier'), ('기범', 'Noun'), ('’', 'Punctuation'), ('은', 'Noun'), ('누', 'Noun'), ('이인', 'Noun'), ('‘', 'Foreign'), ('허초희', 'Noun'), ('’', 'Punctuation'), ('의', 'Noun'), ('재능', 'Noun'), ('과', 'Josa'), ('시를', 'Adjective'), ('사랑', 'Noun'), ('하고', 'Josa'), ('그녀', 'Noun'), ('의', 'Josa'), ('시를', 'Adjective'), ('통해', 'Noun'), ('세상', 'Noun'), ('을', 'Josa'), ('바라보고', 'Verb'), ('타인', 'Noun'), ('들', 'Suffix'), ('에게도', 'Josa'), ('그녀', 'Noun'), ('의', 'Josa'), ('시를', 'Adjective'), ('전', 'Noun'), ('하기', 'Verb'), ('위해', 'Noun'), ('애쓰는', 'Verb'), ('‘', 'Foreign'), ('허균', 'Noun'), ('’', 'Punctuation'), ('역', 'Noun'), ('을', 'Josa'), ('맡았다', 'Verb'), ('.', 'Punctuation'), ('술', 'Noun'), ('과', 'Josa'), ('풍류', 'Noun'), ('를', 'Josa'), ('사랑', 
'Noun'), ('하는', 'Verb'), ('한량', 'Noun'), ('이지만', 'Josa'), ('초희', 'Noun'), ('의', 'Josa'), ('재능', 'Noun'), ('을', 'Josa'), ('한눈', 'Noun'), ('에', 'Josa'), ('알아보고', 'Verb'), ('사랑', 'Noun'), ('으로', 'Josa'), ('보듬는', 'Verb'), ('스승', 'Noun'), ('‘', 'Foreign'), ('이', 'Determiner'), ('달', 'Noun'), ('‘', 'Foreign'), ('역은', 'Adjective'), ('뮤지컬', 'Noun'), ('배우', 'Noun'), ('‘', 'Foreign'), ('안재영', 'Noun'), ('’', 'Punctuation'), ('과', 'Noun'), ('‘', 'Foreign'), ('유승현', 'Noun'), ('’', 'Punctuation'), ('이', 'Noun'), ('연기', 'Noun'), ('한', 'Josa'), ('다', 'Adverb'), ('.', 'Punctuation'), ('“', 'Foreign'), ('이', 'Noun'), ('세상', 'Noun'), ('이', 'Josa'), ('이', 'Noun'), ('세상', 'Noun'), ('의', 'Josa'), ('낮', 'Noun'), ('들', 'Suffix'), ('이', 'Josa'), ('내', 'Noun'), ('것', 'Noun'), ('이', 'Josa'), (',', 'Punctuation'), ('우리', 'Noun'), ('의', 'Josa'), ('것', 'Noun'), ('이', 'Josa'), ('아니었으니', 'Adjective'), ('우리', 'Noun'), ('가', 'Josa'), ('가진', 'Verb'), ('유일한', 'Adjective'), ('검고', 'Adjective'), ('검은', 'Adjective'), ('붓', 'Noun'), ('으로', 'Josa'), ('낮', 'Noun'), ('을', 'Josa'), ('그렸다', 'Verb'), ('.', 'Punctuation'), ('”', 'Foreign'), ('광해군', 'Noun'), ('10년', 'Number'), (',', 'Punctuation'), ('인정전', 'Noun'), ('도성', 'Noun'), ('내', 'Noun'), ('에', 'Josa'), ('흉', 'Noun'), ('서', 'Josa'), ('를', 'Noun'), ('붙여', 'Verb'), ('백성', 'Noun'), ('들', 'Suffix'), ('을', 'Josa'), ('선동', 'Noun'), ('하고', 'Josa'), ('역도', 'Noun'), ('들', 'Suffix'), ('의', 'Josa'), ('무리', 'Noun'), ('와', 'Josa'), ('역모', 'Noun'), ('를', 'Josa'), ('도모', 'Noun'), ('하였다는', 'Verb'), ('죄', 'Noun'), ('로', 'Josa'), ('끌려', 'Verb'), ('온', 'Noun'), ('허균', 'Noun'), ('이', 'Josa'), ('추국', 'Noun'), ('을', 'Josa'), ('받는다', 'Verb'), ('.', 'Punctuation'), ('모진', 'Noun'), ('고문', 'Noun'), ('에도', 'Josa'), ('죄', 'Noun'), ('를', 'Josa'), ('인정', 'Noun'), ('하지', 'Verb'), ('않는', 'Verb'), ('허균', 'Noun'), (',', 'Punctuation'), ('오히려', 'Noun'), ('자신', 'Noun'), ('을', 'Josa'), ('모함', 'Noun'), ('한', 'Josa'), ('무리', 'Noun'), ('들', 'Suffix'), ('을', 'Josa'), ('향', 'Noun'), ('해', 
'Verb'), ('역적', 'Noun'), ('이라', 'Josa'), ('꾸짖는다', 'Verb'), ('.', 'Punctuation'), ('그러나', 'Conjunction'), (',', 'Punctuation'), ('함께', 'Adverb'), ('끌려', 'Verb'), ('온', 'Noun'), ('이', 'Noun'), ('들', 'Suffix'), ('이', 'Josa'), ('고문', 'Noun'), ('끝', 'Noun'), ('에', 'Josa'), ('거짓', 'Noun'), ('을', 'Josa'), ('자복', 'Noun'), ('하고', 'Josa'), ('허균', 'Noun'), ('을', 'Josa'), ('그', 'Noun'), ('들', 'Suffix'), ('의', 'Josa'), ('우두머리', 'Noun'), ('로', 'Josa'), ('지목', 'Noun'), ('한다', 'Verb'), ('.', 'Punctuation'), ('처형', 'Noun'), ('이', 'Josa'), ('있기', 'Adjective'), ('전날', 'Noun'), ('밤', 'Noun'), (',', 'Punctuation'), ('고문', 'Noun'), ('으로', 'Josa'), ('정신', 'Noun'), ('이', 'Josa'), ('흐릿', 'Noun'), ('해진', 'Verb'), ('허균', 'Noun'), ('에게', 'Josa'), ('누', 'Noun'), ('이', 'Suffix'), (',', 'Punctuation'), ('허초희', 'Noun'), ('와', 'Josa'), ('자신', 'Noun'), ('에게', 'Josa'), ('시를', 'Adjective'), ('가르쳐', 'Verb'), ('준', 'Noun'), ('스승', 'Noun'), (',', 'Punctuation'), ('이', 'Determiner'), ('달이', 'Noun'), ('찾아온다', 'Verb'), ('.', 'Punctuation'), ('허균', 'Noun'), ('은', 'Josa'), ('이', 'Determiner'), ('달', 'Noun'), ('을', 'Josa'), ('보자', 'Verb'), ('짐승', 'Noun'), ('처럼', 'Josa'), ('울부짖으며', 'Verb'), ('오래전', 'Adverb'), ('그', 'Noun'), ('들', 'Suffix'), ('을', 'Josa'), ('떠난', 'Verb'), ('이유', 'Noun'), ('를', 'Josa'), ('묻는다', 'Verb'), ('.', 'Punctuation'), ('그러자', 'Conjunction'), ('이', 'Determiner'), ('달', 'Noun'), ('은', 'Josa'), ('세', 'Noun'), ('사람', 'Noun'), ('이', 'Josa'), ('함께', 'Adverb'), ('했던', 'Verb'), ('밤', 'Noun'), ('과', 'Josa'), ('그', 'Noun'), ('들', 'Suffix'), ('이', 'Josa'), ('아끼고', 'Verb'), ('사랑', 'Noun'), ('했던', 'Verb'), ('시인', 'Noun'), (',', 'Punctuation'), ('허초희', 'Noun'), ('를', 'Josa'), ('떠올리는데', 'Verb'), ('...', 'Punctuation')]\n",
"918\n"
]
}
],
"source": [
"print(allMorphs)\n",
"print(len(allMorphs))"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"# Foreign, Punctuation, Number, Josa, Alpha, Suffix => remove these unneeded elements\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"# Create a JSON file"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"from pprint import pprint\n",
"\n",
"import nltk\n",
"\n",
"# Wrap the phrase tokens in an nltk.Text so that vocab() counts can be queried later.\n",
"# NOTE(review): 'NMSC' looks like a label carried over from another corpus - confirm.\n",
"corpus_label = 'NMSC'\n",
"text = nltk.Text(tokens, name=corpus_label)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"401\n"
]
}
],
"source": [
"# Number of tokens held by the nltk.Text object.\n",
"token_count = len(text.tokens)\n",
"print(token_count)"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('허균', 8),\n",
" ('허초희', 6),\n",
" ('세상', 6),\n",
" ('이달', 5),\n",
" ('스승', 4),\n",
" ('허난설헌', 3),\n",
" ('뮤지컬', 3),\n",
" ('시인', 3),\n",
" ('그녀', 3),\n",
" ('사랑', 3),\n",
" ('자신', 3),\n",
" ('고문', 3),\n",
" ('그들', 3),\n",
" ('극찬', 2),\n",
" ('난설', 2),\n",
" ('옥경선', 2),\n",
" ('이기쁨', 2),\n",
" ('최고', 2),\n",
" ('경선', 2),\n",
" ('처형', 2),\n",
" ('전날', 2),\n",
" ('전날 밤', 2),\n",
" ('통해', 2),\n",
" ('표현', 2),\n",
" ('역모', 2),\n",
" ('작품', 2),\n",
" ('사람', 2),\n",
" ('이 세상', 2),\n",
" ('우리', 2),\n",
" ('조선시대', 1),\n",
" ('조선시대 최고', 1),\n",
" ('조선시대 최고의 여류시인', 1),\n",
" ('당시', 1),\n",
" ('당시 명나라', 1),\n",
" ('당시 명나라의 사신', 1),\n",
" ('주지번', 1),\n",
" ('난설헌', 1),\n",
" ('난설헌의 시', 1),\n",
" ('속된 세상', 1),\n",
" ('속된 세상 바깥', 1),\n",
" ('있는 것', 1),\n",
" ('그 시구', 1),\n",
" ('모두', 1),\n",
" ('모두 주옥', 1),\n",
" ('일본', 1),\n",
" ('그 명성', 1),\n",
" ('본명', 1),\n",
" ('1563', 1),\n",
" ('1589', 1),\n",
" ('다시', 1),\n",
" ('공연제작사', 1),\n",
" ('콘텐츠플래닝', 1),\n",
" ('대표', 1),\n",
" ('노재환', 1),\n",
" ('은 오는 7월', 1),\n",
" ('은 오는 7월 대학로', 1),\n",
" ('은 오는 7월 대학로 콘텐츠그라운드', 1),\n",
" ('은 오는 7월 대학로 콘텐츠그라운드 극장', 1),\n",
" ('창작뮤지컬', 1),\n",
" ('극작', 1),\n",
" ('작곡', 1),\n",
" ('다미', 1),\n",
" ('연출', 1),\n",
" ('의 초연', 1),\n",
" ('개막', 1),\n",
" ('여류', 1),\n",
" ('명나라', 1),\n",
" ('사신', 1),\n",
" ('설헌', 1),\n",
" ('바깥', 1),\n",
" ('시구', 1),\n",
" ('주옥', 1),\n",
" ('명성', 1),\n",
" ('공연', 1),\n",
" ('제작사', 1),\n",
" ('콘텐츠', 1),\n",
" ('플래닝', 1),\n",
" ('7월', 1),\n",
" ('대학로', 1),\n",
" ('그라운드', 1),\n",
" ('극장', 1),\n",
" ('창작', 1),\n",
" ('초연', 1),\n",
" ('의 남동생', 1),\n",
" ('이 역모죄', 1),\n",
" ('그리웠던 기억', 1),\n",
" ('시작', 1),\n",
" ('8세', 1),\n",
" ('8세 때', 1),\n",
" ('조선', 1),\n",
" ('조선 최고', 1),\n",
" ('조선 최고의 천재시인', 1),\n",
" ('그녀의 시를 사랑', 1),\n",
" ('의 스승', 1),\n",
" ('은 각자', 1),\n",
" ('은 각자의 삶', 1),\n",
" ('은 각자의 삶의 소용돌이', 1),\n",
" ('은 각자의 삶의 소용돌이 속', 1),\n",
" ('희망', 1),\n",
" ('이야기', 1),\n",
" ('상반', 1),\n",
" ('상반된 시선', 1),\n",
" ('때로는', 1),\n",
" ('문장가들로서', 1),\n",
" ('문장가들로서의 우정', 1),\n",
" ('이 뮤지컬', 1),\n",
" ('인생', 1),\n",
" ('방식', 1),\n",
" ('의 일생', 1),\n",
" ('재조명하', 1),\n",
" ('이 가까이', 1),\n",
" ('때문', 1),\n",
" ('이해', 1),\n",
" ('이해할 수', 1),\n",
" ('대화', 1),\n",
" ('통해 구축', 1),\n",
" ('통해 구축 된 세계관', 1),\n",
" ('통한', 1),\n",
" ('통한 허초희', 1),\n",
" ('통한 허초희의 시', 1),\n",
" ('통한 허초희의 시의 세계', 1),\n",
" ('통한 허초희의 시의 세계 등', 1),\n",
" ('의 관점', 1),\n",
" ('의 관점의 대립', 1),\n",
" ('표현 해', 1),\n",
" ('표현 해 낸 것', 1),\n",
" ('이번', 1),\n",
" ('이번 작품', 1),\n",
" ('이번 작품의 특징', 1),\n",
" ('남동생', 1),\n",
" ('기억', 1),\n",
" ('천재시인', 1),\n",
" ('각자', 1),\n",
" ('소용돌이', 1),\n",
" ('시선', 1),\n",
" ('문장가들', 1),\n",
" ('로서', 1),\n",
" ('우정', 1),\n",
" ('일생', 1),\n",
" ('조명하', 1),\n",
" ('가까이', 1),\n",
" ('구축', 1),\n",
" ('세계관', 1),\n",
" ('세계', 1),\n",
" ('관점', 1),\n",
" ('대립', 1),\n",
" ('특징', 1),\n",
" ('집필', 1),\n",
" ('전 수개월간', 1),\n",
" ('허난설헌집', 1),\n",
" ('연구', 1),\n",
" ('작가', 1),\n",
" ('가득', 1),\n",
" ('그녀 본인', 1),\n",
" ('그녀 본인의 의지', 1),\n",
" ('소실', 1),\n",
" ('지기', 1),\n",
" ('그녀의 의지', 1),\n",
" ('누구', 1),\n",
" ('시집', 1),\n",
" ('세상의 극찬', 1),\n",
" ('에너지', 1),\n",
" ('에너지의 근원', 1),\n",
" ('무엇', 1),\n",
" ('대한', 1),\n",
" ('대한 고민', 1),\n",
" ('거듭', 1),\n",
" ('아름다운 시', 1),\n",
" ('구절들', 1),\n",
" ('구절들 속', 1),\n",
" ('넘치는 기개', 1),\n",
" ('넘치는 기개와 힘', 1),\n",
" ('그녀의 동생', 1),\n",
" ('과 스승', 1),\n",
" ('개개인', 1),\n",
" ('동시대', 1),\n",
" ('후세', 1),\n",
" ('후세의 사람들', 1),\n",
" ('후세의 사람들의 마음', 1),\n",
" ('울림', 1),\n",
" ('줄 수 있을 것', 1),\n",
" ('결론', 1),\n",
" ('이 작품', 1),\n",
" ('탄생', 1),\n",
" ('실제', 1),\n",
" ('의 글들', 1),\n",
" ('감명', 1),\n",
" ('5편의', 1),\n",
" ('5편의 시', 1),\n",
" ('견흥', 1),\n",
" ('상봉행', 1),\n",
" ('가객사', 1),\n",
" ('유선사', 1),\n",
" ('허난설헌집의', 1),\n",
" ('허난설헌집의 유일한 산문', 1),\n",
" ('광한전백옥루상량문', 1),\n",
" ('노랫말', 1),\n",
" ('활용', 1),\n",
" ('여기', 1),\n",
" ('작곡가', 1)]\n"
]
}
],
"source": [
"pprint(text.vocab().most_common(200))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}