전체 인덱싱+뮤지컬-checkpoint.ipynb
156 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
1423
1424
1425
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450
1451
1452
1453
1454
1455
1456
1457
1458
1459
1460
1461
1462
1463
1464
1465
1466
1467
1468
1469
1470
1471
1472
1473
1474
1475
1476
1477
1478
1479
1480
1481
1482
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494
1495
1496
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512
1513
1514
1515
1516
1517
1518
1519
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565
1566
1567
1568
1569
1570
1571
1572
1573
1574
1575
1576
1577
1578
1579
1580
1581
1582
1583
1584
1585
1586
1587
1588
1589
1590
1591
1592
1593
1594
1595
1596
1597
1598
1599
1600
1601
1602
1603
1604
1605
1606
1607
1608
1609
1610
1611
1612
1613
1614
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
1627
1628
1629
1630
1631
1632
1633
1634
1635
1636
1637
1638
1639
1640
1641
1642
1643
1644
1645
1646
1647
1648
1649
1650
1651
1652
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696
1697
1698
1699
1700
1701
1702
1703
1704
1705
1706
1707
1708
1709
1710
1711
1712
1713
1714
1715
1716
1717
1718
1719
1720
1721
1722
1723
1724
1725
1726
1727
1728
1729
1730
1731
1732
1733
1734
1735
1736
1737
1738
1739
1740
1741
1742
1743
1744
1745
1746
1747
1748
1749
1750
1751
1752
1753
1754
1755
1756
1757
1758
1759
1760
1761
1762
1763
1764
1765
1766
1767
1768
1769
1770
1771
1772
1773
1774
1775
1776
1777
1778
1779
1780
1781
1782
1783
1784
1785
1786
1787
1788
1789
1790
1791
1792
1793
1794
1795
1796
1797
1798
1799
1800
1801
1802
1803
1804
1805
1806
1807
1808
1809
1810
1811
1812
1813
1814
1815
1816
1817
1818
1819
1820
1821
1822
1823
1824
1825
1826
1827
1828
1829
1830
1831
1832
1833
1834
1835
1836
1837
1838
1839
1840
1841
1842
1843
1844
1845
1846
1847
1848
1849
1850
1851
1852
1853
1854
1855
1856
1857
1858
1859
1860
1861
1862
1863
1864
1865
1866
1867
1868
1869
1870
1871
1872
1873
1874
1875
1876
1877
1878
1879
1880
1881
1882
1883
1884
1885
1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
1901
1902
1903
1904
1905
1906
1907
1908
1909
1910
1911
1912
1913
1914
1915
1916
1917
1918
1919
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5699\n",
"9823\n",
"14020\n",
"2727\n",
"1498\n",
"1464\n"
]
}
],
"source": [
"import pandas as pd\n",
"from nltk.corpus import stopwords\n",
"from nltk.tokenize import word_tokenize\n",
"import re\n",
"\n",
"\"\"\"\n",
"thriller_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/thrillerPlot.csv')\n",
"drama_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/dramaPlot.csv')\n",
"fantasy_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/fantasyPlot.csv')\n",
"history_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/historyPlot.csv')\n",
"social_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/socialPlot.csv')\n",
"romance_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/moviedata/moviePlot/romancePlot.csv')\n",
"musical_plot = pd.read_csv('/Users/yangyoonji/Documents/2020-1/2020-dataCapstone/data/musicalData/broadMusicalPlot.csv',encoding='cp949')\n",
"\n",
"# /Users/김서영/Desktop/datacap/data/moviedata/moviePlot/romancePlot.csv\n",
"\"\"\"\n",
"romance_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/romancePlot.csv')\n",
"thriller_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/thrillerPlot.csv')\n",
"drama_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/dramaPlot.csv')\n",
"fantasy_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/fantasyPlot.csv')\n",
"history_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/historyPlot.csv')\n",
"social_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/moviedata/moviePlot/socialPlot.csv')\n",
"\n",
"musical_plot = pd.read_csv('/Users/김서영/Desktop/datacap/data/musicalData/broadMusicalPlot.csv',encoding='cp949')\n",
"\n",
"\n",
"print(len(romance_plot)) #5699 ==> train 2500 test 2500\n",
"print(len(thriller_plot)) #9823 ==> train 2500 test 2500\n",
"print(len(drama_plot))\n",
"print(len(fantasy_plot))\n",
"print(len(history_plot))\n",
"print(len(social_plot))\n",
"\n",
"train_data_size = 1463\n",
"test_data_size = 1463\n",
"\n",
"#전처리(1) 전부 소문자로 변환\n",
"\n",
"\n",
"#romance_plot.줄거리 = romance_plot.줄거리.str.lower()\n",
"#thriller_plot.줄거리 = thriller_plot.줄거리.str.lower()\n",
"\n",
"#전처리(1-1) 데이터 csv 파일로 옮기기\n",
"#romance_plot 2899개 train_data로 to_csv || 2800개 test_data로 to_csv\n",
"#thriller_plot 2899개 train_data로 to_csv || 2800개 test_data로 to_csv\n"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"RM = [[] for _ in range(5699)]\n",
"for i in range(5699):\n",
" RM[i].append(''.join(romance_plot.줄거리[i]))\n",
" \n",
"TH = [[] for _ in range(9823)]\n",
"for i in range(9823):\n",
" TH[i].append(''.join(thriller_plot.줄거리[i]))\n",
"\n",
"FN = [[] for _ in range(2727)]\n",
"for i in range(2727):\n",
" FN[i].append(''.join(fantasy_plot.줄거리[i]))\n",
" \n",
"HS = [[] for _ in range(1498)]\n",
"for i in range(1498):\n",
" HS[i].append(''.join(history_plot.줄거리[i]))\n",
" \n",
"SC = [[] for _ in range(1464)]\n",
"for i in range(1464):\n",
" SC[i].append(''.join(social_plot.줄거리[i]))\n",
"\n",
"DR = [[] for _ in range(14019)]\n",
"for i in range(14019):\n",
" DR[i].append(''.join(drama_plot.줄거리[i]))\n",
" \n",
"Mu = [[] for _ in range(307)]\n",
"for i in range(307):\n",
" Mu[i].append(''.join(musical_plot.muplot[i]))\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"allplot = RM+TH+FN+HS+SC+DR #모든 장르 줄거리 (로맨스+스릴러+판타지+역사+사회+드라마)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"35230\n",
"307\n"
]
}
],
"source": [
"print(len(allplot))\n",
"print(len(musical_plot))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 모든 장르 줄거리 "
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████| 35230/35230 [02:26<00:00, 240.55it/s]\n"
]
}
],
"source": [
"# 토큰화+전처리(3) 전체 불용어 처리\n",
"# 전체 플롯\n",
"from tqdm import tqdm\n",
"all_vocab = {} \n",
"all_sentences = []\n",
"stop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(allplot):\n",
" all_sentences = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
" for word in all_sentences: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in stop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in all_vocab:\n",
" all_vocab[word] = 0 \n",
" all_vocab[word] += 1\n",
" all_sentences.append(result) "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"all_vocab_sorted = sorted(all_vocab.items(), key = lambda x:x[1], reverse = True)\n",
"\n",
"#전처리(4) 인덱스 부여\n",
"all_word_to_index = {}\n",
"i=0\n",
"for (word, frequency) in all_vocab_sorted :\n",
" if frequency > 1 : # 정제(Cleaning) 챕터에서 언급했듯이 빈도수가 적은 단어는 제외한다.\n",
" i=i+1\n",
" all_word_to_index[word] = i\n",
"#print(all_word_to_index)\n",
"\n",
"vocab_size = 15000 #상위 15000개 단어만 사용\n",
"words_frequency = [w for w,c in all_word_to_index.items() if c >= vocab_size + 1] # 인덱스가 vocab_size(15000) 초과인 단어 제거\n",
"for w in words_frequency:\n",
" del all_word_to_index[w] # 해당 단어에 대한 인덱스 정보를 삭제\n",
"\n",
" \n",
"all_word_to_index['OOV'] = len(all_word_to_index) + 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 로맨스"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 5699/5699 [00:21<00:00, 267.88it/s]\n"
]
}
],
"source": [
"# 토큰화+전처리(3) 불용어 처리\n",
"# 로맨스 플롯\n",
"\n",
"vocab_r = {} \n",
"RMsentences = []\n",
"RMstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(RM):\n",
" RMsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in RMsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in RMstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_r:\n",
" vocab_r[word] = 0 \n",
" vocab_r[word] += 1\n",
" RMsentences.append(result) \n",
"\n",
"R_encoded = []\n",
"for s in RMsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" R_encoded.append(temp)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 스릴러"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 9823/9823 [00:40<00:00, 240.93it/s]\n"
]
}
],
"source": [
"# 토큰화+전처리(3) 불용어 처리\n",
"# 스릴러 플롯\n",
"\n",
"vocab_th = {} \n",
"THsentences = []\n",
"THstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(TH):\n",
" \n",
" THsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in THsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in THstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_th:\n",
" vocab_th[word] = 0 \n",
" vocab_th[word] += 1\n",
" THsentences.append(result) \n",
"\n",
"TH_encoded = []\n",
"for s in THsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" TH_encoded.append(temp)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 역사"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 1498/1498 [00:05<00:00, 267.17it/s]\n"
]
}
],
"source": [
"vocab_HS = {} \n",
"HSsentences = []\n",
"HSstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(HS):\n",
" \n",
" HSsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in HSsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in HSstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_HS:\n",
" vocab_HS[word] = 0 \n",
" vocab_HS[word] += 1\n",
" HSsentences.append(result) \n",
"\n",
"HS_encoded = []\n",
"for s in HSsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" HS_encoded.append(temp)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 드라마"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|███████████████████████████████████████████████████████████████████████████| 14019/14019 [00:47<00:00, 294.57it/s]\n"
]
}
],
"source": [
"vocab_DR = {} \n",
"DRsentences = []\n",
"DRstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(DR):\n",
" \n",
" DRsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in DRsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in DRstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_DR:\n",
" vocab_DR[word] = 0 \n",
" vocab_DR[word] += 1\n",
" DRsentences.append(result) \n",
"\n",
"DR_encoded = []\n",
"for s in DRsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" DR_encoded.append(temp)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 판타지"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 2727/2727 [00:14<00:00, 184.44it/s]\n"
]
}
],
"source": [
"vocab_FN = {} \n",
"FNsentences = []\n",
"FNstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(FN):\n",
" \n",
" FNsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in FNsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in FNstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_FN:\n",
" vocab_FN[word] = 0 \n",
" vocab_FN[word] += 1\n",
" FNsentences.append(result) \n",
"\n",
"FN_encoded = []\n",
"for s in FNsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" FN_encoded.append(temp)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 사회"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|█████████████████████████████████████████████████████████████████████████████| 1464/1464 [00:06<00:00, 216.19it/s]\n"
]
}
],
"source": [
"vocab_SC = {} \n",
"SCsentences = []\n",
"SCstop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(SC):\n",
" \n",
" SCsentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
"\n",
" for word in SCsentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in SCstop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in vocab_SC:\n",
" vocab_SC[word] = 0 \n",
" vocab_SC[word] += 1\n",
" SCsentences.append(result) \n",
"\n",
"SC_encoded = []\n",
"for s in SCsentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" SC_encoded.append(temp)\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 뮤지컬"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████| 307/307 [00:00<00:00, 1131.54it/s]\n"
]
}
],
"source": [
"# 토큰화+전처리(3) 불용어 처리\n",
"# 뮤지컬 플롯\n",
"from tqdm import tqdm\n",
"Mu_vocab = {} \n",
"Mu_sentences = []\n",
"Mu_stop_words = set(stopwords.words('english'))\n",
"\n",
"for i in tqdm(Mu):\n",
" Mu_sentence = word_tokenize(str(i)) # 단어 토큰화를 수행합니다.\n",
" result = []\n",
" \n",
" for word in Mu_sentence: \n",
" word = word.lower() # 모든 단어를 소문자화하여 단어의 개수를 줄입니다.\n",
" if word not in Mu_stop_words: # 단어 토큰화 된 결과에 대해서 불용어를 제거합니다.\n",
" if len(word) > 2: # 단어 길이가 2이하인 경우에 대하여 추가로 단어를 제거합니다.\n",
" result.append(word)\n",
" if word not in Mu_vocab:\n",
" Mu_vocab[word] = 0 \n",
" Mu_vocab[word] += 1\n",
"\n",
" Mu_sentences.append(result) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"print(Mu_sentences)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"Mu_encoded = []\n",
"for s in Mu_sentences:\n",
" temp = []\n",
" for w in s:\n",
" try:\n",
" temp.append(all_word_to_index[w])\n",
" except KeyError:\n",
" temp.append(all_word_to_index['OOV'])\n",
" Mu_encoded.append(temp)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"print(Mu_encoded)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"#전처리 방법에는 NLTK의 FreqDist, 케라스(Keras) 토크나이저도 사용 가능."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"로맨스 플롯, 스릴러 따로 토큰화 해서 x train에 넣을지... 고민중\n",
"\n",
"이번주 : 전처리 완료, \n",
"이번 달 목표 : 뮤지컬 장르 분류 << 다양한 모델 사용해보기.\n",
"\n",
"6월에 교차검증 및 장르 시각화 설계까지.\n",
"\n",
"다음주 : 2진분류(LSTM) 완료, RNN 분류기 만들어보기"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 학습데이터"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# 영화 줄거리는 X_train에, 장르 정보는 y_train에 저장된다.\n",
"# 테스트용 줄거리 X_test에, 테스트용 줄거리의 장르 정보는 y_test에 저장된다.\n",
"#맞춰서 저장하기. (진행중)\n",
"\n",
"#X_train = train_sc_df.dropna().drop(‘trade_price_idx_value’, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"X_train = []\n",
"Y_train = [] #0 : romance, 1 : thriller \n",
"for i in range(train_data_size):\n",
" X_train.append(R_encoded[i])\n",
" Y_train.append([1,0])\n",
" X_train.append(TH_encoded[i])\n",
" Y_train.append([0,1])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 1974\n",
"줄거리 평균 길이 : 267.093984962406\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYUAAAD4CAYAAAAD6PrjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAdIElEQVR4nO3df5DU9Z3n8eeLSYt7ahQUlR+yY3IkhaEuaii1SjbnrBtFsiXkKslJpZTVuSPZEy7WJhYYrko3ualyvY1WZPfYwx1K3DJjvDWWVNQlxHPPoyoa0RDFTFzREDNCgAQWUZQ4zPv++H76azP0DD3TPd3T7etR1dXf/vS3+/ue6Yb3fD+fz/f9UURgZmYGMKHRAZiZ2fjhpGBmZjknBTMzyzkpmJlZzknBzMxyH2p0AMdzxhlnRHt7e6PDsBb13HPP/TYiptT7uP5e21iq5ns97pNCe3s7W7ZsaXQY1qIk/aoRx/X32sZSNd9rdx+ZmVnOScHMzHLHTQqSzpH0pKReSS9J+mpqnyxpk6RX0v2k1C5Jd0vaLukFSReWvNeStP8rkpaM3Y9lZmajUcmZQj/wtYiYDVwC3CjpPGAl8EREzAKeSI8BrgJmpdtSYA1kSQS4FbgYuAi4tZhIzMxsfDhuUoiIXRHxfNo+CPQC04GFwPq023pgUdpeCNwXmaeB0yRNBa4ENkXEvojYD2wC5tf0pzEzs6qMaExBUjtwAfAMcFZE7IIscQBnpt2mA78ueVlfahuq3Wqgp6eHOXPm0NbWxpw5c+jp6Wl0SGbWhCqekirpZOAh4KaIeFPSkLuWaYth2ssdaylZ1xMzZ86sNMQPrJ6eHlatWkV3dzfz5s1j8+bNdHZ2ArB48eIGR2dmzaSiMwVJBbKEcH9EfD81707dQqT7Pam9Dzin5OUzgJ3DtB8jItZGxNyImDtlSt2vK2o6XV1ddHd309HRQaFQoKOjg+7ubrq6uhodmpk1mUpmHwnoBnoj4s6SpzYAxRlES4BHStqvS7OQLgEOpO6ljcAVkialAeYrUptVqbe3l3nz5h3VNm/ePHp7exsUkZk1q0q6jy4FrgVelLQ1tX0DuB14UFIn8DrwhfTcY8ACYDtwCLgeICL2SfoW8Gza75sRsa8mP8UH3OzZs9m8eTMdHR152+bNm5k9e3YDo7JaaF/5aL694/bPNjAS+6A4blKIiM2UHw8AuLzM/gHcOMR7rQPWjSRAO75Vq1bR2dl5zJiCu4/MbKTGfe0jO77iYPLy5cvp7e1l9uzZdHV1eZDZzEbMSaFFLF682EnAzKrm2kdmZpZzUjAzs5yTgtkgkk6U9BNJP0tFIP8ytZ8r6ZlU0PF7kk5I7RPT4+3p+fZGxm9WDScFs2MdBv44Ij4JnA/MT9fc/BVwVyoCuR/oTPt3Avsj4t8Cd6X9zJqSk4LZIKmY41vpYSHdAvhj4B9T++AikMXikP8IXK5h6sCYjWdOCmZlSGpLF2vuIavo+yrwrxHRn3YpLeiYF3tMzx8ATq9vxGa14aRgVkZEHImI88lqdF0ElLs8vFjQsaJij5KWStoiacvevXtrF6xZDTkpmA0jIv4V+GeyBaZOk1S8tqe0oGNe7DE9fypwTAkXF3q0ZuCkYDaIpCmSTkvbfwD8CdniUk8Cn0+7DS4CWSwO+Xng/6RyL2ZNx1c0mx1rKrBeUhvZH04PRsQPJP0ceEDSfwd+SlY9mHT/D5K2k50hXNOIoM1qwUnBbJCIeIFshcHB7a+RjS8Mbn+X96sEmzU1dx+ZmVnOScGsSbSvfPSo9RXMxoKTgpmZ5ZwUzMwsV8kazesk7ZG0raTte5K2ptuO4jKdktolvVPy3N+VvOZTkl5MRcPudhkAM7Pxp5LZR/cCfwPcV2yIiP9Y3Jb0bbLL+oteTVeCDrYGWAo8TbaO83zg8ZGHbGZmY+W4ZwoR8RRlrs4ESH/tfxHoGe49JE0FPhwRP0
4X9dzH+8XEzMxsnKh2TOGPgN0R8UpJ27mSfirp/0r6o9Q2nawUQFFpMbFjuEaMmVljVJsUFnP0WcIuYGZEXAD8BfBdSR+mwoJh+ROuEWNm1hCjvqI5Ff76D8Cnim0RcZhsgRIi4jlJrwIfIzszmFHy8tJiYmZmNk5Uc6bwJ8AvIiLvFkqFxNrS9keAWcBrEbELOCjpkjQOcR3vFxMzM7NxopIpqT3Aj4GPS+qTVFyC8BqOHWD+NPCCpJ+RrUD1lYgoDlL/OfD3wHayBUs888jMbJw5bvdRRCweov3PyrQ9BDw0xP5bgDkjjM/MzOrIVzSbmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknhRbR09PDnDlzaGtrY86cOfT0DFu41sysrFHXPrLxo6enh1WrVtHd3c28efPYvHkznZ3ZheeLF5e99tDMrCyfKbSArq4uuru76ejooFAo0NHRQXd3N11dXY0OzcyajJNCC+jt7aWvr++o7qO+vj56e3sbHZqZNRknhRYwbdo0VqxYwerVq3n33XdZvXo1K1asYNq0aY0OrSlJOkfSk5J6Jb0k6aup/TZJb5SsQb6g5DW3pPXHX5Z0ZeOiN6uOxxRaRLbK6dCPbUT6ga9FxPOSTgGek7QpPXdXRPx16c6SziOrGvwJYBrwI0kfi4gjozl4+8pHqwjdrDo+U2gBO3fu5I477mD58uWceOKJLF++nDvuuIOdO72O0WhExK6IeD5tHwR6GWb5WGAh8EBEHI6IX5KVh79o7CM1qz0nhRYwe/ZsZsyYwbZt2zhy5Ajbtm1jxowZzJ49u9GhNT1J7cAFwDOpaZmkFyStkzQptU0Hfl3ysrJrkHvtcWsGTgotYNWqVXR2dvLkk0/y3nvv8eSTT9LZ2cmqVasaHVpTk3Qy2fogN0XEm8Aa4KPA+WTrkX+7uGuZlx/Tf+e1x60ZHHdMQdI64E+BPRExJ7XdBvxnoPjnzjci4rH03C1AJ3AE+K8RsTG1zwe+A7QBfx8Rt9f2R/ngKl6LsHz5cnp7e5k9ezZdXV2+RqEKkgpkCeH+iPg+QETsLnn+HuAH6WEfcE7Jy70GuTWtSgaa7wX+BrhvUHvFA27p6b8FPkP2D+hZSRsi4udVxG4lFi9e7CRQI2kd8W6gNyLuLGmfmtYbB/gcsC1tbwC+K+lOsu/9LOAndQzZrGYqWY7zqdSvWol8wA34paTSAbftEfEagKQH0r5OCjYeXQpcC7woaWtq+wawWNL5ZF1DO4AvA0TES5IeJPs+9wM3jnbmkVmjVTMldZmk64AtZNP39pMNrj1dsk/pgNvggbiLh3pjSUuBpQAzZ86sIkSzkYuIzZQfJ3hsmNd0Ab6E3JreaAeaRzrgVtFAXP6EB+TMzBpiVGcKoxxw80Ccmdk4N6ozBUlTSx4OHnC7RtJESefy/oDbs8AsSedKOoFsMHrD6MM2M7OxUMmU1B7gMuAMSX3ArcBlIx1wk7QM2Eg2JXVdRLxU85/GzMyqUsnso3LzHLuH2b/sgFu6jmHIgTozM2s8X9FsZmY5JwUzM8s5KZiZWc5JwczMck4KZmaWc1Iwa0HtKx/1Cm42Kk4KZmaWc1IwM7Ock4KZmeWqKZ1tZk2kdIxhx+2fbWAkNp75TMHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZrnjJgVJ6yTtkbStpO1/SPqFpBckPSzptNTeLukdSVvT7e9KXvMpSS9K2i7pbkkamx/JzMxGq5IzhXuB+YPaNgFzIuLfAf8C3FLy3KsRcX66faWkfQ2wlGzd5lll3tPMzBrsuEkhIp4C9g1q+2FE9KeHTwMzhnsPSVOBD0fEjyMigPuARaML2czMxkotrmi+AfheyeNzJf0UeBP4bxHx/4DpQF/JPn2prSxJS8nOKpg5c2YNQjSrnKRzyP5wORsYANZGxHckTSb7rrcDO4AvRsT+1BX6HWABcAj4s4h4vt5xuyqq1UJVA8
2SVgH9wP2paRcwMyIuAP4C+K6kDwPlxg9iqPeNiLURMTci5k6ZMqWaEM1Gox/4WkTMBi4BbpR0HrASeCIiZgFPpMcAV/F+t+hSsq5Ss6Y06qQgaQnwp8CXUpcQEXE4In6Xtp8DXgU+RnZmUNrFNAPYOdpjm42liNhV/Es/Ig4CvWRntguB9Wm39bzfBboQuC8yTwOnpS5Ts6Yzqu4jSfOBFcC/j4hDJe1TgH0RcUTSR8j+cnotIvZJOijpEuAZ4DpgdfXhm40tSe3ABWTf27MiYhdkiUPSmWm36cCvS15W7B7dNei96t4t6i4lG6njJgVJPcBlwBmS+oBbyWYbTQQ2pZmlT6eZRp8GvimpHzgCfCUiioPUf042k+kPgMfTzWzcknQy8BBwU0S8Ocws6oq6RyNiLbAWYO7cuUN2n9aDK6baUI6bFCJicZnm7iH2fYjsH1G557YAc0YUnVmDSCqQfZfvj4jvp+bdkqams4SpwJ7U3gecU/Jyd49a0/IVzWaDpNlE3UBvRNxZ8tQGYEnaXgI8UtJ+nTKXAAeK3UxmzcaL7Jgd61LgWuBFSVtT2zeA24EHJXUCrwNfSM89RjYddTvZlNTr6xWoxwys1pwUzAaJiM2UHycAuLzM/gHcOKZBmdWJu4/MzCznpGBmZjknBTMzyzkpmJlZzgPNZk3GM45sLPlMwczMck4KZmaWc1JoET09PcyZM4e2tjbmzJlDT09Po0MysybkMYUW0NPTw6pVq+ju7mbevHls3ryZzs5OABYvLle6yux9Lo5npXym0AK6urro7u6mo6ODQqFAR0cH3d3ddHV1NTo0M2syTgotoLe3l76+vqO6j/r6+ujt7W10aGbWZNx91AKmTZvGihUruP/++/Puoy996UtMmzat0aGZWZNxUmgRhw4d4oYbbuD1119n5syZHDp0iFNOOaXRYZlZk6mo+0jSOkl7JG0raZssaZOkV9L9pNQuSXdL2i7pBUkXlrxmSdr/lbTGs9XAG2+8wcDAQNl7M7ORqHRM4V5g/qC2lcATETELeCI9BriKbG3mWWTr0a6BLImQLeV5MXARcGsxkVh12traKBQKbNy4kd///vds3LiRQqFAW1tbo0MzsyZTUVKIiKeAfYOaFwLr0/Z6YFFJ+32ReRo4LS1deCWwKSL2RcR+YBPHJhobhf7+fgqFwlFthUKB/v7+BkVkZs2qmtlHZxWXHEz3Z6b26cCvS/brS21DtR9D0lJJWyRt2bt3bxUhfnBcf/31LF++nBNPPJHly5dz/fV1W/zLzFrIWExJLbdiVQzTfmxjxNqImBsRc6dMmVLT4FrRjBkzWLNmDW+//TYAb7/9NmvWrGHGjBkNjszMmk01SWF36hYi3e9J7X3AOSX7zQB2DtNuVVq0aBEHDhxgx44dDAwMsGPHDg4cOMCiRYuO/2IzsxLVJIUNQHEG0RLgkZL269IspEuAA6l7aSNwhaRJaYD5itRmVbrnnnsAmDBhwlH3xXYzs0pVdJ2CpB7gMuAMSX1ks4huBx6U1Am8Dnwh7f4YsADYDhwCrgeIiH2SvgU8m/b7ZkQMHry2UTh8+DATJkygra2NgYGBfNbR4cOHGxyZmTWbipJCRAxVVe3yMvsGcOMQ77MOWFdxdFaxgYEBzjzzTPbs2cPpp5/Ob37zm0aHZGZNyLWPWsjNN9/MwYMHufnmmxsdipk1KZe5aCG33norX//61znppJMaHYqZNSmfKbSQt956i4jgrbfeanQoTW2Isi63SXpD0tZ0W1Dy3C2prMvLkq5sTNRmteGk0AImTpwIwMknn3zUfbHdRuxeyl9tf1dEnJ9ujwFIOg+4BvhEes3/lOT6Ita0nBRawJEjRygUCvkZwltvvUWhUODIkSMNjqw5DVHWZSgLgQci4nBE/JJs1t1FYxac2RhzUmgB/f39nHrqqbS3tzNhwgTa29s59dRTXfuo9palyr/rSoo5unyLtRQnhRYgibPPPptdu3YxMDDArl
27OPvss5HKVRaxUVoDfBQ4H9gFfDu1u3yLtRQnhRYQEWzbti2vlFooFNi2bRvZJSNWCxGxOyKORMQAcA/vdxG5fIu1FCeFFjFhwoSjxhSKpS6sNop1vpLPAcWZSRuAayRNlHQu2ToiP6l3fLXSvvJR2lc+2ugwrIF8nUKLGBgY4EMf+hD9/f35vY3OEGVdLpN0PlnX0A7gywAR8ZKkB4GfA/3AjRHhEX5rWk4KLaSYCJwQqjNEWZfuYfbvArrGLiKz+nEfQwu5+uqr2bt3L1dffXWjQzGzJuUzhRZRKBR4/PHHmTJlCoVCgUKhwHvvvdfosMysyfhMoUWccMIJTJ8+nQkTJjB9+nROOOGERodkZk3ISaEFTJgwgUOHDvHOO+8wMDDAO++8w6FDhzwDycxGzN1HTarchWm7d+8+6j4i8v18zYKZVWLUf0pK+nhJxcitkt6UdJOrSdZHRBx1W7ZsWV4Ab+LEiSxbtuyo583MKjHqM4WIeJnskn9SVcg3gIfJlt+8KyL+unT/QdUkpwE/kvQxz+mujdWrV7N69Wok8e677zY6HDNrUrXqdL4ceDUifjXMPq4maWY2ztUqKVwD9JQ8rqqapJmZNUbVSUHSCcDVwP9OTVVXk3SJYTOzxqjFmcJVwPMRsRtqU03SJYbNzBqjFklhMSVdRx+UapJmZq2oqusUJP0b4DOkipHJHa4maWbWnKpKChFxCDh9UNu1w+zvapJmZuOY6yCYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnlnBTMzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgpmg6R1QPZI2lbSNlnSJkmvpPtJqV2S7k7LzL4g6cLGRW5WPScFs2PdC8wf1LYSeCIiZgFPpMeQlY6flW5LydYTMWtaTgpmg0TEU8C+Qc0LgfVpez2wqKT9vsg8DZw2qHy8WVNxUjCrzFkRsQsg3Z+Z2iteZtYrClozcFIwq07Fy8x6RUFrBk4KZpXZXewWSvd7UnvFy8yaNQMnBbPKbACWpO0lwCMl7delWUiXAAeK3UxmzajqpCBph6QXJW2VtCW1efqeNS1JPcCPgY9L6pPUCdwOfEbSK2RL0N6edn8MeA3YDtwD/JcGhGxWM1Utx1miIyJ+W/K4OH3vdkkr0+MVHD1972Ky6XsX1ygGs5qIiMVDPHV5mX0DuHFsIzKrn7HqPvL0PTOzJlSLpBDADyU9J2lpaqtq+p6n7pmZNUYtuo8ujYidks4ENkn6xTD7VjR9LyLWAmsB5s6dW3Z6n5mNnfaVj+bbO27/bAMjsXqr+kwhInam+z3Aw8BFePqemVlTqiopSDpJ0inFbeAKYBuevmdm1pSq7T46C3hYUvG9vhsR/yTpWeDBNJXvdeALaf/HgAVk0/cOAddXeXwzM6uhqpJCRLwGfLJM++/w9D0zs6bjK5rNzCznpGBmZjknBTMzyzkpmJlZzknBzMxyTgrj3OTJk5FU8Q2oeN/Jkyc3+Kczs/GmVlVSbYzs37+fbCZv7RWTiJlZkc8UzMws56RgZmY5JwUzM8s5KZiZWc4DzWY2LK+t8MHiMwUzM8s5KZiZWc5JwczMck4KZmaWG3VSkHSOpCcl9Up6SdJXU/ttkt6QtDXdFpS85hZJ2yW9LOnKWvwAZmZWO9XMPuoHvhYRz6d1mp+TtCk9d1dE/HXpzpLOA64BPgFMA34k6WMRcaSKGMzqStIO4CBwBOiPiLmSJgPfA9qBHcAXI2J/o2I0q8aozxQiYldEPJ+2DwK9wPRhXrIQeCAiDkfEL8nWab5otMc3a6COiDg/IuamxyuBJyJiFvBEemzWlGpynYKkduAC4BngUmCZpOuALWRnE/vJEsbTJS/rY4gkImkpsBRg5syZtQixacWtH4bbTh2797ZaWAhclrbXA/8MrGhUMGbVqDopSDoZeAi4KSLelLQG+BYQ6f7bwA1AuZKcZct/RsRaYC3A3Llzx6ZEaJPQX745plVS47YxeetWFs
APJQXwv9J39ayI2AXZGbSkM8u90H/sWDOoKilIKpAlhPsj4vsAEbG75Pl7gB+kh33AOSUvnwHsrOb4Zg1waUTsTP/xb5L0i0pf6D92rBlUM/tIQDfQGxF3lrRPLdntc8C2tL0BuEbSREnnArOAn4z2+GaNEBE70/0e4GGycbHdxe99ut/TuAjNqlPNmcKlwLXAi5K2prZvAIslnU92mr0D+DJARLwk6UHg52Qzl270zCNrJpJOAiZExMG0fQXwTbI/eJYAt6f7RxoX5dhyHaTWN+qkEBGbKT9O8Ngwr+kCukZ7TLMGOwt4OK1Y9yHguxHxT5KeBR6U1Am8DnyhgTGaVcVVUs0qFBGvAZ8s0/474PL6R2RWey5zYWZmOScFMzPLOSmY2ai0r3z0qIFnaw1OCmZmlnNSMDOznJOCmZnlnBTMzCzn6xSaQLpYquYmTZo0Ju9rZs3LSWGcG2mFVEljVlXVzFqfu4/MzCznpGBmZjknBTMzy3lMwcyq4nLarcVnCmZmlnNSMDOzXN2TgqT5kl6WtF3Synof38zMhlbXpCCpDfhb4CrgPLKlO8+rZwxmZja0eg80XwRsTytYIekBYCHZus02AsNd5VzuOV/QZvXgQefmV+/uo+nAr0se96W2o0haKmmLpC179+6tW3DNJCJGdDMzq0S9k0K5P2+P+R8rItZGxNyImDtlypQ6hGVmZlD/pNAHnFPyeAaws84xmJnZEOqdFJ4FZkk6V9IJwDXAhjrHYGZmQ6jrQHNE9EtaBmwE2oB1EfFSPWMws/orDkB78Hn8q3uZi4h4DHis3sc1s/oqnYlUrm2oBOEZTI3lK5rNasQXZlorcEE8sxoouTDzM2QTKp6VtCEifA3OEMqdSYx0H59J1J6Tgllt+MLMD6ihEtdYJayxHp/ReL+wSdJe4FeNjqOJnAH8ttFBNJE/jIiqL4aR9HlgfkT8p/T4WuDiiFhWss9SYGl6+HHg5TJvNd4+v/EUj2Mpr1wso/5ej/szhVr8g/0gkbQlIuY2Oo4PoONemBkRa4G1w77JOPv8xlM8jqW8WsfigWaz2vCFmdYSnBTMasMXZlpLGPfdRzZiw3ZP2Nio4YWZ4+3zG0/xOJbyahrLuB9oNjOz+nH3kZmZ5ZwUzMws56TQIiStk7RH0rZGx2Kj04gyGZJ2SHpR0lZJW1LbZEmbJL2S7ieldkm6O8X3gqQLqzz2Md/Z0Rxb0pK0/yuSltQ4ntskvZF+P1slLSh57pYUz8uSrixpr+pzlHSOpCcl9Up6SdJX6/q7GekKXr6NzxvwaeBCYFujY/FtVJ9fG/Aq8BHgBOBnwHl1OO4O4IxBbXcAK9P2SuCv0vYC4HGyazIuAZ6p8tjHfGdHemxgMvBaup+UtifVMJ7bgK+X2fe89BlNBM5Nn11bLT5HYCpwYdo+BfiXdLy6/G58ptAiIuIpYF+j47BRy8tkRMTvgWKZjEZYCKxP2+uBRSXt90XmaeA0SVNHe5AhvrMjPfaVwKaI2BcR+4FNwPwaxjOUhcADEXE4In4JbCf7DKv+HCNiV0Q8n7YPAr1kyxbX5XfjpGA2PlS0fvkYCOCHkp5LZTgAzoqIXZD9BwWcWccYR3rsesS0LHXLrCt22dQrHkntwAXAM9Tpd+OkYDY+VLR++Ri4NCIuBK4CbpT06WH2bVSMwx17rGNaA3wUOB/YBXy7XvFIOhl4CLgpIt4cbtdaxuKkYDY+NKRMRkTsTPd7gIfJuj92F7uF0v2eOsY40mOPaUwRsTsijkTEAHAP2e9nzOORVCBLCPdHxPdTc11+N04KZuND3ctkSDpJ0inFbeAKYFs6bnGmyhLgkbS9AbguzXa5BDhQ7M6ooZEeeyNwhaRJqWvnitRWE4PGTD5H9vspxnONpImSzgVmAT+hBp+jJAHdQG9E3FnyVH1+N9XMHvBt/NyAHrLT2/fI/kLobHRMvo34M1xANtPkVWBVHY73EbLZMT8DXioeEzgdeAJ4Jd1PTu
0iW0joVeBFYG6Vxz/mOzuaYwM3kA30bgeur3E8/5CO90L6z3dqyf6rUjwvA1fV6nME5pF187wAbE23BfX63bjMhZmZ5dx9ZGZmOScFMzPLOSmYmVnOScHMzHJOCmZmlnNSMDOznJOCmZnl/j8lGxvkzw55lwAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Number of tokens in each encoded training sample.\n",
"len_result = list(map(len, X_train))\n",
"print(\"줄거리 최대 길이 : \",max(len_result))\n",
"print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
"\n",
"# Left panel: box plot of the lengths. Right panel: 50-bin histogram.\n",
"plt.subplot(121)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(122)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 테스트 데이터\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Build the held-out split from the samples that come after the training\n",
"# portion, alternating one romance sample and one thriller sample.\n",
"# Labels are one-hot: [1, 0] = romance, [0, 1] = thriller.\n",
"X_test, Y_test = [], []\n",
"for idx in range(train_data_size, train_data_size + test_data_size):\n",
"    X_test += [R_encoded[idx], TH_encoded[idx]]\n",
"    Y_test += [[1, 0], [0, 1]]\n",
"\n",
"# Mu_encoded carries no genre label here (presumably the encoded musical\n",
"# plots -- confirm); it is only fed to model.predict later on.\n",
"M_test = Mu_encoded"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 1749\n",
"줄거리 평균 길이 : 197.71394395078605\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAbkUlEQVR4nO3db3Bc1Znn8e/Psk2ymRgsMIY1sCLBSZm4NoaoGAqUFIbB2GQqTraSXZut4M1o43jGuJIKWxszegGTLVGE3Qwz7DBQZuSK2cqIZIfx4AJniNc4Q7kqEOTYAYMgFsQBY0dWYkMCGf5IevZFn7ZbUuuP1a3uVt/fp+rWvf3c092P3K1H1+eee64iAjMzy4YZ1U7AzMwqx0XfzCxDXPTNzDLERd/MLENc9M3MMmRmtRMYz1lnnRVNTU3VTsPq1J49e34dEfMq/b7+XttUGut7XfNFv6mpia6urmqnYXVK0i+r8b7+XttUGut77e4dM7MMcdE3M8sQF30zswxx0TczyxAXfTOzDBm36EvaLOmopP0Fse9J2peWg5L2pXiTpH8t2HdfwXM+IelZST2S7pakqfmRsqmzs5PFixfT0NDA4sWL6ezsrHZKZlaDJjJk8zvA3wAP5AMR8Z/y25K+DbxR0P6liFhS5HXuBdYCTwLbgeXAD049ZRuus7OTtrY2Ojo6aGlpYffu3bS2tgKwevXqKmdnZrVk3CP9iHgCOFZsXzpa/4/AmIeVks4F5kTEjyM3l/MDwGdPPV0rpr29nY6ODpYuXcqsWbNYunQpHR0dtLe3Vzs1M6sxpfbpfxLojYgDBbELJe2V9C+SPpliC4BDBW0OpVhRktZK6pLU1dfXV2KK9a+7u5uWlpYhsZaWFrq7u6uUkZnVqlKvyF3N0KP8I8AFEfEbSZ8A/knSx4Bi/fej3r0lIjYBmwCam5t9l5dxLFq0iN27d7N06dITsd27d7No0aIqZmXl0LTx0RPbB+/4dBUzsXox6SN9STOB/wB8Lx+LiHci4jdpew/wEvARckf25xU8/Tzg8GTf24Zqa2ujtbWVXbt28d5777Fr1y5aW1tpa2urdmpmVmNKOdL/I+CFiDjRbSNpHnAsIgYkfQhYCLwcEcck/U7S5cBTwI3A/y4lcTspf7J2w4YNdHd3s2jRItrb230S18xGGLfoS+oErgLOknQIuDUiOoBVjDyB+yngm5L6gQFgXUTkTwL/KbmRQO8nN2rHI3fKaPXq1S7yZjaucYt+RBStJBHxX4rEHgIeGqV9F7D4FPMzM7My8hW5ZmYZ4qJvZpYhLvpmZhniom9mliEu+mZmGeKib1ZEmj322TRbbFeKNUraIelAWs9NcaWZY3skPSPp0upmbzY6F32z0S2NiCUR0ZwebwR2RsRCYGd6DLCC3IWIC8nNJHtvxTM1myAXfbOJWwlsSdtbODlT7Erggch5EjgjzSxrVnNc9M2KC+CHkvZIWpti8yPiCEBan53iC4BXC55bdBZZzx5rtaDUWTbN6tWVEXFY0tnADkkvjNF2QrPIevZYqwU+0jcrIiIOp/VRYCtwGdCb77ZJ66Op+SHg/IKnexZZq1ku+mbDSPqApA/mt4FlwH5gG7AmNVsDPJy2twE3plE8lwNv5LuBzGqNu3fMRpoPbM3dDZSZwN9HxD9Lehr4vqRW4BXgC6n9duB6oAf4PfClyqdsNjEu+mbDRMTLwMeLxH8DXFMkHsD6CqRmVjJ375iZZYiLvplZhrjom5lliIu+mVmGuOibmWXIuEVf0mZJRyXtL4jdJum1NAPhPknXF+y7Jc02+KKk6wriy1OsR9LG4e9jZmZTbyJH+t8BlheJ35VmIFwSEdsBJF0MrAI+lp7zt5IaJDUA95CbjfBiYHVqa2ZmFTTuOP2IeEJS0wRfbyXwYES8A/xCUg+5y9cBetL4ZyQ9mNo+f8oZm5nZpJXSp39TumHE5vzNJBh9ts
EJzUJoZmZTa7JF/17gw8AS4Ajw7RQfbbbBCc1CmOcpaM3Mpsakin5E9EbEQEQMAvdzsgtntNkGT2kWwojYFBHNEdE8b968yaRoZmZFTKroD7sr0OfIzUAIudkGV0k6TdKF5G4f9xPgaWChpAslzSZ3snfb5NM2M7PJGPdErqRO4CrgLEmHgFuBqyQtIddFcxD4CkBEPCfp++RO0PYD6yNiIL3OTcBjQAOwOSKeK/tPY2ZmY5rI6J3VRcIdY7RvB9qLxLeTm4LWzMyqxFfkmplliIu+mVmGuOibmWWIi76ZWYa46JuZZYiLvplZhrjom5lliIu+mVmGuOibmWWIi76ZWYa46JuZZYiLvplZhrjom5lliIu+mVmGuOibmWWIi76ZWYa46JuNQlKDpL2SHkmPL5T0lKQDkr6Xbv1Juj3o9yT1pP1N1czbbCwu+maj+yrQXfD4W8BdEbEQOA60pngrcDwiLgLuSu3MapKLvlkRks4DPg38XXos4GrgH1KTLcBn0/bK9Ji0/5rU3qzmuOibFfdXwH8HBtPjM4HXI6I/PT4ELEjbC4BXAdL+N1L7ISStldQlqauvr28qczcb1bhFX9JmSUcl7S+I/U9JL0h6RtJWSWekeJOkf5W0Ly33FTznE5KeTf2ed/tIyGqVpD8GjkbEnsJwkaYxgX0nAxGbIqI5IprnzZtXhkzNTt1EjvS/AywfFtsBLI6Ifw/8HLilYN9LEbEkLesK4vcCa4GFaRn+mma14krgM5IOAg+S69b5K+AMSTNTm/OAw2n7EHA+QNp/OnCskgmbTdS4RT8inmDYFzgifljw39wnyf0CjErSucCciPhxRATwACf7Q81qSkTcEhHnRUQTsAp4PCL+M7AL+HxqtgZ4OG1vS49J+x9P33OzmlOOPv0/AX5Q8PjCNMztXyR9MsUWkDsayivsDx3BfZ9Wo74BfF1SD7k++44U7wDOTPGvAxurlJ/ZuGaO32R0ktqAfuC7KXQEuCAifiPpE8A/SfoYE+zzPLEjYhOwCaC5udlHTFY1EfEj4Edp+2XgsiJt3ga+UNHEzCZp0kVf0hrgj4Fr8v+VjYh3gHfS9h5JLwEfIXdkX9gFVNgfamYFmjY+Wu0UrI5NqntH0nJy/9X9TET8viA+T1JD2v4QuRO2L0fEEeB3ki5Po3Zu5GR/qJmZVci4R/qSOoGrgLMkHQJuJTda5zRgRxp5+WQaqfMp4JuS+oEBYF1E5E8C/ym5kUDvJ3cOoPA8gJmZVcC4RT8iVhcJdxSJEREPAQ+Nsq8LWHxK2ZmZWVn5ilwzswxx0TczyxAXfTOzDHHRrxOdnZ0sXryYhoYGFi9eTGdnZ7VTMrMaVNLFWVYbOjs7aWtro6Ojg5aWFnbv3k1ra26q99Wri52HN7Os8pF+HWhvb6ejo4OlS5cya9Ysli5dSkdHB+3t7dVOzcxqjIt+Heju7qalpWVIrKWlhe7u7lGeYWZZ5aJfBxYtWsTu3buHxHbv3s2iRYuqlJGZ1SoX/TrQ1tZGa2sru3bt4r333mPXrl20trbS1tZW7dTMrMb4RG4dyJ+s3bBhA93d3SxatIj29nafxDWzEVz068Tq1atd5M1sXO7eMTPLEBf9OuGLs8xsIty9Uwd8cZaZTZSP9OtAe3s7N9xwAxs2bOB973sfGzZs4IYbbvDFWWY2go/068Dzzz/PK6+8wttvv83g4CA///nPufvuu3nzzTernZqZ1Rgf6deBGTNm8NZbb3HHHXcMWc+Y4Y/XzIZyVagDAwMDnH766VxyySXMmjWLSy65hNNPP52BgYFqp2ZmNcbdO3WiqamJa665hohAEkuWLOH48ePVTsvMasyEjvQlbZZ0VNL+glijpB2SDqT13BSXpLsl9Uh6RtKlBc9Zk9ofkLSm/D9ONkli7969rFu3jtdff51169axd+9e0k3rrU40bXyUpo2PVjsNm+Ym2r3zHWD5sNhGYGdELAR2pscAK4CFaVkL3Au5PxLArc
AfApcBt+b/UFhp8n33W7dupbGxka1btw6Jm5nlTagqRMQTwLFh4ZXAlrS9BfhsQfyByHkSOEPSucB1wI6IOBYRx4EdjPxDYpMwMDDA1VdfTW9vL4ODg/T29nL11Ve7T9/MRijlUHB+RBwBSOuzU3wB8GpBu0MpNlp8BElrJXVJ6urr6yshxWyYOXMme/fuZefOnbz77rvs3LmTvXv3MnOmT9mY2VBTURWKdSTHGPGRwYhNwCaA5ubmom3spDlz5nD8+HGuvfZaBgYGaGhoYHBwkLlz3XtmZkOVcqTfm7ptSOujKX4IOL+g3XnA4THiVqJjx4b3vI0dN7PsKqXobwPyI3DWAA8XxG9Mo3guB95I3T+PAcskzU0ncJelmJVIEuvWraO/v5+IoL+/n3Xr1nn0ziRJep+kn0j6maTnJP1Fil8o6ak0+ux7kman+GnpcU/a31TN/M3GMtEhm53Aj4GPSjokqRW4A7hW0gHg2vQYYDvwMtAD3A/8GUBEHAP+B/B0Wr6ZYlaiiGD79u1D7py1fft2ItwzNknvAFdHxMeBJcDydADzLeCuNGLtONCa2rcCxyPiIuCu1M6sJk2oTz8iRpuq8ZoibQNYP8rrbAY2Tzg7m5DTTjuNlpaWIXfOamlp4Ve/+lW1U5uW0nc4P3HRrLQEcDVwQ4pvAW4jNyR5ZdoG+AfgbyQp/FfXapCHd9SBL3/5y9xzzz3MmDGDwcFBXnjhBZ5//nnWry/6t9cmQFIDsAe4CLgHeAl4PSL6U5PC0WcnRqZFRL+kN4AzgV8Pe8215K5d4YILLpjqH8GsKF+9Uyci4sS4/IGBAXftlCgiBiJiCbkBB5cBi4o1S+sJjUyLiE0R0RwRzfPmzStfsmanwEW/Dtx33300Njby+OOP8+677/L444/T2NjIfffdV+3Upr2IeB34EXA5uQsN8/87Lhx9dmJkWtp/OiMvZjSrCS76daC/v58rrriCFStWMHv2bFasWMEVV1xBf3//+E+2ESTNk3RG2n4/8EdAN7AL+HxqNnzEWn4k2+eBx92fb7XKRb9OPPLII9x+++289dZb3H777TzyyCPVTmk6OxfYJekZciPNdkTEI8A3gK9L6iHXZ9+R2ncAZ6b41zk5D5VZzfGJ3Dpy5513cvPNNzN//vxqpzKtRcQzwCVF4i+T698fHn8b+EIFUjMrmYt+Hent7R2ytvpUOL3ywTs+XcVMbDpy904daWhoGLI2MxvORb+OzJkzhxkzZjBnzpxqp2JmNcpFv07Mnj2bN998k8HBQd58801mz55d7ZTMrAa56NeJGTNmsGDBgiFrM7PhfCK3Trz99tu88sorDA4OnlibmQ3nw8E6kD9xmy/0+bVP6JrZcC76dWC0e+H6HrlmNpyLfh3J3xPX98Y1s9G46NcJSZx55plD1mZmw7no14n8/F7D12ZmhVz068TMmTNP3Aj92LFj7uIxs6JcGepEf3//ibH5AwMDHrJpZkX5SL8O5Iv98CGbvkDLzIabdFWQ9FFJ+wqW30r6mqTbJL1WEL++4Dm3SOqR9KKk68rzI9jg4CCSOOecc5gxYwbnnHMOkny0b2YjTLroR8SLEbEk3Uf0E8Dvga1p9135fRGxHUDSxcAq4GPAcuBv082nrQwuuugient7GRwcpLe3l4suuqjaKZlZDSrX//+vAV6KiF+O0WYl8GBEvBMRvwB6KHJDCpucAwcODBm5c+DAgSpnZGa1qFxFfxXQWfD4JknPSNosaW6KLQBeLWhzKMVGkLRWUpekrr6+vjKlaGZmJRd9SbOBzwD/N4XuBT4MLAGOAN/ONy3y9KKDySNiU0Q0R0TzvHnzSk3RzMySchzprwB+GhG9ABHRGxEDETEI3M/JLpxDwPkFzzsPOFyG97dk/vz5SPI9cs1sVOUo+qsp6NqRdG7Bvs8B+9P2NmCVpNMkXQgsBH5Shve3Ar4S18zGUtLFWZL+DXAt8JWC8J2SlpDrujmY3xcRz0n6PvA80A
+sjwhPA1lGvjG6mY2npKIfEb8HzhwW++IY7duB9lLe08zMJs+XbJqZZYiLvplZhrjom5lliIu+mVmGuOjXkfzdsnzXLDMbjYu+mVmGuOjXEd8q0czG46I/TUk6sUyknU2cpPMl7ZLULek5SV9N8UZJOyQdSOu5KS5Jd6d7RTwj6dLq/gRmo3PRn6Yi4sSybNky4OSdsvLrZcuWnWhjp6QfuDkiFgGXA+vT/SA2AjsjYiGwMz2G3PxTC9OyltykgxXRtPHRE4vZRLjo14HHHnvsRIEHTvwheOyxx6qc2fQUEUci4qdp+3dAN7lpwFcCW1KzLcBn0/ZK4IHIeRI4Y9gcVGY1wzdGrxP5Au/bJJaXpCbgEuApYH5EHIHcHwZJZ6dmo90r4siw11pL7n8CXHDBBVOat9lofKRvNgpJfwA8BHwtIn47VtMisRF9ar5PhNUCF32zIiTNIlfwvxsR/5jCvflum7Q+muK+V4RNGy76ZsMoN9ypA+iOiL8s2LUNWJO21wAPF8RvTKN4LgfeyHcDmdUa9+mbjXQl8EXgWUn7UuzPgTuA70tqBV4BvpD2bQeuB3qA3wNfqmy6ZhPnom82TETspng/PcA1RdoHsH5KkzIrE3fvmJlliIu+mVmGuOibmWVIyUVf0kFJz0raJ6krxTxHiZlZDSrXkf7SiFgSEc3pcc3NUWJmZlPXveM5SszMalA5in4AP5S0J80tAsPmKAHGm6NkCElrJXVJ6urr6ytDimZmBuUZp39lRBxOk0/tkPTCGG0nPEcJsAmgubnZ8wKbmZVJyUf6EXE4rY8CW4HL8BwlZmY1qaSiL+kDkj6Y3waWAfvxHCVmZjWp1O6d+cDWdDu+mcDfR8Q/S3oaz1FiZlZzSir6EfEy8PEi8d/gOUrMzGqOr8g1M8sQF30zswzx1MpmdaJp46Mntg/e8ekqZmK1zEf6ZnWoaeOjQ/4ImOW56JuZZYiLvplZhrhP36yOuZ/fhvORvplZhrjom5lliIu+mVmGuOibmWWIi76ZWYa46JuZZYiLvplZhnicvlnGeSx/trjom2WEi7uBu3fMzDLFR/pmGeQZOLPLR/o1rrGxEUkTXoAJt21sbKzyT1ebJG2WdFTS/oJYo6Qdkg6k9dwUl6S7JfVIekbSpdXL3Gx8ky76ks6XtEtSt6TnJH01xW+T9JqkfWm5vuA5t6RfjhclXVeOH6DeHT9+nIiYkuX48ePV/vFq1XeA5cNiG4GdEbEQ2JkeA6wAFqZlLXBvhXI0m5RSunf6gZsj4qeSPgjskbQj7bsrIv5XYWNJFwOrgI8B/xb4f5I+EhEDJeRgVnYR8YSkpmHhlcBVaXsL8CPgGyn+QEQE8KSkMySdGxFHKpNteeW7fXyit35N+kg/Io5ExE/T9u+AbmDBGE9ZCTwYEe9ExC+AHuCyyb6/WYXNzxfytD47xRcArxa0O8QovweS1krqktTV19c3pcmajaYsffrpqOgS4KkUuin1b27O933iXw6rTyoSi2INI2JTRDRHRPO8efOmOC2z4kou+pL+AHgI+FpE/JZcn+aHgSXAEeDb+aZFnu5fDpsueiWdC5DWR1P8EHB+QbvzgMMVzs1swkoq+pJmkSv4342IfwSIiN6IGIiIQeB+Tnbh+JfDprNtwJq0vQZ4uCB+YxrFcznwxnTtz7dsKGX0joAOoDsi/rIgfm5Bs88B+WFv24BVkk6TdCG50Q4/mez7m00VSZ3Aj4GPSjokqRW4A7hW0gHg2vQYYDvwMrlzVPcDf1aFlM0mrJTRO1cCXwSelbQvxf4cWC1pCbmum4PAVwAi4jlJ3weeJzfyZ71H7lgtiojVo+y6pkjbANZPbUZm5TPpoh8RuyneT799jOe0A+2TfU8zMyuNr8g1M8sQF30zswzxhGtmNoKnYa5fPtI3M8sQF30zswxx0TczyxAXfTOzDPGJ3BoXt86B206futc2s0xx0a9x+ovfkrvocwpeWyJum5KXNrMa5e4dM7
MMcdE3M8sQd++Y2Zh8oVZ98ZG+mVmGuOibmWWIi76ZWYa46JuZZYhP5E4DuTtTlt/cuXOn5HXNrHa56Ne4U70wS9KUXcxlZtOfu3fMbFKaNj46ZDinTQ8+0jezCXORn/4qXvQlLQf+GmgA/i4i7qh0DmZWPr54a3qpaPeOpAbgHmAFcDGwWtLFlczBzCzLKt2nfxnQExEvR8S7wIPAygrnUBckFV1G22dmBpUv+guAVwseH0qxISStldQlqauvr69iyU0nEXFKi5kZVL5Pv9gh54iKFBGbgE0Azc3Nrlhm04T792tfpY/0DwHnFzw+Dzhc4RzMzDKr0kf6TwMLJV0IvAasAm6ocA5mVgE+6q9NFS36EdEv6SbgMXJDNjdHxHOVzMHMLMsqPk4/IrYD2yv9vmZm5ityzcrGFx6OrtiVvO7yqQ4XfbMyKLjw8FpyAxaelrQtIp6vbma1a7Q+//GmevAfi9K46JuVx4kLDwEk5S88dNGfgFOZ02ey8/8U+8MyHf+AlJq7av3CHUl9wC+rncc0chbw62onMY38u4iYV+qLSPo8sDwi/mt6/EXgDyPipoI2a4G16eFHgReLvFStf37OrzSVym/U73XNH+mX4xcySyR1RURztfPIoHEvPCy86HDUF6nxz8/5laYW8vN8+mbl4QsPbVpw0TcrjxMXHkqaTe7Cw21VzslshJrv3rFTNmb3gU2NMl54WOufn/MrTdXzq/kTuWZmVj7u3jEzyxAXfTOzDHHRrxOSNks6Kml/tXOxyZG0XNKLknokbaxiHgclPStpn6SuFGuUtEPSgbSem+KSdHfK+RlJl05BPiO+25PJR9Ka1P6ApDVTmNttkl5L/377JF1fsO+WlNuLkq4riFfusz/VOzB5qc0F+BRwKbC/2rl4mdTn1wC8BHwImA38DLi4SrkcBM4aFrsT2Ji2NwLfStvXAz8gd53C5cBTU5DPiO/2qeYDNAIvp/XctD13inK7DfhvRdpenD7X04AL0+fdUOnP3kf6dSIingCOVTsPm7Rav3/0SmBL2t4CfLYg/kDkPAmcIenccr7xKN/tU83nOmBHRByLiOPADmD5FOU2mpXAgxHxTkT8Augh97lX9LN30TerDRO6f3SFBPBDSXvS1BEA8yPiCEBan53i1cr7VPOpdJ43pe6lzfmup1rJzUXfrDZM6P7RFXJlRFwKrADWS/rUGG1rKW8YPZ9K5nkv8GFgCXAE+HaK10JuLvpmNaJmpnGIiMNpfRTYSq77oTffbZPWR1PzauV9qvlULM+I6I2IgYgYBO4n9+9XE7mBi75ZraiJaRwkfUDSB/PbwDJgf8olP+JlDfBw2t4G3JhGzVwOvJHvdplip5rPY8AySXNTd8uyFCu7Yec0Pkfu3y+f2ypJp6X7hC8EfkKlP/upOkPspbIL0Enuv5LvkTtyaK12Tl5O+TO8Hvg5uZEcbVXK4UPkRo/8DHgunwdwJrATOJDWjSkucjePeQl4FmiegpxGfLcnkw/wJ+ROnvYAX5rC3P5Peu9nyBXvcwvat6XcXgRWVOOz9zQMZmYZ4u4dM7MMcdE3M8sQF30zswxx0TczyxAXfTOzDHHRNzPLEBd9M7MM+f8TmXUVrZGt4wAAAABJRU5ErkJggg==\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Number of tokens in each encoded test sample.\n",
"len_result = list(map(len, X_test))\n",
"print(\"줄거리 최대 길이 : \",max(len_result))\n",
"print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
"\n",
"# Left panel: box plot of the lengths. Right panel: 50-bin histogram.\n",
"plt.subplot(121)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(122)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## LSTM 분류 \n"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.preprocessing.sequence import pad_sequences\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import Dense, LSTM, Embedding\n",
"from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint\n",
"import numpy as np\n",
"\n",
"# Bring every input to one fixed length so it can be batched by the model.\n",
"# pad_sequences defaults to padding and truncating at the front ('pre').\n",
"max_len = 230\n",
"X_train = pad_sequences(X_train, maxlen=max_len)\n",
"X_test = pad_sequences(X_test, maxlen=max_len)\n",
"# M_test presumably holds variable-length sequences. Calling np.array() on\n",
"# ragged input builds an object array on old NumPy and raises ValueError on\n",
"# NumPy >= 1.24, so pad it into a regular 2-D integer array instead.\n",
"# Padding it again later with the same maxlen leaves it unchanged.\n",
"M_test = pad_sequences(M_test, maxlen=max_len)\n"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 2926 samples, validate on 2926 samples\n",
"Epoch 1/3\n",
"2880/2926 [============================>.] - ETA: 0s - loss: 0.6540 - acc: 0.6016\n",
"Epoch 00001: val_acc improved from -inf to 0.66131, saving model to best_model.h5\n",
"2926/2926 [==============================] - 28s 10ms/sample - loss: 0.6509 - acc: 0.6048 - val_loss: 0.5958 - val_acc: 0.6613\n",
"Epoch 2/3\n",
"2880/2926 [============================>.] - ETA: 0s - loss: 0.2971 - acc: 0.8944\n",
"Epoch 00002: val_acc improved from 0.66131 to 0.82177, saving model to best_model.h5\n",
"2926/2926 [==============================] - 24s 8ms/sample - loss: 0.2968 - acc: 0.8941 - val_loss: 0.4080 - val_acc: 0.8218\n",
"Epoch 3/3\n",
"2880/2926 [============================>.] - ETA: 0s - loss: 0.1146 - acc: 0.9672\n",
"Epoch 00003: val_acc did not improve from 0.82177\n",
"2926/2926 [==============================] - 24s 8ms/sample - loss: 0.1164 - acc: 0.9660 - val_loss: 0.4616 - val_acc: 0.8125\n"
]
},
{
"data": {
"text/plain": [
"<tensorflow.python.keras.callbacks.History at 0x202d9223a48>"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Embedding -> LSTM -> 2-way genre classifier.\n",
"# Output index 0 = romance, index 1 = thriller (one-hot targets).\n",
"model = Sequential()\n",
"model.add(Embedding(15002, 120))\n",
"model.add(LSTM(128))\n",
"# The two genres are mutually exclusive, so use softmax (scores sum to 1)\n",
"# instead of two independent sigmoid units.\n",
"model.add(Dense(2, activation='softmax'))\n",
"\n",
"# NOTE(review): patience=4 can never trigger while epochs=3 is used below.\n",
"es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
"# Keep only the weights of the epoch with the best validation accuracy.\n",
"mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
"\n",
"# Keras needs arrays; the label lists in particular are plain Python lists.\n",
"X_train = np.array(X_train)\n",
"Y_train = np.array(Y_train)\n",
"X_test = np.array(X_test)\n",
"Y_test = np.array(Y_test)\n",
"\n",
"# categorical_crossentropy is the loss that matches softmax + one-hot labels.\n",
"model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])\n",
"# NOTE(review): the test split doubles as validation data, so the checkpoint\n",
"# is selected on test accuracy and the reported score is optimistic.\n",
"model.fit(X_train, Y_train, validation_data=(X_test, Y_test), epochs=3, batch_size=64, callbacks=[es, mc])\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[9.89297211e-01, 1.10417316e-02],\n",
" [3.73128988e-02, 9.67208505e-01],\n",
" [9.98658895e-01, 1.62478851e-03],\n",
" [9.96271133e-01, 4.04474512e-03],\n",
" [8.88998568e-01, 1.09605283e-01],\n",
" [9.98210311e-01, 2.08193017e-03],\n",
" [3.54536772e-01, 6.69178307e-01],\n",
" [9.85074520e-01, 1.48036592e-02],\n",
" [1.29813954e-01, 8.76039565e-01],\n",
" [9.62996125e-01, 3.44542079e-02],\n",
" [7.55765080e-01, 2.49890730e-01],\n",
" [3.65469605e-01, 6.57850266e-01],\n",
" [9.98800039e-01, 1.51763496e-03],\n",
" [9.97327685e-01, 3.15122958e-03],\n",
" [5.75969875e-01, 4.37167883e-01],\n",
" [9.98559892e-01, 1.73308724e-03],\n",
" [9.83565748e-01, 1.57846343e-02],\n",
" [9.99194920e-01, 1.05460756e-03],\n",
" [9.97159958e-01, 3.05769313e-03],\n",
" [7.86324322e-01, 2.11838797e-01],\n",
" [9.94806945e-01, 5.56808943e-03],\n",
" [9.94939566e-01, 5.16507169e-03],\n",
" [9.95327115e-01, 5.44817653e-03],\n",
" [8.30366760e-02, 9.23830807e-01],\n",
" [9.99270737e-01, 9.99870244e-04],\n",
" [9.98326242e-01, 1.99769647e-03],\n",
" [2.01347172e-01, 8.05066824e-01],\n",
" [9.97681141e-01, 2.63667619e-03],\n",
" [6.56308457e-02, 9.39631820e-01],\n",
" [9.97786045e-01, 2.47393013e-03],\n",
" [9.95178938e-01, 4.89054574e-03],\n",
" [9.98978972e-01, 1.31971261e-03],\n",
" [9.96212244e-01, 3.93838761e-03],\n",
" [8.08125079e-01, 1.83967963e-01],\n",
" [9.86248910e-01, 1.37963891e-02],\n",
" [8.94822180e-01, 9.63397995e-02],\n",
" [4.63879943e-01, 5.36770880e-01],\n",
" [9.90485072e-01, 9.23611410e-03],\n",
" [9.98726070e-01, 1.49498892e-03],\n",
" [9.91048157e-01, 9.32559464e-03],\n",
" [3.64511549e-01, 6.21637642e-01],\n",
" [9.82374847e-01, 1.73149928e-02],\n",
" [9.93298471e-01, 6.45173620e-03],\n",
" [7.04595149e-01, 2.89463103e-01],\n",
" [9.93536115e-01, 6.79136021e-03],\n",
" [9.99129593e-01, 1.11387786e-03],\n",
" [9.06374276e-01, 8.96570683e-02],\n",
" [4.10660505e-01, 5.88741422e-01],\n",
" [9.99692917e-01, 4.83944750e-04],\n",
" [7.80944601e-02, 9.30601835e-01],\n",
" [4.43848312e-01, 5.59155643e-01],\n",
" [1.56105727e-01, 8.51845980e-01],\n",
" [9.82641697e-01, 1.64951906e-02],\n",
" [5.26238121e-02, 9.54043090e-01],\n",
" [9.80851293e-01, 1.90182012e-02],\n",
" [9.90199268e-01, 1.01825390e-02],\n",
" [8.12352419e-01, 1.85036004e-01],\n",
" [9.63351190e-01, 3.39603312e-02],\n",
" [6.31880701e-01, 3.64272743e-01],\n",
" [8.38387251e-01, 1.53711647e-01],\n",
" [9.76741195e-01, 2.20144410e-02],\n",
" [9.90605891e-01, 9.22481064e-03],\n",
" [9.98764873e-01, 1.51382165e-03],\n",
" [9.94506538e-01, 6.27602870e-03],\n",
" [9.97006834e-01, 3.38183786e-03],\n",
" [9.80250001e-01, 1.75612047e-02],\n",
" [8.35835814e-01, 1.54920161e-01],\n",
" [9.93885934e-01, 6.47321297e-03],\n",
" [7.32705653e-01, 2.38107413e-01],\n",
" [1.67823553e-01, 8.28654826e-01],\n",
" [9.97954011e-01, 2.44288007e-03],\n",
" [9.97217536e-01, 3.17463791e-03],\n",
" [9.98234391e-01, 2.21486646e-03],\n",
" [8.36275935e-01, 1.66395113e-01],\n",
" [9.97660041e-01, 2.73835333e-03],\n",
" [9.42627370e-01, 5.81915826e-02],\n",
" [4.82731223e-01, 5.22184551e-01],\n",
" [9.30466115e-01, 6.39836937e-02],\n",
" [5.61084867e-01, 4.41416740e-01],\n",
" [4.50993061e-01, 5.64331651e-01],\n",
" [1.45042473e-02, 9.87169504e-01],\n",
" [9.00123000e-01, 1.01546042e-01],\n",
" [3.05879086e-01, 7.04485118e-01],\n",
" [4.30749863e-01, 5.69399238e-01],\n",
" [1.44111022e-01, 8.66918087e-01],\n",
" [9.00533974e-01, 1.01281039e-01],\n",
" [5.68374395e-01, 4.33054060e-01],\n",
" [9.90041316e-01, 9.53172334e-03],\n",
" [9.87220049e-01, 1.34074753e-02],\n",
" [9.50743973e-01, 5.25186770e-02],\n",
" [2.19779104e-01, 7.77013421e-01],\n",
" [1.54326940e-02, 9.86147463e-01],\n",
" [9.98808622e-01, 1.44694082e-03],\n",
" [8.91158879e-01, 1.06756985e-01],\n",
" [1.05071880e-01, 8.98244858e-01],\n",
" [9.99034405e-01, 1.23475143e-03],\n",
" [9.94593918e-01, 5.76898688e-03],\n",
" [9.93712962e-01, 6.65295729e-03],\n",
" [7.02256203e-01, 2.97228634e-01],\n",
" [9.96766925e-01, 3.70637397e-03],\n",
" [9.89144385e-01, 1.11364387e-02],\n",
" [9.79650915e-01, 2.00015400e-02],\n",
" [9.93528545e-01, 6.56247372e-03],\n",
" [9.87190068e-01, 1.34675717e-02],\n",
" [9.87129986e-01, 1.17099285e-02],\n",
" [9.97617066e-01, 2.67686578e-03],\n",
" [9.97251093e-01, 3.04796081e-03],\n",
" [9.99035120e-01, 1.22244644e-03],\n",
" [9.54006970e-01, 4.89431918e-02],\n",
" [9.46570277e-01, 5.37833124e-02],\n",
" [9.43213701e-01, 5.26795760e-02],\n",
" [9.95134652e-01, 5.25745936e-03],\n",
" [8.25005472e-01, 1.69372976e-01],\n",
" [8.70788097e-01, 1.24156632e-01],\n",
" [9.97023284e-01, 3.26669309e-03],\n",
" [7.92557359e-01, 1.97209224e-01],\n",
" [9.98770893e-01, 1.56408502e-03],\n",
" [9.90018487e-01, 1.06498580e-02],\n",
" [9.90723550e-01, 8.96911696e-03],\n",
" [2.37598643e-01, 7.74325430e-01],\n",
" [6.24084949e-01, 3.58046830e-01],\n",
" [9.97389972e-01, 2.78766919e-03],\n",
" [9.84458208e-01, 1.46457935e-02],\n",
" [9.72309589e-01, 2.36137267e-02],\n",
" [9.98931468e-01, 1.41089316e-03],\n",
" [9.94718015e-01, 5.69924805e-03],\n",
" [4.72977042e-01, 5.49978912e-01],\n",
" [9.90615129e-01, 9.25194938e-03],\n",
" [3.34342234e-02, 9.68245149e-01],\n",
" [9.99083996e-01, 1.16300583e-03],\n",
" [9.44297254e-01, 5.47527708e-02],\n",
" [9.72299933e-01, 2.72788983e-02],\n",
" [9.93855774e-01, 6.32649660e-03],\n",
" [9.62590456e-01, 3.52819376e-02],\n",
" [9.79455829e-01, 1.92554407e-02],\n",
" [9.99165893e-01, 1.05177308e-03],\n",
" [9.85086679e-01, 1.40042715e-02],\n",
" [2.78653465e-02, 9.75346625e-01],\n",
" [9.98081207e-01, 2.21448927e-03],\n",
" [9.95232165e-01, 5.13720512e-03],\n",
" [8.88802350e-01, 1.11462571e-01],\n",
" [9.88667548e-01, 1.16618285e-02],\n",
" [9.98724878e-01, 1.53446209e-03],\n",
" [9.77607906e-01, 2.18318254e-02],\n",
" [9.96304035e-01, 4.18575248e-03],\n",
" [2.64609367e-01, 7.45012462e-01],\n",
" [9.72862959e-01, 2.69199926e-02],\n",
" [9.83956635e-01, 1.73143838e-02],\n",
" [9.98144031e-01, 2.16847914e-03],\n",
" [9.98341322e-01, 1.96177256e-03],\n",
" [9.79604721e-01, 1.93097256e-02],\n",
" [9.98944461e-01, 1.24808261e-03],\n",
" [8.86560798e-01, 1.12452798e-01],\n",
" [9.96718824e-01, 3.71032930e-03],\n",
" [2.21701801e-01, 7.80260146e-01],\n",
" [2.55418997e-02, 9.74908948e-01],\n",
" [9.99462545e-01, 7.41661002e-04],\n",
" [8.23981166e-01, 1.70397952e-01],\n",
" [6.52386248e-02, 9.39590633e-01],\n",
" [9.98122513e-01, 2.17202515e-03],\n",
" [7.12066889e-01, 2.86067158e-01],\n",
" [8.19861591e-02, 9.32014406e-01],\n",
" [9.58634377e-01, 3.82429920e-02],\n",
" [8.24238896e-01, 1.68227255e-01],\n",
" [9.98916030e-01, 1.41686620e-03],\n",
" [9.96592700e-01, 3.68587370e-03],\n",
" [7.33839869e-01, 2.57828295e-01],\n",
" [1.06705025e-01, 8.97602201e-01],\n",
" [3.80194485e-01, 6.08735800e-01],\n",
" [8.97293806e-01, 9.94179696e-02],\n",
" [9.10029948e-01, 8.81575868e-02],\n",
" [9.89466846e-01, 1.07162707e-02],\n",
" [1.63196921e-02, 9.86219883e-01],\n",
" [9.79962826e-01, 1.92297176e-02],\n",
" [9.99110639e-01, 1.11217669e-03],\n",
" [9.98811603e-01, 1.45246438e-03],\n",
" [9.10705030e-01, 8.23124573e-02],\n",
" [9.86083686e-01, 1.50616029e-02],\n",
" [9.94467974e-01, 5.80535224e-03],\n",
" [9.93595064e-01, 6.75331987e-03],\n",
" [9.99255478e-01, 9.77048301e-04],\n",
" [8.52798402e-01, 1.40539274e-01],\n",
" [1.64930165e-01, 8.44964683e-01],\n",
" [9.97384012e-01, 2.95930239e-03],\n",
" [9.96524751e-01, 3.76476254e-03],\n",
" [9.98174548e-01, 2.11151456e-03],\n",
" [6.01421416e-01, 3.98873955e-01],\n",
" [1.82097703e-01, 8.35628331e-01],\n",
" [9.97257769e-01, 3.08231893e-03],\n",
" [6.75210297e-01, 3.04572284e-01],\n",
" [9.53007460e-01, 4.57556657e-02],\n",
" [6.44749761e-01, 3.54564339e-01],\n",
" [9.99813974e-01, 2.97560182e-04],\n",
" [1.71126738e-01, 8.52856874e-01],\n",
" [9.67297077e-01, 3.14390585e-02],\n",
" [2.50648465e-02, 9.78119493e-01],\n",
" [7.36107171e-01, 2.60057181e-01],\n",
" [1.12051122e-01, 8.95975471e-01],\n",
" [9.98990476e-01, 1.27349573e-03],\n",
" [2.98825242e-02, 9.72770095e-01],\n",
" [9.96666729e-01, 3.56671633e-03],\n",
" [9.95280921e-01, 5.05312020e-03],\n",
" [4.67087999e-02, 9.55936015e-01],\n",
" [2.31040847e-02, 9.79663670e-01],\n",
" [9.92311239e-01, 7.36055616e-03],\n",
" [8.72887135e-01, 1.25496492e-01],\n",
" [9.83360767e-01, 1.60082821e-02],\n",
" [2.60338001e-02, 9.78127003e-01],\n",
" [9.79926884e-01, 1.79750286e-02],\n",
" [2.64777075e-02, 9.75348890e-01],\n",
" [8.47916961e-01, 1.61791548e-01],\n",
" [9.91084099e-01, 9.56200063e-03],\n",
" [9.97634172e-01, 2.64891679e-03],\n",
" [9.88843799e-01, 1.17526846e-02],\n",
" [9.99542236e-01, 6.39197184e-04],\n",
" [9.99526381e-01, 6.73291273e-04],\n",
" [9.89566207e-01, 1.05931843e-02],\n",
" [9.84892607e-01, 1.59311239e-02],\n",
" [9.47403908e-01, 5.09829447e-02],\n",
" [9.97905850e-01, 2.34915246e-03],\n",
" [3.12148798e-02, 9.70876515e-01],\n",
" [3.71371321e-02, 9.64185297e-01],\n",
" [2.86366083e-02, 9.73299086e-01],\n",
" [9.98304725e-01, 1.95303746e-03],\n",
" [5.43259323e-01, 4.45621550e-01],\n",
" [9.95735526e-01, 4.47552558e-03],\n",
" [9.99664187e-01, 4.84757824e-04],\n",
" [4.77395892e-01, 5.53484499e-01],\n",
" [2.67982967e-02, 9.75562334e-01],\n",
" [5.11212230e-01, 4.82527465e-01],\n",
" [9.96845663e-01, 3.34183313e-03],\n",
" [9.98655796e-01, 1.58252590e-03],\n",
" [9.98682797e-01, 1.57597498e-03],\n",
" [9.97531652e-01, 2.68543395e-03],\n",
" [8.89724314e-01, 1.02902323e-01],\n",
" [9.96919513e-01, 3.27811856e-03],\n",
" [9.76353884e-01, 2.36234013e-02],\n",
" [9.98018861e-01, 2.27649114e-03],\n",
" [2.69563273e-02, 9.76159155e-01],\n",
" [6.68949783e-02, 9.39164698e-01],\n",
" [9.73651588e-01, 2.52439454e-02],\n",
" [9.98613954e-01, 1.62094866e-03],\n",
" [9.44671273e-01, 5.45767955e-02],\n",
" [4.35869396e-02, 9.60251033e-01],\n",
" [1.88457757e-01, 8.20301712e-01],\n",
" [2.59413511e-01, 7.33782709e-01],\n",
" [5.41639701e-02, 9.47131872e-01],\n",
" [9.19235591e-03, 9.91685688e-01],\n",
" [9.94127393e-01, 5.99407079e-03],\n",
" [9.86760855e-01, 1.31814247e-02],\n",
" [9.78301644e-01, 2.23909188e-02],\n",
" [9.95761573e-01, 4.44441987e-03],\n",
" [9.99497414e-01, 7.02120829e-04],\n",
" [9.97229755e-01, 2.99413246e-03],\n",
" [2.33508293e-02, 9.80765402e-01],\n",
" [9.94520366e-01, 6.04913617e-03],\n",
" [9.97494936e-01, 2.87344446e-03],\n",
" [9.98973131e-01, 1.26466772e-03],\n",
" [4.97004330e-01, 4.97818023e-01],\n",
" [9.82652664e-01, 1.84881184e-02],\n",
" [1.31787378e-02, 9.88159657e-01],\n",
" [3.66922885e-01, 6.31668866e-01],\n",
" [9.96259332e-01, 4.08321712e-03],\n",
" [9.98756170e-01, 1.47667748e-03],\n",
" [7.72360861e-02, 9.27823484e-01],\n",
" [9.98449206e-01, 1.86312699e-03],\n",
" [3.15861739e-02, 9.71484125e-01],\n",
" [9.96845782e-01, 3.44880344e-03],\n",
" [7.42987245e-02, 9.30592895e-01],\n",
" [2.09757891e-02, 9.81689215e-01],\n",
" [9.98074651e-01, 2.20919168e-03],\n",
" [6.46533668e-01, 3.48830134e-01],\n",
" [5.98557949e-01, 3.84560645e-01],\n",
" [9.99404311e-01, 7.86028046e-04],\n",
" [9.98620272e-01, 1.63661852e-03],\n",
" [9.99131620e-01, 1.08906056e-03],\n",
" [3.69940363e-02, 9.68174577e-01],\n",
" [9.98901844e-01, 1.32231705e-03],\n",
" [9.85736310e-01, 1.49741964e-02],\n",
" [6.43389583e-01, 3.48876834e-01],\n",
" [4.78291005e-01, 5.33889949e-01],\n",
" [9.99354661e-01, 8.77332001e-04],\n",
" [5.98759837e-02, 9.42334712e-01],\n",
" [9.63255167e-01, 3.59756313e-02],\n",
" [2.05777939e-02, 9.81795371e-01],\n",
" [8.92112032e-02, 9.14921701e-01],\n",
" [9.96331871e-01, 4.17977013e-03],\n",
" [5.23199812e-02, 9.52406943e-01],\n",
" [9.88084376e-01, 1.19793927e-02],\n",
" [9.69554842e-01, 2.73279194e-02],\n",
" [3.67223620e-01, 6.27295792e-01],\n",
" [9.16537941e-01, 8.18802044e-02],\n",
" [9.99439061e-01, 7.88773061e-04],\n",
" [2.48765163e-02, 9.77342725e-01],\n",
" [9.96949136e-01, 3.68632539e-03],\n",
" [6.32767797e-01, 3.68138403e-01],\n",
" [9.95043516e-01, 5.14345011e-03],\n",
" [9.93900299e-01, 6.20957604e-03],\n",
" [9.97990847e-01, 2.28208210e-03],\n",
" [8.38533640e-02, 9.23523605e-01],\n",
" [9.93036926e-01, 6.87335012e-03],\n",
" [9.56128418e-01, 4.12139818e-02],\n",
" [9.56128418e-01, 4.12139818e-02],\n",
" [1.26801789e-01, 8.76263022e-01],\n",
" [2.42131352e-01, 7.80837834e-01],\n",
" [8.39236677e-01, 1.56546742e-01],\n",
" [9.98509943e-01, 1.75377598e-03]], dtype=float32)"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bring the unlabeled sequences to the model's input length; this changes\n",
"# nothing if they were already padded to max_len.\n",
"M_test = pad_sequences(M_test, maxlen=max_len)\n",
"\n",
"# One row of two scores per sample; shown as the cell result.\n",
"predictions = model.predict(M_test)\n",
"predictions"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[9.89297211e-01, 1.10417316e-02],\n",
" [3.73128988e-02, 9.67208505e-01],\n",
" [9.98658895e-01, 1.62478851e-03],\n",
" [9.96271133e-01, 4.04474512e-03],\n",
" [8.88998568e-01, 1.09605283e-01],\n",
" [9.98210311e-01, 2.08193017e-03],\n",
" [3.54536772e-01, 6.69178307e-01],\n",
" [9.85074520e-01, 1.48036592e-02],\n",
" [1.29813954e-01, 8.76039565e-01],\n",
" [9.62996125e-01, 3.44542079e-02],\n",
" [7.55765080e-01, 2.49890730e-01],\n",
" [3.65469605e-01, 6.57850266e-01],\n",
" [9.98800039e-01, 1.51763496e-03],\n",
" [9.97327685e-01, 3.15122958e-03],\n",
" [5.75969875e-01, 4.37167883e-01],\n",
" [9.98559892e-01, 1.73308724e-03],\n",
" [9.83565748e-01, 1.57846343e-02],\n",
" [9.99194920e-01, 1.05460756e-03],\n",
" [9.97159958e-01, 3.05769313e-03],\n",
" [7.86324322e-01, 2.11838797e-01],\n",
" [9.94806945e-01, 5.56808943e-03],\n",
" [9.94939566e-01, 5.16507169e-03],\n",
" [9.95327115e-01, 5.44817653e-03],\n",
" [8.30366760e-02, 9.23830807e-01],\n",
" [9.99270737e-01, 9.99870244e-04],\n",
" [9.98326242e-01, 1.99769647e-03],\n",
" [2.01347172e-01, 8.05066824e-01],\n",
" [9.97681141e-01, 2.63667619e-03],\n",
" [6.56308457e-02, 9.39631820e-01],\n",
" [9.97786045e-01, 2.47393013e-03],\n",
" [9.95178938e-01, 4.89054574e-03],\n",
" [9.98978972e-01, 1.31971261e-03],\n",
" [9.96212244e-01, 3.93838761e-03],\n",
" [8.08125079e-01, 1.83967963e-01],\n",
" [9.86248910e-01, 1.37963891e-02],\n",
" [8.94822180e-01, 9.63397995e-02],\n",
" [4.63879943e-01, 5.36770880e-01],\n",
" [9.90485072e-01, 9.23611410e-03],\n",
" [9.98726070e-01, 1.49498892e-03],\n",
" [9.91048157e-01, 9.32559464e-03],\n",
" [3.64511549e-01, 6.21637642e-01],\n",
" [9.82374847e-01, 1.73149928e-02],\n",
" [9.93298471e-01, 6.45173620e-03],\n",
" [7.04595149e-01, 2.89463103e-01],\n",
" [9.93536115e-01, 6.79136021e-03],\n",
" [9.99129593e-01, 1.11387786e-03],\n",
" [9.06374276e-01, 8.96570683e-02],\n",
" [4.10660505e-01, 5.88741422e-01],\n",
" [9.99692917e-01, 4.83944750e-04],\n",
" [7.80944601e-02, 9.30601835e-01],\n",
" [4.43848312e-01, 5.59155643e-01],\n",
" [1.56105727e-01, 8.51845980e-01],\n",
" [9.82641697e-01, 1.64951906e-02],\n",
" [5.26238121e-02, 9.54043090e-01],\n",
" [9.80851293e-01, 1.90182012e-02],\n",
" [9.90199268e-01, 1.01825390e-02],\n",
" [8.12352419e-01, 1.85036004e-01],\n",
" [9.63351190e-01, 3.39603312e-02],\n",
" [6.31880701e-01, 3.64272743e-01],\n",
" [8.38387251e-01, 1.53711647e-01],\n",
" [9.76741195e-01, 2.20144410e-02],\n",
" [9.90605891e-01, 9.22481064e-03],\n",
" [9.98764873e-01, 1.51382165e-03],\n",
" [9.94506538e-01, 6.27602870e-03],\n",
" [9.97006834e-01, 3.38183786e-03],\n",
" [9.80250001e-01, 1.75612047e-02],\n",
" [8.35835814e-01, 1.54920161e-01],\n",
" [9.93885934e-01, 6.47321297e-03],\n",
" [7.32705653e-01, 2.38107413e-01],\n",
" [1.67823553e-01, 8.28654826e-01],\n",
" [9.97954011e-01, 2.44288007e-03],\n",
" [9.97217536e-01, 3.17463791e-03],\n",
" [9.98234391e-01, 2.21486646e-03],\n",
" [8.36275935e-01, 1.66395113e-01],\n",
" [9.97660041e-01, 2.73835333e-03],\n",
" [9.42627370e-01, 5.81915826e-02],\n",
" [4.82731223e-01, 5.22184551e-01],\n",
" [9.30466115e-01, 6.39836937e-02],\n",
" [5.61084867e-01, 4.41416740e-01],\n",
" [4.50993061e-01, 5.64331651e-01],\n",
" [1.45042473e-02, 9.87169504e-01],\n",
" [9.00123000e-01, 1.01546042e-01],\n",
" [3.05879086e-01, 7.04485118e-01],\n",
" [4.30749863e-01, 5.69399238e-01],\n",
" [1.44111022e-01, 8.66918087e-01],\n",
" [9.00533974e-01, 1.01281039e-01],\n",
" [5.68374395e-01, 4.33054060e-01],\n",
" [9.90041316e-01, 9.53172334e-03],\n",
" [9.87220049e-01, 1.34074753e-02],\n",
" [9.50743973e-01, 5.25186770e-02],\n",
" [2.19779104e-01, 7.77013421e-01],\n",
" [1.54326940e-02, 9.86147463e-01],\n",
" [9.98808622e-01, 1.44694082e-03],\n",
" [8.91158879e-01, 1.06756985e-01],\n",
" [1.05071880e-01, 8.98244858e-01],\n",
" [9.99034405e-01, 1.23475143e-03],\n",
" [9.94593918e-01, 5.76898688e-03],\n",
" [9.93712962e-01, 6.65295729e-03],\n",
" [7.02256203e-01, 2.97228634e-01],\n",
" [9.96766925e-01, 3.70637397e-03],\n",
" [9.89144385e-01, 1.11364387e-02],\n",
" [9.79650915e-01, 2.00015400e-02],\n",
" [9.93528545e-01, 6.56247372e-03],\n",
" [9.87190068e-01, 1.34675717e-02],\n",
" [9.87129986e-01, 1.17099285e-02],\n",
" [9.97617066e-01, 2.67686578e-03],\n",
" [9.97251093e-01, 3.04796081e-03],\n",
" [9.99035120e-01, 1.22244644e-03],\n",
" [9.54006970e-01, 4.89431918e-02],\n",
" [9.46570277e-01, 5.37833124e-02],\n",
" [9.43213701e-01, 5.26795760e-02],\n",
" [9.95134652e-01, 5.25745936e-03],\n",
" [8.25005472e-01, 1.69372976e-01],\n",
" [8.70788097e-01, 1.24156632e-01],\n",
" [9.97023284e-01, 3.26669309e-03],\n",
" [7.92557359e-01, 1.97209224e-01],\n",
" [9.98770893e-01, 1.56408502e-03],\n",
" [9.90018487e-01, 1.06498580e-02],\n",
" [9.90723550e-01, 8.96911696e-03],\n",
" [2.37598643e-01, 7.74325430e-01],\n",
" [6.24084949e-01, 3.58046830e-01],\n",
" [9.97389972e-01, 2.78766919e-03],\n",
" [9.84458208e-01, 1.46457935e-02],\n",
" [9.72309589e-01, 2.36137267e-02],\n",
" [9.98931468e-01, 1.41089316e-03],\n",
" [9.94718015e-01, 5.69924805e-03],\n",
" [4.72977042e-01, 5.49978912e-01],\n",
" [9.90615129e-01, 9.25194938e-03],\n",
" [3.34342234e-02, 9.68245149e-01],\n",
" [9.99083996e-01, 1.16300583e-03],\n",
" [9.44297254e-01, 5.47527708e-02],\n",
" [9.72299933e-01, 2.72788983e-02],\n",
" [9.93855774e-01, 6.32649660e-03],\n",
" [9.62590456e-01, 3.52819376e-02],\n",
" [9.79455829e-01, 1.92554407e-02],\n",
" [9.99165893e-01, 1.05177308e-03],\n",
" [9.85086679e-01, 1.40042715e-02],\n",
" [2.78653465e-02, 9.75346625e-01],\n",
" [9.98081207e-01, 2.21448927e-03],\n",
" [9.95232165e-01, 5.13720512e-03],\n",
" [8.88802350e-01, 1.11462571e-01],\n",
" [9.88667548e-01, 1.16618285e-02],\n",
" [9.98724878e-01, 1.53446209e-03],\n",
" [9.77607906e-01, 2.18318254e-02],\n",
" [9.96304035e-01, 4.18575248e-03],\n",
" [2.64609367e-01, 7.45012462e-01],\n",
" [9.72862959e-01, 2.69199926e-02],\n",
" [9.83956635e-01, 1.73143838e-02],\n",
" [9.98144031e-01, 2.16847914e-03],\n",
" [9.98341322e-01, 1.96177256e-03],\n",
" [9.79604721e-01, 1.93097256e-02],\n",
" [9.98944461e-01, 1.24808261e-03],\n",
" [8.86560798e-01, 1.12452798e-01],\n",
" [9.96718824e-01, 3.71032930e-03],\n",
" [2.21701801e-01, 7.80260146e-01],\n",
" [2.55418997e-02, 9.74908948e-01],\n",
" [9.99462545e-01, 7.41661002e-04],\n",
" [8.23981166e-01, 1.70397952e-01],\n",
" [6.52386248e-02, 9.39590633e-01],\n",
" [9.98122513e-01, 2.17202515e-03],\n",
" [7.12066889e-01, 2.86067158e-01],\n",
" [8.19861591e-02, 9.32014406e-01],\n",
" [9.58634377e-01, 3.82429920e-02],\n",
" [8.24238896e-01, 1.68227255e-01],\n",
" [9.98916030e-01, 1.41686620e-03],\n",
" [9.96592700e-01, 3.68587370e-03],\n",
" [7.33839869e-01, 2.57828295e-01],\n",
" [1.06705025e-01, 8.97602201e-01],\n",
" [3.80194485e-01, 6.08735800e-01],\n",
" [8.97293806e-01, 9.94179696e-02],\n",
" [9.10029948e-01, 8.81575868e-02],\n",
" [9.89466846e-01, 1.07162707e-02],\n",
" [1.63196921e-02, 9.86219883e-01],\n",
" [9.79962826e-01, 1.92297176e-02],\n",
" [9.99110639e-01, 1.11217669e-03],\n",
" [9.98811603e-01, 1.45246438e-03],\n",
" [9.10705030e-01, 8.23124573e-02],\n",
" [9.86083686e-01, 1.50616029e-02],\n",
" [9.94467974e-01, 5.80535224e-03],\n",
" [9.93595064e-01, 6.75331987e-03],\n",
" [9.99255478e-01, 9.77048301e-04],\n",
" [8.52798402e-01, 1.40539274e-01],\n",
" [1.64930165e-01, 8.44964683e-01],\n",
" [9.97384012e-01, 2.95930239e-03],\n",
" [9.96524751e-01, 3.76476254e-03],\n",
" [9.98174548e-01, 2.11151456e-03],\n",
" [6.01421416e-01, 3.98873955e-01],\n",
" [1.82097703e-01, 8.35628331e-01],\n",
" [9.97257769e-01, 3.08231893e-03],\n",
" [6.75210297e-01, 3.04572284e-01],\n",
" [9.53007460e-01, 4.57556657e-02],\n",
" [6.44749761e-01, 3.54564339e-01],\n",
" [9.99813974e-01, 2.97560182e-04],\n",
" [1.71126738e-01, 8.52856874e-01],\n",
" [9.67297077e-01, 3.14390585e-02],\n",
" [2.50648465e-02, 9.78119493e-01],\n",
" [7.36107171e-01, 2.60057181e-01],\n",
" [1.12051122e-01, 8.95975471e-01],\n",
" [9.98990476e-01, 1.27349573e-03],\n",
" [2.98825242e-02, 9.72770095e-01],\n",
" [9.96666729e-01, 3.56671633e-03],\n",
" [9.95280921e-01, 5.05312020e-03],\n",
" [4.67087999e-02, 9.55936015e-01],\n",
" [2.31040847e-02, 9.79663670e-01],\n",
" [9.92311239e-01, 7.36055616e-03],\n",
" [8.72887135e-01, 1.25496492e-01],\n",
" [9.83360767e-01, 1.60082821e-02],\n",
" [2.60338001e-02, 9.78127003e-01],\n",
" [9.79926884e-01, 1.79750286e-02],\n",
" [2.64777075e-02, 9.75348890e-01],\n",
" [8.47916961e-01, 1.61791548e-01],\n",
" [9.91084099e-01, 9.56200063e-03],\n",
" [9.97634172e-01, 2.64891679e-03],\n",
" [9.88843799e-01, 1.17526846e-02],\n",
" [9.99542236e-01, 6.39197184e-04],\n",
" [9.99526381e-01, 6.73291273e-04],\n",
" [9.89566207e-01, 1.05931843e-02],\n",
" [9.84892607e-01, 1.59311239e-02],\n",
" [9.47403908e-01, 5.09829447e-02],\n",
" [9.97905850e-01, 2.34915246e-03],\n",
" [3.12148798e-02, 9.70876515e-01],\n",
" [3.71371321e-02, 9.64185297e-01],\n",
" [2.86366083e-02, 9.73299086e-01],\n",
" [9.98304725e-01, 1.95303746e-03],\n",
" [5.43259323e-01, 4.45621550e-01],\n",
" [9.95735526e-01, 4.47552558e-03],\n",
" [9.99664187e-01, 4.84757824e-04],\n",
" [4.77395892e-01, 5.53484499e-01],\n",
" [2.67982967e-02, 9.75562334e-01],\n",
" [5.11212230e-01, 4.82527465e-01],\n",
" [9.96845663e-01, 3.34183313e-03],\n",
" [9.98655796e-01, 1.58252590e-03],\n",
" [9.98682797e-01, 1.57597498e-03],\n",
" [9.97531652e-01, 2.68543395e-03],\n",
" [8.89724314e-01, 1.02902323e-01],\n",
" [9.96919513e-01, 3.27811856e-03],\n",
" [9.76353884e-01, 2.36234013e-02],\n",
" [9.98018861e-01, 2.27649114e-03],\n",
" [2.69563273e-02, 9.76159155e-01],\n",
" [6.68949783e-02, 9.39164698e-01],\n",
" [9.73651588e-01, 2.52439454e-02],\n",
" [9.98613954e-01, 1.62094866e-03],\n",
" [9.44671273e-01, 5.45767955e-02],\n",
" [4.35869396e-02, 9.60251033e-01],\n",
" [1.88457757e-01, 8.20301712e-01],\n",
" [2.59413511e-01, 7.33782709e-01],\n",
" [5.41639701e-02, 9.47131872e-01],\n",
" [9.19235591e-03, 9.91685688e-01],\n",
" [9.94127393e-01, 5.99407079e-03],\n",
" [9.86760855e-01, 1.31814247e-02],\n",
" [9.78301644e-01, 2.23909188e-02],\n",
" [9.95761573e-01, 4.44441987e-03],\n",
" [9.99497414e-01, 7.02120829e-04],\n",
" [9.97229755e-01, 2.99413246e-03],\n",
" [2.33508293e-02, 9.80765402e-01],\n",
" [9.94520366e-01, 6.04913617e-03],\n",
" [9.97494936e-01, 2.87344446e-03],\n",
" [9.98973131e-01, 1.26466772e-03],\n",
" [4.97004330e-01, 4.97818023e-01],\n",
" [9.82652664e-01, 1.84881184e-02],\n",
" [1.31787378e-02, 9.88159657e-01],\n",
" [3.66922885e-01, 6.31668866e-01],\n",
" [9.96259332e-01, 4.08321712e-03],\n",
" [9.98756170e-01, 1.47667748e-03],\n",
" [7.72360861e-02, 9.27823484e-01],\n",
" [9.98449206e-01, 1.86312699e-03],\n",
" [3.15861739e-02, 9.71484125e-01],\n",
" [9.96845782e-01, 3.44880344e-03],\n",
" [7.42987245e-02, 9.30592895e-01],\n",
" [2.09757891e-02, 9.81689215e-01],\n",
" [9.98074651e-01, 2.20919168e-03],\n",
" [6.46533668e-01, 3.48830134e-01],\n",
" [5.98557949e-01, 3.84560645e-01],\n",
" [9.99404311e-01, 7.86028046e-04],\n",
" [9.98620272e-01, 1.63661852e-03],\n",
" [9.99131620e-01, 1.08906056e-03],\n",
" [3.69940363e-02, 9.68174577e-01],\n",
" [9.98901844e-01, 1.32231705e-03],\n",
" [9.85736310e-01, 1.49741964e-02],\n",
" [6.43389583e-01, 3.48876834e-01],\n",
" [4.78291005e-01, 5.33889949e-01],\n",
" [9.99354661e-01, 8.77332001e-04],\n",
" [5.98759837e-02, 9.42334712e-01],\n",
" [9.63255167e-01, 3.59756313e-02],\n",
" [2.05777939e-02, 9.81795371e-01],\n",
" [8.92112032e-02, 9.14921701e-01],\n",
" [9.96331871e-01, 4.17977013e-03],\n",
" [5.23199812e-02, 9.52406943e-01],\n",
" [9.88084376e-01, 1.19793927e-02],\n",
" [9.69554842e-01, 2.73279194e-02],\n",
" [3.67223620e-01, 6.27295792e-01],\n",
" [9.16537941e-01, 8.18802044e-02],\n",
" [9.99439061e-01, 7.88773061e-04],\n",
" [2.48765163e-02, 9.77342725e-01],\n",
" [9.96949136e-01, 3.68632539e-03],\n",
" [6.32767797e-01, 3.68138403e-01],\n",
" [9.95043516e-01, 5.14345011e-03],\n",
" [9.93900299e-01, 6.20957604e-03],\n",
" [9.97990847e-01, 2.28208210e-03],\n",
" [8.38533640e-02, 9.23523605e-01],\n",
" [9.93036926e-01, 6.87335012e-03],\n",
" [9.56128418e-01, 4.12139818e-02],\n",
" [9.56128418e-01, 4.12139818e-02],\n",
" [1.26801789e-01, 8.76263022e-01],\n",
" [2.42131352e-01, 7.80837834e-01],\n",
" [8.39236677e-01, 1.56546742e-01],\n",
" [9.98509943e-01, 1.75377598e-03]], dtype=float32)"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"스릴러일 확률 (1) 0.2028985507246377\n",
"로맨스일 확률 (0) 0.7971014492753623\n"
]
}
],
"source": [
"import numpy as np\n",
"predict_labels = np.argmax(predictions, axis = 1)\n",
"th_tot = 0\n",
"ro_tot = 0\n",
"for i in range(69):\n",
"    if(predict_labels[i]==1):\n",
"        th_tot += 1\n",
"    else:\n",
"        ro_tot += 1\n",
"print(\"스릴러일 확률 (1) \" , th_tot / 69)\n",
"print(\"로맨스일 확률 (0) \" , ro_tot / 69)\n",
"\n",
" "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 판타지 VS 사회"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 2324\n",
"줄거리 평균 길이 : 275.97285714285715\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAYbklEQVR4nO3db2xc9Z3v8fenJglqy16STUCIwJpW2cqpdzdlLYpUt8LLBZKwatKr3d6Yq5JtfEmjBasV+yDJ9QOgrWmyu7TawkJucm0RpMZtdWkvUQPLRtR3kdWlxUBKQ1w2gQ1LIEoMQRARmj/c730wP0cTZ2yP/80Zz/m8pNGc+c4Zz/fg4eOT3/zOOYoIzMwsHz6SdQNmZlY5Dn0zsxxx6JuZ5YhD38wsRxz6ZmY5ckHWDYxm/vz5UV9fn3UbVsOee+65tyJiQaXf159tm06jfa6rOvTr6+vp7+/Pug2rYZJey+J9/dm26TTa59rDO2ZmOeLQNzPLEYe+mVmOOPTNzHLEoW9mliMO/RrR09NDY2MjdXV1NDY20tPTk3VLZlaFqnrKppWnp6eHjo4Ourq6aG5upq+vj7a2NgBaW1sz7s7Mqon39GtAZ2cnXV1dtLS0MGvWLFpaWujq6qKzszPr1mYkSd2SjkraW1T7kaQ96XZQ0p5Ur5f0QdFzW7Lr3Gxs3tOvAQMDAzQ3N59Ta25uZmBgIKOOZryHgQeAR4YKEfFfh5Yl3Qe8W7T+KxGxpGLdmU2CQ78GNDQ00NfXR0tLy9laX18fDQ0NGXY1c0XE05LqSz0nScCXgT+rRC/1G3adXT646eZKvKXVOA/v1ICOjg7a2tro7e3l9OnT9Pb20tbWRkdHR9at1aLPA0ciYn9R7SpJL0j6F0mfH+mFktZK6pfUPzg4OP2dmpXgPf0aMPRlbXt7OwMDAzQ0NNDZ2ekvcadHK1A8NeowcGVEvC3pT4H/I+nTEfHe8BdGxFZgK0BTU5OvU2qZcOjXiNbWVof8NJN0AfBfgD8dqkXESeBkWn5O0ivAHwI+m5pVJQ/vmJXvPwO/jYhDQwVJCyTVpeVPAIuAVzPqz2xMDn2zYST1AP8KfErSIUlt6alVnDu0A/AF4EVJvwb+N7AuIo5Vrluz8fHwjtkwEVFynCwi/qpE7VHg0enuyWyqeE/fzCxHHPpmZjni0DczyxGHvplZjjj0zcxyxKFvZpYjDn0zsxxx6JuZ5YhD38wsRxz6ZmY54tA3M8sRh76ZWY449M3McsShb2aWIw59M7McceibmeWIQ9/MLEfGDH1JV0jqlTQg6SVJX0/1eZJ2S9qf7uemuiR9X9IBSS9KurroZ61O6++XtHr6NsvMzEopZ0//DPA3EdEAXAvcLmkxsAF4KiIWAU+lxwDLKFwcehGwFngICn8kgLuAzwLXAHcN/aEwM7PKGDP0I+JwRDyflo8DA8DlwApge1ptO7AyLa8AHomCZ4CLJV0G3ATsjohjEfEOsBtYOqVbY2ZmoxrXmL6keuAzwC+BSyPiMBT+MACXpNUuB14vetmhVBupPvw91krql9Q/ODg4nvbMpoSkbklHJe0tqt0t6Q1Je9JtedFzG9Nw5suSbsqma7PylB36kj4OPAp8IyLeG23VErUYpX5uIWJrRDRFRNOCBQvKbc9sKj1M6X+Ffi8ilqTb4wBpqHMV8On0mgcl1VWsU7NxKiv0Jc2iEPg/iIifpPKRNGxDuj+a6oeAK4pevhB4c5S6WVWJiKeBY2WuvgL4YUScjIh/Bw5Q+M7KrCqVM3tHQBcwEBHfLXpqJzA0A2c18FhR/dY0i+da4N00/PMkcKOkuekL3BtTzWymuCPNSOsumoRQ1rAleOjSqkM5e/qfA74C/Nmw8cxNwA2S9gM3pMcAjwOvUtjj2Qb8NUBEHAO+BTybbt9MNbOZ4CHgk8AS4DBwX6qXNWwJHrq06nDBWCtERB+lP9gA15dYP4
DbR/hZ3UD3eBo0qwYRcWRoWdI24GfpoYctbUbxEblmZRj6/ir5EjA0s2cnsErSHElXUTg+5VeV7s+sXGPu6ZvljaQe4DpgvqRDFA4qvE7SEgpDNweBrwFExEuSfgzso3Ag4+0R8WEWfZuVw6FvNkxEtJYod42yfifQOX0dmU0dD++YmeWIQ9/MLEcc+mZmOeLQNzPLEYe+2QxRv2EX9Rt2Zd2GzXAOfTOzHHHom5nliEPfzCxHHPpmZjni0DczyxGHvplZjjj0zcxyxKFvZpYjDn0zsxxx6JuZ5YhD38wsRxz6ZmY54tA3M8sRXy7RrEYUn4Hz4KabM+zEqplD32yGcbjbZHh4x8wsRxz6ZsNI6pZ0VNLeotrfSfqtpBcl/VTSxaleL+kDSXvSbUt2nZuNzaFvdr6HgaXDaruBxoj4Y+DfgI1Fz70SEUvSbV2FejSbEI/pmw0TEU9Lqh9W++eih88Af1HJnibK4/82nPf0zcZvDfBE0eOrJL0g6V8kfX6kF0laK6lfUv/g4OD0d2lWgkPfbBwkdQBngB+k0mHgyoj4DHAnsEPS75V6bURsjYimiGhasGBBZRo2G8ahb1YmSauBPwf+W0QEQEScjIi30/JzwCvAH2bXpdnoHPpmZZC0FFgPfDEiThTVF0iqS8ufABYBr2bTpdnY/EWu2TCSeoDrgPmSDgF3UZitMwfYLQngmTRT5wvANyWdAT4E1kXEsUwaNyuDQ99smIhoLVHuGmHdR4FHp7cjs6kz5vDOCAeq3C3pjaIDUpYXPbdR0gFJL0u6qai+NNUOSNow9ZtiZmZjKWdM/2HOP1AF4HtFB6Q8DiBpMbAK+HR6zYOS6tKY5z8Cy4DFQGta18zMKmjM4Z1SB6qMYgXww4g4Cfy7pAPANem5AxHxKoCkH6Z19427YzMzm7DJzN65I52HpFvS3FS7HHi9aJ1DqTZS/Tw+gMXMbPpMNPQfAj4JLKFwcMp9qa4S68Yo9fOLPoDFbFrUb9h19mb5NaHZOxFxZGhZ0jbgZ+nhIeCKolUXAm+m5ZHqZjbFHOw2kgnt6Uu6rOjhl4ChmT07gVWS5ki6isKBKr8CngUWSbpK0mwKX/bunHjbZmY2EWPu6Y9woMp1kpZQGKI5CHwNICJekvRjCl/QngFuj4gP08+5A3gSqAO6I+KlKd8aMzMbVTmzd8o+UCWt3wl0lqg/Djw+ru7MzGxK+dw7ZmY54tA3M8sRh76ZWY449M3McsShb2aWIw59M7McceibmeWIQ9/MLEcc+jWip6eHxsZG6urqaGxspKenJ+uWzKwK+XKJNaCnp4eOjg66urpobm6mr6+PtrY2AFpbSx1QbWZ55T39GtDZ2cktt9xCe3s7F154Ie3t7dxyyy10dp53Ngwzyznv6deAffv28f7779Pd3X12T3/NmjW89tprWbc2I0nqBv4cOBoRjak2D/gRUE/hJINfjoh3JAn4B2A5cAL4q4h4Pou+zcrhPf0aMHv2bNrb22lpaWHWrFm0tLTQ3t7O7Nmzs25tpnqY868LvQF4KiIWAU+lx1C47vOidFtL4QJDZlXLoV8DTp06xQMPPEBvby+nT5+mt7eXBx54gFOnTmXd2owUEU8Dx4aVVwDb0/J2YGVR/ZEoeAa4eNj1JsyqikO/BixevLjkmP7ixYuzbq2WXBoRhwHS/SWp7us/24zi0K8BHR0d7Nixg/vvv5/f/e533H///ezYsYOOjo6sW8sDX//ZZhR/kVsDWltb+cUvfsGyZcs4efIkc+bM4bbbbvN0zal1RNJlEXE4Dd8cTfXRrgttVnW8p18Denp62LVrF0888QSnTp3iiSeeYNeuXT5Aa2rtBFan5dXAY0X1W1VwLfDu0DCQWTVy6NeAzs5Ourq6zpm909XV5Xn6E5SuC/2vwKckHZLUBmwCbpC0H7ghPYbCJUBfBQ4A24C/zqBls7J5eKcGDAwM0NzcfE6tubmZgYGBjDqa2Ua4LjTA9SXWDeD26e
1oZPUbdmX11jZDeU+/BjQ0NNDX13dOra+vj4aGhow6MrNq5dCvAR0dHbS1tZ0zT7+trc2zd8zsPB7eqQGevWNm5fKefg3w7B0zK5dDvwb4LJtmVi4P79SAffv2cfToUT72sY8REbz//vts3bqVt956K+vWzKzKeE+/BtTV1XHixAkACmf6hRMnTlBXV5dlW2ZWhRz6NeDMmTN88MEHtLe3c/z4cdrb2/nggw84c+ZM1q2ZWZVx6NeIVatW0d3dzUUXXUR3dzerVq3KuiUzq0Ie068RP//5z9mxY8fZK2fdcsstWbdkVaz4SN6Dm27OsBOrNId+DVi4cCFvv/02N910E6dPn2bWrFlccMEFLFy4MOvWzKzKOPRrwMqVK3nwwQdZsGABR44cYd68eQwODrJy5cqxX2xVx+fTsenkMf0a0Nvby8aNG5k/fz4f+chHmD9/Phs3bqS3tzfr1sysynhPvwYMDAzwwgsv8O1vf/ts7fTp03znO9/JsCszq0Zj7ulL6pZ0VNLeoto8Sbsl7U/3c1Ndkr4v6YCkFyVdXfSa1Wn9/ZJWl3ovm5iGhgbuueceGhsbqauro7GxkXvuucdn2TSz85QzvPMwsHRYbQPwVEQsAp5KjwGWAYvSbS3wEBT+SAB3AZ8FrgHuGvpDYZPX0tLC5s2bWbNmDcePH2fNmjVs3ryZlpaWrFszsyozZuhHxNPAsWHlFcD2tLwdWFlUfyQKngEuTtcTvQnYHRHHIuIdYDfn/yGxCert7WX9+vXnzNNfv369x/TN7DwTHdO/dOg6oOlC0Zek+uXA60XrHUq1kernkbSWwr8SuPLKKyfYXr54TN/MyjXVs3dUohaj1M8vRmyNiKaIaFqwYMGUNlerPKZvZuWaaOgfScM2pPujqX4IuKJovYXAm6PUbQp4TN/MyjXR0N8JDM3AWQ08VlS/Nc3iuRZ4Nw0DPQncKGlu+gL3xlSzKeAx/cqQ9ClJe4pu70n6hqS7Jb1RVF+eda9mIxlzTF9SD3AdMF/SIQqzcDYBP5bUBvwH8Jdp9ceB5cAB4ATwVYCIOCbpW8Czab1vRsTwL4dtgjymXxkR8TKwBEBSHfAG8FMKn/PvRcTfZ9ieWVnGDP2IGOlCq9eXWDeA20f4Od1A97i6s7I0NDTQ19d3znBOX1+fx/Sn1/XAKxHx2tA1DMxmAp+GoQZ0dHTQ1tZGb28vp0+fpre3l7a2Njo6OrJurZatAoovQnxHOiCx28egWDXzaRhqQGtr4R9j7e3tDAwM0NDQQGdn59m6TS1Js4EvAhtT6SHgWxRmpH0LuA9YU+J1no5smXPo14jW1laHfOUsA56PiCMAQ/cAkrYBPyv1oojYCmwFaGpqKjll2Wy6eXjHbPxaKRraGZq+nHwJ2HveK8yqhPf0zcZB0keBG4CvFZX/VtISCsM7B4c9Z1ZVvKdfI9rb27nwwguRxIUXXkh7e3vWLdWkiDgREb8fEe8W1b4SEX8UEX8cEV8cOkWJWTVy6NeA9vZ2tmzZwr333sv777/Pvffey5YtWxz8Vpb6Dbt8ta4ccejXgG3btrF582buvPNOPvrRj3LnnXeyefNmtm3blnVrZlZlHPo14OTJk6xbt+6c2rp16zh58mRGHZlZtXLo14A5c+awZcuWc2pbtmxhzpw5GXVkZtXKs3dqwG233cb69euBwh7+li1bWL9+/Xl7/2ZmKpwupzo1NTVFf39/1m1UpfGe76Waf89ZkvRcRDRV+n1H+2xn9aXqwU03Z/K+NvVG+1x7T3+GGinEJTngzWxEHtM3M8sRh76ZWY449M3McsShb2aWIw59M7McceibmeWIQ9/MLEcc+mZmOeLQNzPLEYe+mVmOOPTNzHLEoW9mliMOfTOzHHHom5nliE+tbDYOkg4Cx4EPgTMR0SRpHvAjoB44CHw5It7Jqkez0XhP32z8WiJiSdFFKjYAT0XEIuCp9NisKjn0zSZvBbA9LW8HVmbYi9moHPpm4xPAP0
t6TtLaVLs0Ig4DpPtLSr1Q0lpJ/ZL6BwcHK9Su2bk8pm82Pp+LiDclXQLslvTbcl8YEVuBrVC4Ru50NWg2Gu/pm41DRLyZ7o8CPwWuAY5Iugwg3R/NrkOz0U0q9CUdlPQbSXsk9afaPEm7Je1P93NTXZK+L+mApBclXT0VG2BWKZI+JumioWXgRmAvsBNYnVZbDTyWTYdmY5uKPf1yZzIsAxal21rgoSl4b7NKuhTok/Rr4FfAroj4J2ATcIOk/cAN6bFZVZqOMf0VwHVpeTvwf4H1qf5IRATwjKSLJV029AWYWbWLiFeBPylRfxu4vvIdmY3fZEN/aCZDAP8zfVF1zkyG9IUXwOXA60WvPZRq54R+mhGxFuDKK6+cZHtmVq76DbvOLh/cdHOGndh0mmzoj2cmg0rUzpvB4BkOZmbTZ1Jj+uOcyXAIuKLo5QuBNyfz/mZmNj4TDv0JzGTYCdyaZvFcC7zr8Xwzs8qazPDOpcBPJQ39nB0R8U+SngV+LKkN+A/gL9P6jwPLgQPACeCrk3hvMzObgAmH/nhnMqRZO7dP9P3MzGzyfESumVmOOPTNzHLEoW9mliMOfTOzHHHoV7l58+YhqewbUPa68+bNy3jrzKzSfD79KvfOO+9QmPg09Yb+SJhZfnhP38wsRxz6ZmY54tA3M8sRh76ZWY449M3McsShb2aWIw59M7McceibmeWIQ9+sTJKukNQraUDSS5K+nup3S3pD0p50W551r2Yj8RG5VS7u+j24+z9N38+28TgD/E1EPJ+uGvecpN3pue9FxN9n2JtZWRz6VU73vDetp2GIu6flR9ekdHnPw2n5uKQB4PJsuzIbHw/vmE2ApHrgM8AvU+kOSS9K6pY0d4TXrJXUL6l/cHCwQp2ancuhbzZOkj4OPAp8IyLeAx4CPgksofAvgftKvS4itkZEU0Q0LViwoGL9mhVz6JuNg6RZFAL/BxHxE4CIOBIRH0bE/wO2Addk2aPZaBz6ZmVS4VzUXcBARHy3qH5Z0WpfAvZWujezcvmLXLPyfQ74CvAbSXtS7X8ArZKWAAEcBL6WTXtTp37DrrPLBzfdnGEnNtUc+mZliog+oNSVZx6vdC9mE+XhHTOzHHHom5nliEPfzCxHHPpmZjniL3JngMJMwak3d27JA0fNzuGZPLXFoV/lxnveHUnTdq4eM5v5PLxjZpYjDn0zsxxx6JuZ5YjH9M1s3Pzl7szlPX0zsxyp+J6+pKXAPwB1wP+KiE2V7sHMJqZ4D99mporu6UuqA/4RWAYspnB2wsWV7MHMLM8qPbxzDXAgIl6NiFPAD4EVFe7BzCy3Kh36lwOvFz0+xLALS/s6ouWRVPI20nNmZlD50C+VPuccPurriJYnIsZ1MzODyof+IeCKoscLgTcr3IOZWW5VOvSfBRZJukrSbGAVsLPCPZiZ5VZFp2xGxBlJdwBPUpiy2R0RL1WyBzOzPKv4PP2IeBxfU9SsZvjo3JnFp2EwsynjPwDVz6FvNgV8pPnUKXXUr/+ATB2fe8dsknykuc0k3tM3m7yzR5oDSBo60nxfpl1lrNRQz0TP3TPSsFEe/1UwtM0T3U5V84E7kgaB17LuY4aZD7yVdRMzyB9ExKSOApT0F8DSiPjv6fFXgM9GxB3D1lsLrE0PPwW8PMKPzNPvME/bCpXb3hE/11W9pz/Z/xnzSFJ/RDRl3UfOjHmkORSONge2jvnDcvQ7zNO2QnVsr8f0zSbPR5rbjOHQN5s8H2luM0ZVD+/YhIw5fGBTaxqONM/T7zBP2wpVsL1V/UWumZlNLQ/vmJnliEPfzCxHHPo1QFK3pKOS9mbdi02cpKWSXpZ0QNKGrPuZqFKfR0nzJO2WtD/dz011Sfp+2uYXJV1d9JrVaf39klZnsS1jkXSFpF5JA5JekvT1VK/e7R3vFZh8q74b8AXgamBv1r34NuHfYR3wCvAJYDbwa2Bx1n1NcFvO+zwCfwtsSMsbgM1peTnwBIVjHa4Ffp
nq84BX0/3ctDw3620rsa2XAVen5YuAf6NwKo6q3V7v6deAiHgaOJZ1HzYpZ0/lEBGngKFTOcw4I3weVwDb0/J2YGVR/ZEoeAa4WNJlwE3A7og4FhHvALuBpdPf/fhExOGIeD4tHwcGKFz3u2q316FvVh0uB14venwo1WrFpRFxGApBCVyS6iNt94z77yGpHvgM8EuqeHsd+mbVoaxTOdSgkbZ7Rv33kPRx4FHgGxHx3mirlqhVdHsd+mbVodZP5XAkDWOQ7o+m+kjbPWP+e0iaRSHwfxARP0nlqt1eh75Zdaj1UznsBIZmpKwGHiuq35pmtVwLvJuGQ54EbpQ0N818uTHVqookAV3AQER8t+ip6t3erL/99m3yN6AHOAycprDH0JZ1T75N6Pe4nMLsj1eAjqz7mcR2nPd5BH4feArYn+7npXVF4QI0rwC/AZqKfs4a4EC6fTXr7RphW5spDMO8COxJt+XVvL0+DYOZWY54eMfMLEcc+mZmOeLQNzPLEYe+mVmOOPTNzHLEoW9mliMOfTOzHPn/9Gsy4cbxH3oAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"# Fantasy vs. society: interleave SC_encoded samples (label [1,0]) with FN_encoded samples (label [0,1])\n",
"FS_X_train = []\n",
"FS_Y_train = [] \n",
"\n",
"FStrain_data_size = 1400//2\n",
"\n",
"for i in range(FStrain_data_size):\n",
" FS_X_train.append(SC_encoded[i])\n",
" FS_Y_train.append([1,0])\n",
" FS_X_train.append(FN_encoded[i])\n",
" FS_Y_train.append([0,1])\n",
" \n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"len_result = [len(s) for s in FS_X_train]\n",
"print(\"줄거리 최대 길이 : \",max(len_result))\n",
"print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
"\n",
"plt.subplot(1,2,1)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(1,2,2)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 1336\n",
"줄거리 평균 길이 : 193.3892857142857\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD8CAYAAACb4nSYAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAeqklEQVR4nO3df5DU9Z3n8eeLAeHCGsSInILcmCzrDUttDDdlvHOSysQV1KSCuax7gpVwzpwEI5x7eiXEqTpjspPIbRJ3w22wyM6cWhVGPY2RUjZImclaVEVX/BGDjhFiCI6wggsqgYAMvO+P/gzpGXpmeqZnpnu6X4+qrv5+P9/vt/vdOrz705/v54ciAjMzqwzjih2AmZmNHid9M7MK4qRvZlZBnPTNzCqIk76ZWQVx0jczqyADJn1JrZL2StqW49j/lBSSzkr7kvQ9STskvSRpXta5SyRtT48lw/sxzMwsH/nU9O8BLu9dKOk84DJgV1bxFcDs9FgKrE3nngncDnwcuAi4XdLUQgI3M7PBGzDpR8RTwP4ch+4CbgWyR3ctBO6LjKeBMySdAywANkfE/og4AGwmxxeJmZmNrPFDuUjS54A3I+IXkrIPzQDeyNrvTGV9lffrrLPOiurq6qGEaJaX55577u2ImDba7+u/bRtJ/f1dDzrpS/oA0ATMz3U4R1n0U57r9ZeSaRpi1qxZbN26dbAhmuVN0m+L8b7V1dX+27YR09/f9VB673wEOB/4haSdwEzgeUn/lkwN/rysc2cCu/spP0VErIuI2oionTZt1CtgZmZlbdBJPyJ+GRFnR0R1RFSTSejzIuJfgA3Al1IvnouBdyNiD7AJmC9parqBOz+VmZnZKMqny2Yb8HPgAkmdkhr7OX0j8DqwA/gB8BWAiNgPfAN4Nj2+nsrMzGwUDdimHxGLBjhenbUdwI19nNcKtA4yPjMzG0YekWtmVkGc9MtEW1sbc+fOpaqqirlz59LW1lbskMYsSedJapfUIellSTel8jMlbU6jyjd3DzDsbyS6Walx0i8DbW1tNDU1sWbNGo4cOcKaNWtoampy4h+6LuCWiKgBLgZulDQHWAU8GRGzgSfTPvQxEt2sFDnpl4Hm5mZaWlqor69nwoQJ1NfX09LSQnNzc7FDG5MiYk9EPJ+2DwIdZAYTLgTuTafdC1yVtvsaiW5Wcpz0y0BHRwd1dXU9yurq6ujo6ChSROVDUjXwMeAZYHrqgkx6PjudNqQR52bFMKRpGKy01NTUsGXLFurr60+WbdmyhZqamiJGNfZJ+iPgYeCvIuK9XlOO9Dg1R9kpI857jzbPR/Wqx09u77zzM3ldY9Yf1/TLQFNTE42NjbS3t3Ps2DHa29tpbGykqamp2KGNWZImkEn4P4yIH6Xit7qbbdLz3lSe14hzjza3UuCafhlYtCgzlGLFihV0dHRQU1NDc3PzyXIbHGWq9C1AR0R8N+vQBmAJcGd6fjSrfLmk+8lMH949Et2s5Djpl4lFixY5yQ+fS4AvAr+U9GIqu41Msn8wjUrfBVydjm0EriQzEv0wcN3ohmuWPyd9s14iYgu52+kBLs1xfp8j0c1Kjdv0zcwqiJO+mVkFcdI3M6sgTvpmZhXESd/MrII46ZuZVRAnfTOzCuKkb2ZWQZz0zcwqiJO+mVkFcdI3M6sgAyZ9Sa2S9krallX2N5JeTeuBPiLpjKxjX01rhf5K0oKs8stT2Q5Jq3q/j5mZjbx8avr3AJf3KtsMzI2IPwNeA74KkNYRvQb403TN9yVVSaoC/p7MWqJzgEXpXDMzG0UDJv2IeArY36vsiYjoSrtPk1k0AjJrhd4fEUcj4jdkppq9KD12RMTrEfE+cH8618zMRtFwtOk3AP+YtvtaK9RriJqZlYCCkr6kJqAL+GF3UY7Top/yXK+5VNJWSVv37dtXSHhmZtbLkJO+pCXAZ4Fr0yIS0PdaoXmtIQpeR9TMbC
QNKelLuhxYCXwuIg5nHdoAXCNpoqTzgdnAPwPPArMlnS/pNDI3ezcUFrrZyOijx9oDkl5Mj53dyyhKqpb0+6xjdxcvcrOBDbhcoqQ24FPAWZI6gdvJ9NaZCGzOrCHN0xGxLCJelvQg8AqZZp8bI+J4ep3lwCagCmiNiJdH4POYDYd7gP8D3NddEBH/pXtb0neAd7PO/3VEXDhq0ZkVYMCkHxG5Vttu6ef8ZqA5R/lGMgtIm5W0iHhKUnWuY8rUcv4S+PRoxmQ2XDwi12xwPgG8FRHbs8rOl/SCpH+S9IliBWaWDyf9MtHW1sbcuXOpqqpi7ty5tLW1FTukcrUIyP6PuweYFREfA24G1kv6YK4L3TPNSsGAzTtW+tra2mhqaqKlpYW6ujq2bNlCY2MjAIsW5Wqds6GQNB74z8B/6C6LiKPA0bT9nKRfA38CbO19fUSsA9YB1NbW5uyybDbSXNMvA83NzbS0tFBfX8+ECROor6+npaWF5uZTbq1YYf4ceDUiOrsLJE1L04wg6cNkeqy9XqT4zAbkpF8GOjo6qKur61FWV1dHR0dHkSIa21KPtZ8DF0jqlNSYDl1Dz6YdgE8CL0n6BfAQsCwi9mNWoty8UwZqamrYsmUL9fX1J8u2bNlCTU1NEaMau/rosUZE/NccZQ8DD490TGbDxTX9MtDU1ERjYyPt7e0cO3aM9vZ2GhsbaWpqKnZoZlZiXNMvA903a1esWEFHRwc1NTU0Nzf7Jq6ZncJJv0wsWrTISd7MBuTmHTOzCuKkb2ZWQZz0zcwqiJO+mVkFcdIvE557x8zy4d47ZcBz75hZvlzTLwPNzc0sXryYFStWMGnSJFasWMHixYs9906ZqV71ONWrHi92GDbGuaZfBl555RX27t3L5MmTiQgOHTrEunXrePvtt4sdmpmVGNf0y0BVVRXHjx+ntbWVo0eP0trayvHjx6mqqip2aGZWYlzTLwNdXV10dXXR0NDArl27mDVr1skyM7NsrumXibRAPRHRY9/MLJuTfhkYP348VVVVPZp3qqqqGD/eP+TMrKcBk76kVkl7JW3LKjtT0mZJ29Pz1FQuSd+TtEPSS5LmZV2zJJ2/XdKSkfk4len48eOMGzeOhoYGJk2aRENDA+PGjeP48ePFDs3MSkw+Nf17gMt7la0CnoyI2cCTaR/gCjLLxc0GlgJrIfMlAdwOfBy4CLi9+4vCCjdnzhzq6urYs2cPJ06cYM+ePdTV1TFnzpxih2ZmJWbApB8RTwG9l39bCNybtu8Frsoqvy8yngbOkHQOsADYHBH7I+IAsJlTv0hsiOrr63nsscf45je/yaFDh/jmN7/JY4891mMlLctfH79uvybpTUkvpseVWce+mn7d/krSguJEbZafobbpT4+IPQDp+exUPgN4I+u8zlTWV/kpJC2VtFXS1n379g0xvMrS3t7OypUraW1t5fTTT6e1tZWVK1fS3t5e7NDGqnvIXSm5KyIuTI+NAJLmkFk790/TNd/vXijdrBQN952+XF1Gop/yUwsj1gHrAGpra3OeYz11dHTwwgsv8Nd//dcny44dO8a3vvWtIkY1dkXEU5Kq8zx9IXB/RBwFfiNpB5kmzJ+PUHhmBRlqTf+t1GxDet6byjuB87LOmwns7qfchkFNTQ133HFHjwnX7rjjDi+MPvyWpw4KrVn3pPwr1saUoSb9DUB3D5wlwKNZ5V9KvXguBt5NzT+bgPmSpqZ/LPNTmQ2D+vp6Vq9eTUNDAwcPHqShoYHVq1e7TX94rQU+AlwI7AG+k8oH9Ss2ImojonbatGkjE6XZAPLpstlG5qfqBZI6JTUCdwKXSdoOXJb2ATYCrwM7gB8AXwGIiP3AN4Bn0+PrqcyGgdv0R15EvBURxyPiBJm/7YvSIf+KtTFlwDb9iOhrbt5Lc5wbwI19vE4r0Dqo6CwvHR0drFy5skfZBRdcQEdHR5EiKj+SzunuvAB8Huju2bMBWC/pu8C5ZLor/3MRQjTLi4
dsloFzzz2XW2+9lfXr15+cT3/x4sWce+65xQ5tTEq/bj8FnCWpk8wYk09JupBM081O4MsAEfGypAeBV4Au4MaI8Kg4K1lO+mWi91w7nntn6Pr4ddvSz/nNgBcvsDHBc++Ugd27d7N69eoei6isXr2a3bvdtGxmPbmmXwZqamqYOXMm27adHEBKe3u7u2ya2Slc0y8DTU1NNDY20t7ezrFjx2hvb6exsZGmpqZih2ZmJcY1/TLQvfj5ihUr6OjooKamhubmZi+KbmancNIvE4sWLXKSLxNe/NxGkpt3ykRbW1uPaRja2tqKHZKZlSDX9MtAW1sbTU1NtLS0nOyn39jYCODav5n14Jp+GWhubmbx4sU9umwuXryY5mZ3HTeznlzTLwOvvPIKhw8fPqWmv3PnzmKHZmYlxjX9MnDaaaexfPly6uvrmTBhAvX19SxfvpzTTjut2KGZWYlx0i8D77//PmvWrOnRT3/NmjW8//77xQ7NzEqMm3fKwJw5c7jqqqt69NO/9tpr+fGPf1zs0MysxLimXwaamppYv349a9as4ciRI6xZs4b169d7RK6ZncI1/TLgEblmli8n/TLhEblmlg8375QJj8g1s3w46ZeBtrY2brrpJg4dOkREcOjQIW666SYnfjM7hZN+Gbj11lupqqqitbWVo0eP0traSlVVFbfeemuxQxuTJLVK2itpW1bZ30h6VdJLkh6RdEYqr5b0e0kvpsfdIx1f9arHTz7MBstJvwx0dnZy3XXX9ZiG4brrrqOzs7PYoY1V9wCX9yrbDMyNiD8DXgO+mnXs1xFxYXosG6UYzYakoKQv6X9IelnSNkltkiZJOl/SM5K2S3pA0mnp3Ilpf0c6Xj0cH8Ay1q5d26N5Z+3atcUOacyKiKeA/b3KnoiIrrT7NDBz1AMzGwZDTvqSZgD/HaiNiLlAFXANsBq4KyJmAweAxnRJI3AgIv4YuCudZ8OgqqqK9957jxUrVvC73/2OFStW8N5771FVVVXs0MpVA/CPWfvnS3pB0j9J+kRfF0laKmmrpK379u0b+SjNcii0eWc88G8kjQc+AOwBPg08lI7fC1yVthemfdLxSyWpwPc34Pjx44wbN45bbrmFyZMnc8sttzBu3DiOHz9e7NDKjqQmoAv4YSraA8yKiI8BNwPrJX0w17URsS4iaiOidtq0aaMTsFkvQ076EfEm8G1gF5k//HeB54B3sn4GdwIz0vYM4I10bVc6/0O9X9e1oaHp6upi+vTpSGL69Ol0dXUNfJENiqQlwGeBayMiACLiaET8a9p+Dvg18CfFi9Ksf4U070wlU3s/HzgXmAxckePU6L6kn2N/KHBtaEhOP/102traOHr0KG1tbZx++unFDqmsSLocWAl8LiIOZ5VPk1SVtj8MzAZeL06UZgMrZETunwO/iYh9AJJ+BPwn4AxJ41NtfiawO53fCZwHdKbmoCn0ullmQzdx4kQaGhrYtWsXs2bNYuLEiRw8eLDYYY1JktqATwFnSeoEbifTW2cisDm1Sj6deup8Evi6pC7gOLAsIvx3bSWrkKS/C7hY0geA3wOXAluBduAvgPuBJcCj6fwNaf/n6fhPu38iW2EmTpzIggULePHFFwGYPHkyl1xyCQ899NAAV1ouEZFrPouWPs59GHh4ZCMyGz6FtOk/Q+aG7PPAL9NrrSPzE/hmSTvItNl3/2NpAT6Uym8GVhUQt2W5/vrreeCBB2hoaODgwYM0NDTwwAMPcP311xc7NDMrMQVNuBYRt5P56ZvtdeCiHOceAa4u5P0stzVr1gBw2223ccsttzBx4kSWLVt2stzMrJtH5JaJ11577eRKWe+//z6vvfZakSMys1LkpF8GFixYwBNPPMGyZct45513WLZsGU888QQLFiwodmhmVmI8n34Z2Lx5MzfccAPf//73AU4+3333iM/9ZWZjjGv6ZSAimDdvXo/59OfNm4c7R5lZb67pl4mbb76ZRx99lLq6OrZs2cLChQ
uLHZKZlSAn/TIwefJkDh48yBe+8AUOHDjA1KlTOXjwIJMnTy52aGZWYty8UwYOHz6MJA4cOADAgQMHkMThw4cHuNLMKo2TfpmICMaNy/zvHDdunNvzzSwnJ/0y0J3gp0yZ0uPZid/MenPSLxMTJ05kypQpjBs3jilTpjBx4sRih2RmJcg3csvEsWPHePPNNzlx4gRvvvmmF1Axs5yc9MvEiRMnOHHiBJD5AjAzy8XNO2ZmFcRJv4xk994xM8vF2aFMTJo06WTzzokTJ5g0aVKRIzKzUuSkXyaOHDlCWsYPSRw5cqTIEY1dklol7ZW0LavsTEmbJW1Pz1NTuSR9T9IOSS9Jmle8yM0G5qRfRrr75bt/fsHuAS7vVbYKeDIiZgNP8oeV364gsxj6bGApsHaUYjQbEvfeMeslIp6SVN2reCGZxdIB7gV+RmZp0IXAfWm956clnSHpnIjYMzrR/kH1qsdPbu+88zOj/fY2Rjjpl4lx48adbNPPtW8Fm96dyCNij6SzU/kM4I2s8zpT2SlJX9JSMr8GmDVr1rAElZ3ozfLh5p0y0TvBO+GPGuUoy9m+FhHrIqI2ImqnTZs2wmGZ5VZQ0k8/ZR+S9KqkDkn/0Te8rEy9JekcgPS8N5V3AudlnTcT2D3KsZnlrdCa/t8BP4mIfw98FOjAN7yKoqqqigkTJgAwYcIEqqqqihxR2dkALEnbS4BHs8q/lCo1FwPvFqM93yxfQ076kj4IfBJoAYiI9yPiHTI3tu5Np90LXJW2T97wioingTO6a05WuKlTpzJjxgwkMWPGDKZOnVrskMYsSW3Az4ELJHVKagTuBC6TtB24LO0DbAReB3YAPwC+UoSQzfJWyI3cDwP7gP8r6aPAc8BNDMMNLxtYd5/8bm+//TZvv/02ADt37jzlPHfjzF9ELOrj0KU5zg3gxpGNyGz4FNK8Mx6YB6yNiI8Bh/hDU04ued3wkrRU0lZJW/ft21dAeOUtIk4+5s+fD8ANN9zQ43n+/PknzzEzg8KSfifQGRHPpP2HyHwJFHTDyz0cBm/Tpk3Mnz+fu+++G4C7776b+fPns2nTpiJHZmalZshJPyL+BXhD0gWp6FLgFXzDqyg2bdrUY+4dJ3wzy6XQwVkrgB9KOo3MzazryHyRPJhufu0Crk7nbgSuJHPD63A618zMRlFBST8iXgRqcxzyDS8zsxLkEblmZhXESd/MrII46ZuZVRAnfTOzCuKkb2ZWQZz0zcwqiJO+mVkFcdI3M6sgTvpmZhXESd/MrII46ZuZVRAnfTOzClLoLJtmFSNNI/5AVtGHgf8FnAFcT2YlOYDbImLjKIdnlhcnfbM8RcSvgAsBJFUBbwKPkJkm/K6I+HYRwzPLi5t3zIbmUuDXEfHbYgdiNhhO+mZDcw3QlrW/XNJLklolTc11gdd/tlLgpG82SGmluM8B/y8VrQU+QqbpZw/wnVzXef1nKwVO+maDdwXwfES8BRARb0XE8Yg4AfwAuKio0Zn1w0nfbPAWkdW0I+mcrGOfB7aNekRmeXLvHbNBkPQB4DLgy1nF/1vShUAAO3sdMyspTvpmgxARh4EP9Sr7YpHCMRu0gpt3JFVJekHSY2n/fEnPSNou6YF00wtJE9P+jnS8utD3NjOzwRmONv2bgI6s/dVkBqrMBg4Ajam8ETgQEX8M3JXOMzOzUVRQ0pc0E/gM8A9pX8CngYfSKfcCV6XthWmfdPzSdL6ZmY2SQmv6fwvcCpxI+x8C3omIrrTfCcxI2zOANwDS8Xfp1TZqZmYja8hJX9Jngb0R8Vx2cY5TI49j2a/rUYtmZiOkkJr+JcDnJO0E7ifTrPO3wBmSunsFzQR2p+1O4DyAdHwKsL/3i3rUopnZyBly0o+Ir0bEzIioJjMPyU8j4lqgHfiLdNoS4NG0vSHtk47/NCJOqembmdnIGYkRuSuBmyXtINNm35LKW4APpfKbgVUj8N5mZtaPYRmcFRE/A36Wtl8nx9wjEXEEuH
o43s/MzIbGc++YmVUQT8NgViGqVz1+cnvnnZ8pYiRWTK7pm5lVECd9M7MK4qRvVoaqVz3eoznHrJuTvplZBXHSNzOrIE76ZmYVxF02zQYhzTV1EDgOdEVEraQzgQeAajLLJf5lRBwoVoxm/XFN32zw6iPiwoioTfurgCfTwkFP4ilGrIQ56ZsVLnuBoOyFg8xKjpO+2eAE8ISk5yQtTWXTI2IPQHo+O9eFXivCSoHb9EvcmWeeyYEDg2sezncVyqlTp7J//ylLGlj/LomI3ZLOBjZLejXfCyNiHbAOoLa2tqjTiueaksHTNFQGJ/0Sd+DAAUZq2QEvUTx4EbE7Pe+V9AiZGWXfknROROyRdA6wt6hBmvXDzTtmeZI0WdLp3dvAfGAbPRcIyl44yKzkuKZvlr/pwCPpF9J4YH1E/ETSs8CDkhqBXYyxdSM8XUNlcdI3y1NaIOijOcr/Fbh09CMyGzw375iZVRAnfTOzCuKkb2ZWQdymb1bGfJPWehtyTV/SeZLaJXVIelnSTan8TEmbJW1Pz1NTuSR9T9IOSS9JmjdcH8LMRocXZxn7Cmne6QJuiYga4GLgRklz6HvyqSuA2emxFFhbwHubmdkQDDnpR8SeiHg+bR8EOoAZ9D351ELgvsh4GjgjjV40M7NRMiw3ciVVAx8DnqHvyadmAG9kXdaZynq/lielMjMbIQUnfUl/BDwM/FVEvNffqTnKTplUJiLWRURtRNROmzat0PDMzCxLQUlf0gQyCf+HEfGjVPxWd7NNr8mnOoHzsi6fCewu5P3NzGxwCum9I6AF6IiI72Yd6mvyqQ3Al1IvnouBd7ubgczMbHQU0k//EuCLwC8lvZjKbgPuJPfkUxuBK4EdwGHgugLe28zMhmDIST8itpC7nR5yTD4VmUnhbxzq+5lZcbhffnnxiFwzO4UTffly0jezgniZxbHFE66ZmVUQJ32zPPUz39TXJL0p6cX0uLLYsZr1xc07JS5u/yB8bcrIvbYNRvd8U8+ntXKfk7Q5HbsrIr5dxNjM8uKkX+J0x3tkOj6NwGtLxNdG5KXLUhpX0j3FyEFJ3fNNmY0Zbt4xG4Je800BLE9Thrd2TyduVoqc9M0GKcd8U2uBjwAXkvkl8J0+rvNkglZ0Tvpmg5BrvqmIeCsijkfECeAHwEW5rvVkglYK3KZvlqe+5puSdE7WPFKfB7YVI77R5MFbY5eTvln++ppvapGkC8lMFb4T+HJxwjMbmJO+WZ76mW9q42jHYjZUTvpmNmw8JUPp841cM7MK4qRvZlZB3LwzBmQ6jQy/qVM9hsis0jjpl7jBTsEgacSmbTCzsc/NO2ZmFcRJ38ysgjjpm5lVELfpm9mIyNVn3/34i2/Uk76ky4G/A6qAf4iIO0c7BjMrLf4yGD2j2rwjqQr4e+AKYA6ZOUvmjGYMZmaVbLRr+hcBOyLidQBJ9wMLgVdGOY4xr7+++7mOuRunjTWu/Y+M0b6ROwN4I2u/k17LzXmhifxExKAeZmYw+jX9XNXTHhkpItYB6wBqa2udrczKgOffLx2jXdPvBM7L2p8J7B7lGMzMKtZoJ/1ngdmSzpd0GnANsGGUYzAzq1ij2rwTEV2SlgObyHTZbI2Il0czBjMbe3xTd/iMej/9iNiIVxqyMuPxJ8NnoPZ/fwEUxiNyzQqUNf7kMjL3rZ6VtCEi3BW5H8Nxc7f7NbKTf1+vO5xfEGP5i8dz75gV7uT4k4h4H+gef2JWclzTNytcrvEnHy9SLJaHXL8G8vm1MNBr5ar1D/evgly/bgZDpTxwR9I+4LfFjmOMOQt4u9hBjCH/LiKmFfICkq4GFkTEf0v7XwQuiogVvc5bCixNuxcAv8rxcuX2/8+fpzj6/Lsu6Zp+of8YK5GkrRFRW+w4Kkxe40+yBx72pdz+//nzlB636ZsVzuNPbMwo6Zq+2Vjg8Sc2ljjpl59+mw9sZAzj+JNy+//nz1NiSvpGrpmZDS+36ZuZVRAn/TIgqV
XSXknbih2LDZ2kyyX9StIOSauKHU++JO2U9EtJL0ramsrOlLRZ0vb0PDWVS9L30md8SdK84kaf+9/PUOKXtCSdv13SkmJ8lnw46ZeHe4DLix2EDV0ZLCVaHxEXZnVnXAU8GRGzgSfTPmQ+3+z0WAqsHfVIT3UPp/77GVT8ks4EbiczKO8i4PbuL4pS46RfBiLiKWB/seOwgpTbVA4LgXvT9r3AVVnl90XG08AZks4pRoDd+vj3M9j4FwCbI2J/RBwANlOiFTEnfbPSMOBSoiUsgCckPZdGHQNMj4g9AOn57FQ+Vj7nYOMfK5/LXTbNSsSAS4mWsEsiYreks4HNkl7t59yx/Dmh7/jHzOdyTd+sNIzZpUQjYnd63gs8Qqap6q3uZpv0vDedPlY+52DjHyufy0nfrESMyakcJE2WdHr3NjAf2EYm9u4eLEuAR9P2BuBLqRfMxcC73c0oJWaw8W8C5kuamm7gzk9lJcfNO2VAUhvwKeAsSZ3A7RHRUtyobDDG8FQO04FHJEEmn6yPiJ9IehZ4UFIjsAu4Op2/EbgS2AEcBq4b/ZB7yvXvB7iTQcQfEfslfYPMlzfA1yOiJDtXeESumVkFcfOOmVkFcdI3M6sgTvpmZhXESd/MrII46ZuZVRAnfTOzCuKkb2ZWQZz0zcwqyP8HJuEojPlrMPEAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"FS_X_test = []\n",
"FS_Y_test = [] # one-hot labels: [1,0] for SC_encoded samples, [0,1] for FN_encoded samples\n",
"\n",
"for i in range(FStrain_data_size):\n",
" FS_X_test.append(SC_encoded[FStrain_data_size+i])\n",
" FS_Y_test.append([1,0])\n",
" FS_X_test.append(FN_encoded[FStrain_data_size+i])\n",
" FS_Y_test.append([0,1])\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"len_result = [len(s) for s in FS_X_test]\n",
"print(\"줄거리 최대 길이 : \",max(len_result))\n",
"print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
"\n",
"plt.subplot(1,2,1)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(1,2,2)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"max_len = 230\n",
"\n",
"FS_X_train = pad_sequences(FS_X_train, maxlen=max_len)\n",
"FS_X_test = pad_sequences(FS_X_test, maxlen=max_len)\n"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 1400 samples, validate on 1400 samples\n",
"Epoch 1/3\n",
" 64/1400 [>.............................] - ETA: 27sWARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: \n",
"WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n"
]
},
{
"ename": "InvalidArgumentError",
"evalue": " indices[54,195] = 8646 is not in [0, 5002)\n\t [[node sequential_6/embedding_6/embedding_lookup (defined at <ipython-input-44-4c2962189ab4>:16) ]] [Op:__inference_distributed_function_13289]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_6/embedding_6/embedding_lookup:\n sequential_6/embedding_6/embedding_lookup/12144 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-44-4c2962189ab4>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 15\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 16\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFS_X_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mFS_Y_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mFS_X_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mFS_Y_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmc\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m 817\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 818\u001b[0m \u001b[0mworkers\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[0;32m 820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 821\u001b[0m def evaluate(self,\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m 340\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[0mtraining_context\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 342\u001b[1;33m total_epochs=epochs)\n\u001b[0m\u001b[0;32m 343\u001b[0m \u001b[0mcbks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 344\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[1;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[0;32m 126\u001b[0m step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[0;32m 127\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 128\u001b[1;33m \u001b[0mbatch_outs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 129\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[1;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[1;34m(input_fn)\u001b[0m\n\u001b[0;32m 96\u001b[0m \u001b[1;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 97\u001b[0m return nest.map_structure(_non_none_constant_value,\n\u001b[1;32m---> 98\u001b[1;33m distributed_function(input_fn))\n\u001b[0m\u001b[0;32m 99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 100\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 566\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 567\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 570\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 630\u001b[0m \u001b[1;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 631\u001b[0m \u001b[1;31m# stateless function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 632\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 633\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 634\u001b[0m \u001b[0mcanon_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2361\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2363\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2365\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs)\u001b[0m\n\u001b[0;32m 1609\u001b[0m if isinstance(t, (ops.Tensor,\n\u001b[0;32m 1610\u001b[0m resource_variable_ops.BaseResourceVariable))),\n\u001b[1;32m-> 1611\u001b[1;33m self.captured_inputs)\n\u001b[0m\u001b[0;32m 1612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1613\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1690\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1691\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1692\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1693\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1694\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 543\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 544\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"executor_type\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"config_proto\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 546\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 547\u001b[0m outputs = execute.execute_with_cancellation(\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 67\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_status_to_exception\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 68\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 69\u001b[0m keras_symbolic_tensors = [\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\six.py\u001b[0m in \u001b[0;36mraise_from\u001b[1;34m(value, from_value)\u001b[0m\n",
"\u001b[1;31mInvalidArgumentError\u001b[0m: indices[54,195] = 8646 is not in [0, 5002)\n\t [[node sequential_6/embedding_6/embedding_lookup (defined at <ipython-input-44-4c2962189ab4>:16) ]] [Op:__inference_distributed_function_13289]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_6/embedding_6/embedding_lookup:\n sequential_6/embedding_6/embedding_lookup/12144 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n"
]
}
],
"source": [
"# Two-class classifier: Embedding -> LSTM -> 2-unit sigmoid head trained on one-hot labels.\n",
"\n",
"# Convert inputs and labels to arrays first: the vocabulary size below is read from them.\n",
"FS_X_train = np.array(FS_X_train)\n",
"FS_X_test = np.array(FS_X_test)\n",
"FS_Y_train = np.array(FS_Y_train)\n",
"FS_Y_test = np.array(FS_Y_test)\n",
"\n",
"# The Embedding table needs one row per token id, i.e. input_dim > largest id in the data.\n",
"# A fixed size of 5002 crashed training with 'indices[54,195] = 8646 is not in [0, 5002)',\n",
"# so derive the size from the inputs and keep the previous 5002 as a lower bound.\n",
"vocab_size = max(5002, int(FS_X_train.max()) + 1, int(FS_X_test.max()) + 1)\n",
"\n",
"model = Sequential()\n",
"model.add(Embedding(vocab_size, 120))\n",
"model.add(LSTM(120))\n",
"model.add(Dense(2, activation='sigmoid'))\n",
"\n",
"# Stop once val_loss has not improved for 4 epochs; keep only the weights with the best val_acc.\n",
"es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
"mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
"\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])\n",
"model.fit(FS_X_train, FS_Y_train, validation_data=(FS_X_test, FS_Y_test), epochs=3, batch_size=64, callbacks=[es, mc])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Check the predictions for the musical data using the best model trained above"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## 드라마 VS 역사"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 1389\n",
"줄거리 평균 길이 : 258.6014285714286\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAYfUlEQVR4nO3df3Dc9Z3f8ecL+VftlmDHhlLZjnwXNyfH0+YYjaGNphPFxw9DGvNHaBGdxIfV8dyU6K5NModTzdTXpOr4uE5zxlzpuJGKuQlymPQCnsKd48EKGf0BRSQ5IqykVjkDOjgssMNlwMa/3v1jvzIreyXtL+13td/XY2Zn9/vZz+6+v2Pvez/6fD8/FBGYmVk2XJV2AGZmVjtO+mZmGeKkb2aWIU76ZmYZ4qRvZpYhC9IOYCYrV66MlpaWtMOwBvbiiy++HRGrav25/r9tc2mm/9d1nfRbWloYHh5OOwxrYJJeTeNz/X/b5tJM/6/dvWNmliFO+mZmGeKkb2aWIU76ZmYZMmvSl9Qv6YSkkQLPfU1SSFqZHEvSg5LGJL0k6Ya8utskHUtu26p7GmZmVoxiWvqPALddXihpDXAz8Fpe8RZgfXLbATyc1F0B7AJuBDYBuyQtryRwm2pgYICNGzfS1NTExo0bGRgYSDskM6tDsyb9iPgRcLLAU98Cfh/IX6ZzK/Bo5DwHXCPpeuBW4HBEnIyIU8BhCvyQWHkGBgbo6elh7969nDlzhr1799LT0+PEb2ZXKKtPX9Lngb+OiL+87Klm4PW84/GkbLryQu+9Q9KwpOGJiYlywsuc3t5e+vr66OjoYOHChXR0dNDX10dvb2/aoZlZnSk56UtaCvQA/6HQ0wXKYobyKwsj9kVEW0S0rVpV84mS89Lo6Cjt7e1Tytrb2xkdHU0pIjOrV+W09H8dWAf8paTjwGrgx5L+PrkW/Jq8uquBN2YotypobW1laGhoStnQ0BCtra0pRWTV0rLzqUs3s2ooOelHxM8i4tqIaImIFnIJ/YaI+BvgIPClZBTPTcC7EfEmcAi4RdLy5ALuLUmZVUFPTw9dXV0MDg5y7tw5BgcH6erqoqenJ+3QzKzOzLr2jqQB4DPASknjwK6I6Jum+tPA7cAY8D5wL0BEnJT0TeCFpN43IqLQxWErQ2dnJwDd3d2Mjo7S2tpKb2/vpXIzs0mzJv2ImDFzJK39yccB3DdNvX6gv8T4rEidnZ1O8lUiqR/4HHAiIjYmZX8E/HPgLPD/gHsj4pfJc18HuoALwO9GhP+KtbrlGblmV3qEK4cUHwY2RsQ/Av4v8HUASRuAu4FPJq/5b5KaaheqWWmc9M0uU2huSkT8ICLOJ4fPkRuMALm5KQci4oOI+CtyXZubahasWYmc9M1Ktx348+Sx56DYvOKkb1YCST3AeeA7k0UFqnkOitWtut45y6yeJAsFfg7YnAxaAM9BsXnGLX2zIki6Dbgf+HxEvJ/31EHgbkmLJa0jt9jg/0kjRrNiuKVvdplCc1PIjdZZDByWBPBcRPxORLws6XHgKLlun/si4kI6kZvNzknf7DLTzE2ZbkIiEdELeHU7mxfcvWNmliFO+mZmGeKkb2aWIU76ZmYZ4qRvZpYhTvpmZhnipG9mliFO+mZmGeKkb2aWIU76ZmYZ4qRvZpYhsyZ9Sf2STkgaySv7I0k/l/SSpO9Luibvua9LGpP0C0m35pXflpSNSdpZ/VMxM7PZFNPSf4QK9wtN9gz9E2ALsAHoTOqamVkNzZr0q7Rf6CZgLCJeiYizwIGkrpmZ1VA1llbeDnw3edxM7kdgUv5+oZfvI3pjoTeTtAPYAbB27doqhGc2v7TsfCrtEKyBVXQht4T9Qr2PqJlZHSi7pV/GfqHeR9TMLGVltfTL2C/0BWC9pHWSFpG72HuwstDNzKxUs7b0q7VfqKQvA4eAJqA/Il6eg/MxM7MZFDN6pzMiro+IhRGxOi
L6IuLjEbEmIj6V3H4nr35vRPx6RHwiIv48r/zpiPiHyXPeT7TKBgYG2LhxI01NTWzcuJGBgYG0QzKzOuSN0RvAwMAAPT099PX10d7eztDQEF1dXQB0dhba49vMssrLMDSA3t5e+vr66OjoYOHChXR0dNDX10dvr/+gMrOpnPQbwOjoKO3t7VPK2tvbGR0dTSkiM6tXTvoNoLW1laGhoSllQ0NDtLa2phSRmdUrJ/0G0NPTQ1dXF4ODg5w7d47BwUG6urro6elJOzSropadT3m2rlXMF3IbwOTF2u7ubkZHR2ltbaW3t9cXccskqZ/cxMMTEbExKVtBbrmRFuA48C8i4pRyY5b3ALcD7wO/HRE/TiNus2K4pd8gOjs7GRkZ4cKFC4yMjDjhV+YRrlxZdifwTESsB55JjiG3cuz65LYDeLhGMZqVxUnf7DKFVpYltyrs/uTxfuDOvPJHI+c54BpJ19cmUrPSOembFee6iHgTILm/Nilv5soVZJspQNIOScOShicmJuY0WLPpOOmbVcYryNq84qRvVpy3JrttkvsTSflMK8ua1R0nfbPiHAS2JY+3AU/mlX9JOTcB7052A5nVIw/ZNLvMNCvL7gYel9QFvAbclVR/mtxwzTFyQzbvrXnAZiVw0je7TERMN951c4G6Adw3txGZVY+7d8zMMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLESb9BeI9cMyvGrElfUr+kE5JG8spWSDos6Vhyvzwpl6QHJY1JeknSDXmv2ZbUPyZpW6HPsvJM7pG7d+9ezpw5w969e+np6XHiN7MrFNPSf4QKl5lN1iLfBdwIbAJ2Tf5QWOV6e3u555576O7uZsmSJXR3d3PPPfd4j1wzu8Ksk7Mi4keSWi4r3kpuxiLklpn9IXA/ecvMAs9Jmlxm9jPA4Yg4CSDpMLkfEjdFq+Do0aOcOHGCZcuWAfDee++xb98+3n777ZQjM7N6U26ffqnLzHr52TnU1NTEhQsX6O/v58yZM/T393PhwgWamprSDs3M6ky1l2GYbpnZkpafBfYBtLW1FaxjU50/f57Tp09z6623cu7cORYuXMiCBQs4f/582qGZWZ0pt6Vf6jKzXn52jp0+fZqLFy8CcPHiRU6fPp1yRGZWj8pN+qUuM3sIuEXS8uQC7i1JmVXRypUrp9ybmV1u1u6daiwzGxEnJX0TeCGp943Ji7pWHZKuuM9dTzcz+1Axo3eqssxsRPQD/SVFZ0Xr6OjgrbfeYmJigpUrV/LJT36SI0eOpB2WmdUZz8htEM8++yzbt2/nV7/6Fdu3b+fZZ59NOyQzq0PeRKUBrF69mpMnT7Jz506++tWvsnDhQhYvXsyKFSvSDs3M6oxb+g3ggQceYNmyZTQ3N3PVVVfR3NzMsmXLeOCBB9IOzczqjJN+A+js7GTPnj2XZuQuW7aMPXv20Nk53eUYM8sqd+80iM7OTid5M5uVW/pmZhnipG9mliHu3jGbZ1p2PnXp8fHdd6QYic1HbumbmWWIk36D8HaJtSHp30l6WdKIpAFJSyStk/R8sivcdyUtSjtOs+k46TcAb5dYG5Kagd8F2iJiI9AE3A38IfCtZCe5U0BXelGazcxJvwF4u8SaWgD8HUkLgKXAm8Bnge8lz+8H7kwpNrNZ+UJuAzh69Cjvvfce/f39tLe3MzQ0xPbt23n11VfTDq2hRMRfS/ov5FaWPQ38AHgR+GVETO5YM+OucOT2jmbt2rVzH7BZAW7pN4BFixbR3d1NR0cHCxcupKOjg+7ubhYtctdyNSV7QWwF1gH/AFgGbClQddpd4SKiLSLaVq1aNXeBms3ASb8BnD17loceeojBwUHOnTvH4OAgDz30EGfPnk07tEbzW8BfRcRERJwD/gz4p8A1SXcPeFc4q3Pu3mkAGzZsYP369WzZsoUPPviAxYsXs2XLFpYuXZp2aI3mNeAmSUvJde9sBoaBQeALwAGm7iRnVnfc0m8AHR0dHDx4kOXLl3
PVVVexfPlyDh48SEdHR9qhNZSIeJ7cBdsfAz8j9/3ZB9wPfEXSGPBRoC+1IM1m4aTfAJ544gmWLFnCO++8w8WLF3nnnXdYsmQJTzzxRNqhNZyI2BURvxERGyPiixHxQUS8EhGbIuLjEXFXRHyQdpxm03HSbwDj4+NcffXVHDp0iLNnz3Lo0CGuvvpqxsfH0w7NzOqMk36DmByxMzlO3107ZlZIRUm/lCnpkhYnx2PJ8y3VOAHLOXDgwJQ9cg8cOJB2SGZWh8oevZM3JX1DRJyW9Di5Kem3k5uSfkDSfyc3Jf3h5P5URHxc0uTU9X9Z8RkYCxYsYPHixezdu5evfe1rfOxjH2Pp0qV88IG7lhvdXK246ZU8G1el3TulTEnfmhyTPL9Zkir8fAMuXLhwxfDMpUuXcuHChZQiMrN6VXZLv4wp6c3A68lrz0t6l9zwtrfz39dT1Ytz+e/lxMQEExMTABw/fvyKehEFJ4maWcaU3dIvY0p6oVb9FZnIU9WLExGXbo899hjr1q3jyJEjABw5coR169bx2GOPXapj1rLzqUs3y65KZuRempIOIGnKlPSktZ8/JX0cWAOMJ91BHwFOVvD5lpjcEL27u/vSfW9vrzdKN7MrVNKnf2lKetI3vxk4yodT0mHqlPSDyTHJ80fCTdCq6ezsZGRkBICRkREnfDMrqOykX8aU9D7go0n5V4CdFcRtZmZlqGjBtYjYBey6rPgVYFOBumeAuyr5PDMzq4xX2TQzAF/gzQgvw2BmliFu6ZtlkGfcZpdb+mZmGeKkb2aWIU76Zg3IM29tOk76ZmYZ4gu5Zg3MrX27nJO+mc3II30ai7t3zMwyxC19swbhrhwrhlv6ZmYZ4qRvZpYhTvpmJZB0jaTvSfq5pFFJ/0TSCkmHJR1L7penHafZdJz0zUqzB/iLiPgN4B8Do+T2hngmItYDz+C9IqyOOembFUnS1cA/I9kYKCLORsQvye0VvT+pth+4M50IzWbnpG9WvF8DJoD/Keknkr4taRlwXUS8CZDcX5tmkGYzcdI3K94C4Abg4Yj4TeA9SujKkbRD0rCk4YmJibmK0WxGTvpmxRsHxpP9oSG3R/QNwFuSrgdI7k8UenFE7IuItohoW7VqVU0CrjYv5Db/VZT0SxnJoJwHJY1JeknSDdU5BbPaiIi/AV6X9ImkaDNwFDgIbEvKtgFPphCeWVEqbemXMpJhC7A+ue0AHq7ws83S0A18R9JLwKeA/wzsBm6WdAy4OTk2q0tlL8OQN5LhtyE3kgE4K2kr8Jmk2n7gh8D95EY4PBoRATyX/JVw/eQFMLP5ICJ+CrQVeGpzrWMxK0clLf1SRzI0A6/nvX48KTMzsxqpZMG1yZEM3RHxvKQ9zDySQQXK4opK0g5y3T+sXbu2gvDMrBi+MJstlbT0Sx3JMA6syXv9auCNy9+0EUY4mJnVq7KTfhkjGQ4CX0pG8dwEvOv+fDOz2qp0Pf3JkQyLgFeAe8n9kDwuqQt4Dbgrqfs0cDswBryf1DUzsxqqKOmXMpIhGbVzXyWfZ2ZmlfGMXDOzDHHSNzPLECd9M7MMcdI3M8sQJ30zswxx0jczyxAnfTOzDKl0cpaZZVz+2j3Hd9+RYiRWDLf0zcwyxEnfzCxDnPTNzDLESd/MLEOc9M3MMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLESd/MLEOc9M3MMsRJ36xEkpok/UTS/06O10l6XtIxSd+VtCjtGM2mU3HSL/YLIGlxcjyWPN9S6WebpeT3gNG84z8EvhUR64FTQFcqUZkVoRot/WK/AF3AqYj4OPCtpJ7ZvCJpNXAH8O3kWMBnge8lVfYDd6YTndnsKkr6JX4BtibHJM9vTuqbzSd/DPw+cDE5/ijwy4g4nxyPA82FXihph6RhScMTExNzH6lZAZW29Ev5AjQDrwMkz7+b1J/CXwyrV5I+B5yIiBfziwtUjUKvj4h9EdEWEW2rVq2akxjNZlN20i/jC1DUl8
NfDKtjnwY+L+k4cIDcX7V/DFwjaXJDotXAG+mEZza7Slr6pX4BxoE1AMnzHwFOVvD5ZjUVEV+PiNUR0QLcDRyJiH8FDAJfSKptA55MKUSzWZWd9Mv4AhxMjkmePxIRBf8Mtg+tWLECSUXfgKLrrlixIuWzaxj3A1+RNEauy7Iv5XjMpjUXe+TeDxyQ9J+An/DhF6AP+NPki3GS3A+FzeLUqVPM1W+jr6OXLyJ+CPwwefwKsCnNeGotf19cm1+qkvSL+QJExBngrmp8npmZlcczcs3MMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLESd/MLEPmYpy+mWVU/vj947vvSDESm45b+mZmGeKkb2ZzrmXnU57FWyec9M3MMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLESd/MLEOc9M3MMsRJ38wsQ5z0zcwyxGvv1LnYdTX8wUfm7r3NLFOc9Ouc/uPfzunG6PEHc/LWZlanyu7ekbRG0qCkUUkvS/q9pHyFpMOSjiX3y5NySXpQ0piklyTdUK2TMDOz4lTSp38e+GpEtAI3AfdJ2gDsBJ6JiPXAM8kxwBZgfXLbATxcwWebmVkZyk76EfFmRPw4efwrYBRoBrYC+5Nq+4E7k8dbgUcj5zngGknXlx25mZmVrCqjdyS1AL8JPA9cFxFvQu6HAbg2qdYMvJ73svGk7PL32iFpWNLwxMRENcIzq4pSuzTN6lHFSV/S3wX+F/BvI+JvZ6paoOyKK5QRsS8i2iKibdWqVZWGZ1ZNpXZpmtWdipK+pIXkEv53IuLPkuK3JrttkvsTSfk4sCbv5auBNyr5fLNaKqNL06zulD1kU5KAPmA0Iv5r3lMHgW3A7uT+ybzyL0s6ANwIvDvZDWQ238zUpSnp2mles4PcIAbWrl1bm0BT5J2y6lMlLf1PA18EPivpp8ntdnLJ/mZJx4Cbk2OAp4FXgDHgfwD/poLPNktNCV2aU7jr0upB2S39iBiicD89wOYC9QO4r9zPM6sHM3VpJq38/C5Ns7rjtXfMilRElyZM7dI0qztehsGseJNdmj+T9NOk7N+T68J8XFIX8BpwV0rxmc3KSd+sSKV2aZrVI3fvmJlliFv6ZlYz+cM4j+++I8VIssstfTOzDHHSNzPLECd9M7MMcdI3M8sQX8idB3Jzgqpv+XKvAGyWNU76da7U/XElzdmeumY2/7l7x8wsQ5z0zcwyxEnfzCxDnPTNrK607HzKG7DMISd9M7MMcdI3M8sQD9k0s1R48bV0uKVvZpYhNW/pS7oN2AM0Ad+OiN2zvMTMMmi6i7n+q6AyNW3pS2oC/gTYAmwAOiVtqGUMZmZZVuuW/iZgLCJeAZB0ANgKHK1xHPPeTOvxFHrOSzNYPfMQzdqpddJvBl7POx4HbqxxDA3BSdzMylHrC7mFmqdTspekHZKGJQ1PTEzUKCwzs2yodUt/HFiTd7waeCO/QkTsA/YBtLW1uTlrZtOa7BaqxsXdrAwhrXVL/wVgvaR1khYBdwMHaxyDmVlm1bSlHxHnJX0ZOERuyGZ/RLxcyxjMrLFlpcVerpqP04+Ip4Gna/25Zta4qj36p5F/ODwj16wKJN0m6ReSxiTtTDses+l47R2zCuVNOryZ3GCFFyQdjAjPP5kDpbTqqzGrt5yLxXP5l0KlF6/d0jer3KVJhxFxFpicdGhWd1TPk3wkTQCvph3HPLMSeDvtIOaRj0XEqkreQNIXgNsi4l8nx18EboyIL19WbwewIzn8BPCLAm/XKP9+Po90Tfv/uq67dyr9MmaRpOGIaEs7joyZddIhTJ2DMu0bNci/n8+jfrl7x6xys046NKsXTvpmlfOkQ5s36rp7x8oyY/eBVV+VJx02yr+fz6NO1fWFXDMzqy5375iZZYiTvplZhjjpNwBJ/ZJOSBpJOxYrz3xbxkHScUk/k/RTScNJ2QpJhyUdS+6XJ+WS9GBybi9JuiHl2K/4vpQTu6RtSf1jkralcS7lcNJvDI8At6
UdhJVnHu8d3RERn8obx74TeCYi1gPPJMeQO6/1yW0H8HDNI53qEa78vpQUu6QVwC5yO/9tAnZN/lDUOyf9BhARPwJOph2Hla1RlnHYCuxPHu8H7swrfzRyngOukXR9GgHCtN+XUmO/FTgcEScj4hRwmHnS8HLSN0tfob2jm1OKpVgB/EDSi8nyEgDXRcSbAMn9tUn5fDi/UmOfD+dUkMfpm6WvqGUc6synI+INSdcChyX9fIa68/H8Jk0X+7w9J7f0zdI375ZxiIg3kvsTwPfJdVG9Ndltk9yfSKrPh/MrNfb5cE4FOembpW9eLeMgaZmkvzf5GLgFGCEX8+Qolm3Ak8njg8CXkpEwNwHvTnal1JFSYz8E3CJpeXIB95akrO65e6cBSBoAPgOslDQO7IqIvnSjsmLNw72jrwO+LwlyOeSxiPgLSS8Aj0vqAl4D7krqPw3cDowB7wP31j7kDxX6vgC7KSH2iDgp6ZvkfrABvhER82IwhZdhMDPLEHfvmJlliJO+mVmGOOmbmWWIk76ZWYY46ZuZZYiTvplZhjjpm5llyP8H/eAjQD6Gd5wAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"\n",
"\n",
"# Training split: the first DHtrain_data_size samples of each encoded class, interleaved.\n",
"# Labels are one-hot: [1, 0] for DR_encoded rows and [0, 1] for HS_encoded rows\n",
"# (presumably drama and history, per the section heading).\n",
"DHtrain_data_size = 1400 // 2\n",
"\n",
"DH_X_train, DH_Y_train = [], []\n",
"\n",
"for i in range(DHtrain_data_size):\n",
"    DH_X_train.extend((DR_encoded[i], HS_encoded[i]))\n",
"    DH_Y_train.extend(([1, 0], [0, 1]))\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Length of every encoded sample in the split; printed as max / mean and plotted below.\n",
"len_result = list(map(len, DH_X_train))\n",
"print(\"줄거리 최대 길이 : \", max(len_result))\n",
"print(\"줄거리 평균 길이 : \", sum(len_result) / len(len_result))\n",
"\n",
"# Left panel: box plot of the lengths. Right panel: 50-bin histogram of the same values.\n",
"plt.subplot(1, 2, 1)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(1, 2, 2)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"줄거리 최대 길이 : 1880\n",
"줄거리 평균 길이 : 183.68142857142857\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAcaklEQVR4nO3df5CcVb3n8feHOMgaw00igcoP2IAEa0J2TXQKKBitRBEil7rBresucynNymxFs2RKd6UqkflDuLtDEfYqtSI/anCyhFtkEK6i1BXU3OzcS00p6gQTyGTkMiDKmFQSCZgILPn13T/6dOjMz56Znumefj6vqqf6eU6fp/s76cl3Tp/nPOcoIjAzs2w4rdwBmJnZ5HHSNzPLECd9M7MMcdI3M8sQJ30zswx5T7kDGMlZZ50VCxcuLHcYVqW2b9/+x4iYM9nv699rm0jD/V5XfNJfuHAhXV1d5Q7DqpSk35Xjff17bRNpuN9rd++YmWWIk76ZWYY46ZuZZYiTvplZhjjpm5lliJN+lWhvb2fJkiVMmzaNJUuW0N7eXu6QzKwCVfyQTRtZe3s7zc3NtLW1UV9fT2dnJ42NjQA0NDSUOTozqyRu6VeBlpYW2traWLFiBTU1NaxYsYK2tjZaWlrKHZqZVRgn/SrQ09NDfX39KWX19fX09PSUKSIzq1RO+lWgtraWzs7OU8o6Ozupra0tU0RTm6QzJP1S0k5J3ZJuS+XnS/qFpBclfVfS6an8vem4Nz2/cDzvv3DDj05uZqXmpF8FmpubaWxspKOjg6NHj9LR0UFjYyPNzc3lDm2qegf4RER8GFgKrJR0GbARuCsiFgGvA42pfiPwekRcCNyV6plVJF/IrQL5i7VNTU309PRQW1tLS0uLL+KOUeTWEP1zOqxJWwCfAP4mlW8GbgXuA1alfYB/AL4tSeG1SK0COelXiYaGBif5EpI0DdgOXAjcA7wEvBERx1KVPmB+2p8PvAoQEcck/Qn4APDHfq+5BlgDcN555030j2A2KHfvmA0iIo5HxFJgAXAJMNgFknxLXsM8V/iarRFRFxF1c+ZM+mzOZoCTvtmwIuIN4J+By4CZkvLfjhcAe9J+H3AuQHr+L4CDkxupWXGc9M36kTRH0sy0/2+AK4EeoAP461RtNfDDtP9EOiY9/3/dn2+Vyn36ZgPNBTanfv3TgEcj4h8l7QYekfQ/gV8Dbal+G/D3knrJtfCvL0fQZsVw0jfrJyKeA5YNUv4yuf79/uX/D/jsJIRmNm7u3jEzy5ARk76kTZL2S9pVUPZdSTvS9oqkHal8oaS3C567v+Ccj0p6Pt21+C1Jg414MDOzCVRM986DwLeBh/IFEfGf8vuSvgH8qaD+S2moW3/3kRuj/AzwJLASeGr0IZuZ2ViN2NKPiKcZYvhZaq3/R2DYydslzQXOjIifp1ENDwHXjT5cMzMbj/H26X8M2BcRLxaUnS/p15L+RdLHUtl8cmOZ8wrvZhxA0hpJXZK6Dhw4MM4Qs8GLqJhZMcY7eqeBU1v5e4HzIuI1SR8FfiDpYoq8Y/HkExGtQCtAXV2dxzuPwIuomFmxxtzST3ce/gfgu/myiHgnIl5L+9vJzVdyEbmW/YKC0wvvZrRx8iIqZlas8XTvXAn8JiJOdtukOxmnpf0LgEXAyxGxFzgs6bJ0HeDzvHs3o42TF1Exs2IVM2SzHfg58CFJfZLyc4hfz8ALuB8HnpO0k9wUs1+KiPxF4LXAd4Bect8APHKnRLyIipkVa8Q+/YgYtFM4Iv7zIGXfA743RP0uYMko47Mi5BdR6d+n7+4dM+vP0zBUAS+iYmbFctKvEl5ExcyK4bl3zMwyxEnfzCxDnPTNzDLESd/MLEOc9M3MMsRJ38wsQ5z0zcwyxOP0zSrYwg0/Orn/yh1/WcZIrFq4pW9mliFO+mZmGeKkb2aWIU76ZmYZ4qRvZpYhTvpm/Ug6V1KHpB
5J3ZK+nMpvlfQHSTvSdk3BOV+T1CvpBUlXly96s+F5yKbZQMeAr0bEs5JmANslbU3P3RURf1dYWdJicivJXQzMA/5J0kURcXxSozYrQjHLJW6StF/SroKyUbd4JK1MZb2SNpT+RzErjYjYGxHPpv3DQA8wf5hTVgGPRMQ7EfFbckuCXjLxkZqNXjHdOw8CKwcpvysilqbtSRjQ4lkJ3CtpWlos/R7g08BioCHVNatokhYCy4BfpKJ1kp5LjaFZqWw+8GrBaX0M8kdC0hpJXZK6Dhw4MIFRmw1txKQfEU8DB0eqlwzV4rkE6I2IlyPiCPBIqmtWsSS9n9yaz1+JiEPAfcAHgaXAXuAb+aqDnB4DCiJaI6IuIurmzJkzQVGbDW88F3JH0+IpqiWU5xaRlZukGnIJ/+GI+D5AROyLiOMRcQJ4gHe7cPqAcwtOXwDsmcx4zYo11qQ/2hZPUS2hk0+4RWRlJElAG9ATEd8sKJ9bUO0zQP461xPA9ZLeK+l8YBHwy8mK12w0xjR6JyL25fclPQD8YzocrsXjlpBNFVcAnwOel7Qjld1C7lrUUnINlleALwJERLekR4Hd5Eb+3OSRO1apxpT0Jc2NiL3psH+LZ4ukb5IbupZv8QhYlFpBfyB3sfdvxhO42USJiE4G/3b65DDntAAtExaUWYmMmPQltQPLgbMk9QFfB5aPtsUjaR3wE2AasCkiukv+05iZ2bBGTPoR0TBIcdsw9Qdt8aRhnUO2lMzMbOJ5GgYzswxx0jczyxAnfTOzDHHSNzPLECd9M7MMcdI3M8sQJ30zswxx0jczyxAn/SrR3t7OkiVLmDZtGkuWLKG9vb3cIZlZBfJyiVWgvb2d5uZm2traqK+vp7Ozk8bGRgAaGga7odrMssot/SrQ0tJCW1sbK1asoKamhhUrVtDW1kZLi+f/MrNTOelXgZ6eHurr608pq6+vp6enp0wRmVmlctKvArW1tXR2dp5S1tnZSW1tbZkiMrNK5aRfBZqbm2lsbKSjo4OjR4/S0dFBY2Mjzc3N5Q7NzCqML+RWgfzF2qamJnp6eqitraWlpcUXcc1sACf9KtHQ0OAkb2YjcveOmVmGOOmbmWXIiElf0iZJ+yXtKij7X5J+I+k5SY9LmpnKF0p6W9KOtN1fcM5HJT0vqVfStyQNtvC0mZlNoGJa+g8CK/uVbQWWRMS/B/4V+FrBcy9FxNK0famg/D5gDbAobf1f08zMJtiIST8ingYO9iv7aUQcS4fPAAuGew1Jc4EzI+LnERHAQ8B1YwvZzMzGqhR9+jcCTxUcny/p15L+RdLHUtl8oK+gTl8qG5SkNZK6JHUdOHCgBCGamRmMM+lLagaOAQ+nor3AeRGxDPjvwBZJZwKD9d/HUK8bEa0RURcRdXPmzBlPiGZmVmDMSV/SauBa4IbUZUNEvBMRr6X97cBLwEXkWvaFXUALgD1jfW+ziSTpXEkdknokdUv6ciqfLWmrpBfT46xUrjQ4oTcNbvhIeX8Cs6GNKelLWgmsB/4qIt4qKJ8jaVrav4DcBduXI2IvcFjSZWnUzueBH447erOJcQz4akTUApcBN0laDGwAtkXEImBbOgb4NO8OUFhDbtCCWUUqZshmO/Bz4EOS+iQ1At8GZgBb+w3N/DjwnKSdwD8AX4qI/EXgtcB3gF5y3wAKrwOYVYyI2BsRz6b9w0APuWtQq4DNqdpm3h2MsAp4KHKeAWamwQtmFWfEaRgiYrB7+9uGqPs94HtDPNcFLBlVdGZlJmkhsAz4BXBO+tZKROyVdHaqNh94teC0/ECFvf1eaw25bwKcd955Exq32VB8R67ZECS9n1wj5isRcWi4qoOUDRio4AEKVgmc9M0GIamGXMJ/OCK+n4r35btt0uP+VN4HnFtwugcqWMVy0q8SXhi9dNJggzagJyK+WfDUE8DqtL+adwcjPAF8Po3iuQz4U74byKzSeGrlKuCF0UvuCuBzwPOSdqSyW4A7gEfTYIbfA5
9Nzz0JXENukMJbwBcmN1yz4jnpV4HChdGBkwujNzU1OemPQUR0Mng/PcAnB6kfwE0TGpRZibh7pwr09PTQ19d3SvdOX1+fF0Y3swHc0q8C8+bNY/369Tz88MMnu3duuOEG5s2bV+7QzKzCuKVfJdJMGEMem5mBk35V2LNnD3feeSdNTU2cccYZNDU1ceedd7Jnj0cNmtmp3L1TBWpra1mwYAG7dp1c3IyOjg5qa2vLGJWZVSK39KtAc3MzjY2NdHR0cPToUTo6OmhsbKS5ubncoZlZhXFLvwrkh2U2NTXR09NDbW0tLS0tHq5pZgM46VeJhoYGJ3kzG5G7d8zMMsRJv0p47h0zK4a7d6qA594xs2K5pV8FCufeqampOTn3TktLS7lDM7MKU1TSl7RJ0n5JuwrKRr1ItKTVqf6LaWF1K4Genh7q6+tPKauvr/fcO2Y2QLEt/QeBlf3KRrVItKTZwNeBS4FLgK/n/1DY+NTW1nLbbbed0qd/2223+eYsMxugqKQfEU8DB/sVj3aR6KuBrRFxMCJeB7Yy8A+JjcGKFSvYuHEjN954I4cPH+bGG29k48aNJ6daNjPLG0+f/imLRAMjLRI9VPkAktZI6pLUdeDAgXGEmA0dHR2sX7+eTZs2MWPGDDZt2sT69evp6Ogod2hmVmEm4kLuUItEF7V4NHgB6dHq6enh4MGD9Pb2cuLECXp7ezl48KD79M1sgPEk/dEuEu3FoyfIzJkzaW1t5fbbb+fNN9/k9ttvp7W1lZkzZ5Y7NDOrMONJ+qNdJPonwFWSZqULuFelMhunQ4cOceaZZ7Js2TJqampYtmwZZ555JocOHSp3aGZWYYq6OUtSO7AcOEtSH7lROKNaJDoiDkr6H8CvUr2/jYj+F4dtDI4dO8a8efP4xCc+cbLs4osvpru7u4xRmVklKnb0TkNEzI2ImohYEBFtEfFaRHwyIhalx4OpbkTETRHxwYj4dxHRVfA6myLiwrT9n4n6obJGEt3d3axdu5Y33niDtWvX0t3djTTU2t5mllW+I7cKRASSuPDCC6mpqeHCCy9EkpdMNLMBnPSrxPLly7n55puZPn06N998M8uXLy93SGZWgZz0q0RXVxfbtm3jyJEjbNu2ja6urpFPMrPMcdKvAtOnT+fw4cM89thjvPXWWzz22GMcPnyY6dOnlzs0M6swTvpV4O233+bKK6/k/vvvZ+bMmdx///1ceeWVvP322+UObUoaYoLBWyX9QdKOtF1T8NzX0gSDL0i6ujxRmxXHSb8K1NbWcumll7J48WJOO+00Fi9ezKWXXuoJ18buQQafF+quiFiaticBJC0GrgcuTufcK2napEVqNkpO+lXAE66V1hATDA5lFfBIRLwTEb8ld3/KJRMWnNk4OelXgY6ODq699lpuueUWpk+fzi233MK1117rCddKb11aI2JTwbTgnkjQphQn/Sqwe/dudu7cyVNPPcWRI0d46qmn2LlzJ7t37y53aNXkPuCDwFJgL/CNVO6JBG1KcdKvAqeffjrr1q07ZbnEdevWcfrpp5c7tKoREfsi4nhEnAAe4N0uHE8kaFOKk34VOHLkCHfffTcdHR0cPXqUjo4O7r77bo4cOVLu0KpGfkbZ5DNAfmTPE8D1kt4r6XxyK8b9crLjMytWUROuWWVbvHgx1113HU1NTfT09FBbW8sNN9zAD37wg3KHNiUNMcHgcklLyXXdvAJ8ESAiuiU9CuwGjgE3RcTxcsRtVgwn/SrQ3NxMc3MzbW1t1NfX09nZSWNjIy0tLeUObUqKiIZBituGqd8C+B/bpgQn/SrQ0JDLUYUt/ZaWlpPlZmZ5TvpVoqGhwUnezEbkC7lmZhnipF8lmpqaOOOMM5DEGWecQVNTU7lDMrMKNOakL+lDBZNP7ZB0SNJXPDHV5GtqauKee+7h+PHcoJHjx49zzz33OPGb2QBjTvoR8UJ+8ingo+TWw308Pe2JqSbRvffeiyQ2btzIm2++ycaNG5
HEvffeW+7QzKzClKp755PASxHxu2HqeGKqCXLixAkuuOCCU1bOuuCCCzhx4kS5Q7MSWrjhRyzc8KNyh2FTXKmS/vVAe8GxJ6aaZL29vScXQpdEb29vmSMys0o07qQv6XTgr4DHUpEnpiqT973vfac8mpn1V4qW/qeBZyNiH3hiqnL685//fMqjmVl/pUj6DRR07XhiKjOzyjWupC/pfcCngO8XFN8p6XlJzwErgP8GuYmpgPzEVD/GE1OV3OWXX86ePXu4/PLLyx2KmVWocU3DEBFvAR/oV/a5Yep7YqoJ9LOf/Yx58+aVOwwzq2C+I9fMLEOc9M3MMsRJ38wsQ5z0zcwyxEm/iqxdu5Y33niDtWvXljsUM6tQXkSlirS2tnLfffcxbZrnsTOzwbmlb2aWIW7pT1H5ydUKFc6n379exKDTHJlZxrilP0VFxMlty5YtzJgxg5qaGgBqamqYMWMGW7ZsOVnHzAzc0q8K+QXRW1pa6O7u5qKLLqK5udkLpVepwjn1X7njL8sYiU1FTvpVoqGhgYaGBiSxa9eukU8ws0xy946ZWYY46Zv1k1Z82y9pV0HZbElbJb2YHmelckn6lqTetFrcR8oXudnInPTNBnoQWNmvbAOwLSIWAdvSMeQWEVqUtjXkVo4zq1hO+mb9RMTTwMF+xauAzWl/M3BdQflDkfMMMLPfQkJmFcVJ36w450TEXoD0eHYqnw+8WlCvL5UNIGmNpC5JXQcOHJjQYM2G4qRvNj4D75KDQW+MiIjWiKiLiLo5c+ZMcFhmg3PSNyvOvny3TXrcn8r7gHML6i0A9kxybGZFG3fSl/RKWhN3h6SuVOaRDlZtngBWp/3VwA8Lyj+ffrcvA/6U7wYyq0SlaumviIilEVGXjj3SwaYsSe3Az4EPSeqT1AjcAXxK0ovAp9IxwJPAy0Av8ADwX8sQslnRJuqO3FXA8rS/GfhnYD0FIx2AZyTNlDTXLSOrJBEx1PwVnxykbgA3TWxEZqVTipZ+AD+VtF3SmlQ2rpEOHuVgZjYxStHSvyIi9kg6G9gq6TfD1C1qpENEtAKtAHV1dZ4i0sysRMbd0o+IPelxP/A4cAke6WBmVpHGlfQlTZc0I78PXAXswiMdzMwq0ni7d84BHk+rM70H2BIRP5b0K+DRNOrh98BnU/0ngWvIjXR4C/jCON/fzMxGYVxJPyJeBj48SPlreKSDmVnF8R25ZmYZ4qRvZpYhTvpmZhniNXLNpjAvkm6j5aRvVsX8R8H6c/eOmVmGuKVvViXcqrdiuKVvZpYhTvpmZhnipG9mliFO+mZmGeKkb2aWIR69Y5YRHt1j4Ja+mVmmuKVvVoUKW/VmhdzSNzPLELf0zTLOff3ZMuaWvqRzJXVI6pHULenLqfxWSX+QtCNt1xSc8zVJvZJekHR1KX6Aajd79mwkFb0BRdedPXt2mX86M5ts42npHwO+GhHPpsXRt0vamp67KyL+rrCypMXA9cDFwDzgnyRdFBHHxxFD1Xv99dfJrTJZevk/EmaWHWNu6UfE3oh4Nu0fBnqA+cOcsgp4JCLeiYjfklsc/ZKxvr9ZOUh6RdLz6VtsVyqbLWmrpBfT46xyx2k2lJJcyJW0EFgG/CIVrZP0nKRNBf8B5gOvFpzWxxB/JCStkdQlqevAgQOlCNGslFZExNKIqEvHG4BtEbEI2JaOzSrSuJO+pPcD3wO+EhGHgPuADwJLgb3AN/JVBzl90H6LiGiNiLqIqJszZ854QzSbaKuAzWl/M3BdGWMxG9a4kr6kGnIJ/+GI+D5AROyLiOMRcQJ4gHe7cPqAcwtOXwDsGc/7m5VBAD+VtF3SmlR2TkTshVy3J3D2YCf6G6xVgjFfyFXuKmAb0BMR3ywon5v/DwB8BtiV9p8Atkj6JrkLuYuAX471/c3K5IqI2CPpbGCrpN8Ue2JEtAKtAHV1dRNzdb5Ivnkru8YzeucK4HPA85J2pLJbgAZJS8m1iF4BvggQEd2SHgV2kx
v5c5NH7thUExF70uN+SY+T+ya7L9/YkTQX2F/WIM2GMeakHxGdDN5P/+Qw57QALWN9T7NykjQdOC0iDqf9q4C/JfctdjVwR3r8YfmiNBue78g1K945wOPp/ob3AFsi4seSfgU8KqkR+D3w2TLGaDYsJ32zIkXEy8CHByl/Dfjk5EdkNnqecM3MLEOc9M3MMsRJ38wsQ5z0zcwyxEnfzCxDnPTNzDLEQzYrXHz9TLj1Lybutc0sU5z0K5xuOzShi6jErRPy0mZWody9Y2aWIW7pm9lJg82+6cXSq4tb+mZmGeKkb2aWIU76ZmYZ4j79KSBN5Vtys2bNGrmSmVUVJ/0KN9rhmpImbIinmU19k969I2mlpBck9UraMNnvb2aWZZPa0pc0DbgH+BTQB/xK0hMRsXsy4zCz4hUO4/Twzalvslv6lwC9EfFyRBwBHgFWTXIMZmaZNdlJfz7wasFxXyo7haQ1krokdR04cGDSgptKJA26DfWcmRlMftIfLPsMuOoYEa0RURcRdXPmzJmEsKaeiBjVZmYGk5/0+4BzC44XAHsmOQYzK4GFG3406LQNVtkmO+n/Clgk6XxJpwPXA09McgxmZpk1qaN3IuKYpHXAT4BpwKaI6J7MGMzMsmzSb86KiCeBJyf7fc1sYgzVxePhnZXJd+Sa2YTwNM2VyUnfrEQkrQT+N7muy+9ExB1lDqnkxnvhdrQ3euXr+49F6Tjpm5WA7zafWL4ruHSc9M1K4+Td5gCS8nebO+kXYTTfIEa6hlCKawyT/UdmNN9oxvvtR5V+446kA8Dvyh3HFHIW8MdyBzGF/NuIGPcdgJL+GlgZEf8lHX8OuDQi1hXUWQOsSYcfAl4Y4uUq8TN0TCOrpHiG/L2u+JZ+Kf5DZomkroioK3ccGTTi3eYR0Qq0jvhCFfgZOqaRVVo8Q/HKWWal4bvNbUpw0jcrDd9tblNCxXfv2KiN2H1gpVfiu80r8TN0TCOrtHgGVfEXcs3MrHTcvWNmliFO+mZmGeKkXyUkbZK0X9KucsdiYyNppaQXJPVK2jDJ7/2KpOcl7ZDUlcpmS9oq6cX0OCuVS9K3UpzPSfpIiWIY8Ds8lhgkrU71X5S0egJiulXSH9K/1Q5J1xQ897UU0wuSri4oL9tnO8BoV2DyVpkb8HHgI8CucsfibUyf3zTgJeAC4HRgJ7B4Et//FeCsfmV3AhvS/gZgY9q/BniK3L0JlwG/KFEMA36HRxsDMBt4OT3OSvuzShzTrcDNg9RdnD639wLnp89zWrk/2/6bW/pVIiKeBg6WOw4bs5PTOETEESA/jUM5rQI2p/3NwHUF5Q9FzjPATElzx/tmQ/wOjzaGq4GtEXEwIl4HtgIrSxzTUFYBj0TEOxHxW6CX3OdaUZ+tk75ZZZgPvFpw3JfKJksAP5W0PU0XAXBOROwFSI9np/LJjHW0MUxWbOtSt9KmfJdTBcRUFCd9s8ow4jQOE+yKiPgI8GngJkkfH6ZuuWMdLobJiO0+4IPAUmAv8I0KiKloTvpmlaGs0zhExJ70uB94nFyXxL58t0163F+GWEcbw4THFhH7IuJ4RJwAHiD3b1XWmEbDSd+sMpRtGgdJ0yXNyO8DVwG70vvnR7+sBn6Y9p8APp9G0FwG/CnfBTMBRhvDT4CrJM1K3S5XpbKS6Xf94jPk/q3yMV0v6b2SzgcWAb+k0qboKNcVZG+l3YB2cl81j5JrWTSWOyZvo/4MrwH+ldxIj+ZJfN8LyI0o2Ql0598b+ACwDXgxPc5O5SK3YMxLwPNAXYniGPA7PJYYgBvJXUTtBb4wATH9fXrP58gl77kF9ZtTTC8Any73ZzvY5mkYzMwyxN07ZmYZ4qRvZpYhTvpmZhnipG9mliFO+mZmGeKkb2aWIU76ZmYZ8v8BvNZYS6tJWloAAAAASUVORK5CYII=\n",
"text/plain": [
"<Figure size 432x288 with 2 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"DH_X_test = []\n",
"DH_Y_test = [] \n",
"\n",
"for i in range(DHtrain_data_size):\n",
" DH_X_test.append(DR_encoded[DHtrain_data_size+i])\n",
" DH_Y_test.append([1,0])\n",
" DH_X_test.append(HS_encoded[DHtrain_data_size+i])\n",
" DH_Y_test.append([0,1])\n",
"\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"len_result = [len(s) for s in DH_X_test]\n",
"print(\"줄거리 최대 길이 : \",max(len_result))\n",
"print(\"줄거리 평균 길이 : \",sum(len_result)/len(len_result))\n",
"\n",
"plt.subplot(1,2,1)\n",
"plt.boxplot(len_result)\n",
"plt.subplot(1,2,2)\n",
"plt.hist(len_result, bins=50)\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Train on 1400 samples, validate on 1400 samples\n",
"Epoch 1/3\n",
" 64/1400 [>.............................] - ETA: 31sWARNING:tensorflow:Early stopping conditioned on metric `val_loss` which is not available. Available metrics are: \n",
"WARNING:tensorflow:Can save best model only with val_acc available, skipping.\n"
]
},
{
"ename": "InvalidArgumentError",
"evalue": " indices[46,167] = 15001 is not in [0, 5002)\n\t [[node sequential_9/embedding_9/embedding_lookup (defined at <ipython-input-49-edff8fe77726>:22) ]] [Op:__inference_distributed_function_17701]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_9/embedding_9/embedding_lookup:\n sequential_9/embedding_9/embedding_lookup/16556 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mInvalidArgumentError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m<ipython-input-49-edff8fe77726>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 21\u001b[0m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mloss\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'binary_crossentropy'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0moptimizer\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'adam'\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmetrics\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m'acc'\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 22\u001b[1;33m \u001b[0mmodel\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mDH_X_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDH_Y_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mvalidation_data\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mDH_X_test\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mDH_Y_test\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepochs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m3\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m64\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcallbacks\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mes\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmc\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m 817\u001b[0m \u001b[0mmax_queue_size\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mmax_queue_size\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 818\u001b[0m \u001b[0mworkers\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mworkers\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 819\u001b[1;33m use_multiprocessing=use_multiprocessing)\n\u001b[0m\u001b[0;32m 820\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 821\u001b[0m def evaluate(self,\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mfit\u001b[1;34m(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)\u001b[0m\n\u001b[0;32m 340\u001b[0m \u001b[0mmode\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[0mtraining_context\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mtraining_context\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 342\u001b[1;33m total_epochs=epochs)\n\u001b[0m\u001b[0;32m 343\u001b[0m \u001b[0mcbks\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmake_logs\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mmodel\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mepoch_logs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mtraining_result\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mModeKeys\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mTRAIN\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 344\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2.py\u001b[0m in \u001b[0;36mrun_one_epoch\u001b[1;34m(model, iterator, execution_function, dataset_size, batch_size, strategy, steps_per_epoch, num_samples, mode, training_context, total_epochs)\u001b[0m\n\u001b[0;32m 126\u001b[0m step=step, mode=mode, size=current_batch_size) as batch_logs:\n\u001b[0;32m 127\u001b[0m \u001b[1;32mtry\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 128\u001b[1;33m \u001b[0mbatch_outs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0miterator\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 129\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mStopIteration\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mOutOfRangeError\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[1;31m# TODO(kaftan): File bug about tf function and errors.OutOfRangeError?\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\keras\\engine\\training_v2_utils.py\u001b[0m in \u001b[0;36mexecution_function\u001b[1;34m(input_fn)\u001b[0m\n\u001b[0;32m 96\u001b[0m \u001b[1;31m# `numpy` translates Tensors to values in Eager mode.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 97\u001b[0m return nest.map_structure(_non_none_constant_value,\n\u001b[1;32m---> 98\u001b[1;33m distributed_function(input_fn))\n\u001b[0m\u001b[0;32m 99\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 100\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mexecution_function\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 566\u001b[0m \u001b[0mxla_context\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mExit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 567\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 568\u001b[1;33m \u001b[0mresult\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 569\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 570\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mtracing_count\u001b[0m \u001b[1;33m==\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_get_tracing_count\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\def_function.py\u001b[0m in \u001b[0;36m_call\u001b[1;34m(self, *args, **kwds)\u001b[0m\n\u001b[0;32m 630\u001b[0m \u001b[1;31m# Lifting succeeded, so variables are initialized and we can run the\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 631\u001b[0m \u001b[1;31m# stateless function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 632\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_stateless_fn\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m*\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mkwds\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 633\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 634\u001b[0m \u001b[0mcanon_args\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcanon_kwds\u001b[0m \u001b[1;33m=\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 2361\u001b[0m \u001b[1;32mwith\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_lock\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2362\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_maybe_define_function\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2363\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0mgraph_function\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_filtered_call\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mkwargs\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# pylint: disable=protected-access\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2364\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2365\u001b[0m \u001b[1;33m@\u001b[0m\u001b[0mproperty\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_filtered_call\u001b[1;34m(self, args, kwargs)\u001b[0m\n\u001b[0;32m 1609\u001b[0m if isinstance(t, (ops.Tensor,\n\u001b[0;32m 1610\u001b[0m resource_variable_ops.BaseResourceVariable))),\n\u001b[1;32m-> 1611\u001b[1;33m self.captured_inputs)\n\u001b[0m\u001b[0;32m 1612\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1613\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_call_flat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcaptured_inputs\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcancellation_manager\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36m_call_flat\u001b[1;34m(self, args, captured_inputs, cancellation_manager)\u001b[0m\n\u001b[0;32m 1690\u001b[0m \u001b[1;31m# No tape is watching; skip to running the function.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1691\u001b[0m return self._build_call_outputs(self._inference_function.call(\n\u001b[1;32m-> 1692\u001b[1;33m ctx, args, cancellation_manager=cancellation_manager))\n\u001b[0m\u001b[0;32m 1693\u001b[0m forward_backward = self._select_forward_and_backward_functions(\n\u001b[0;32m 1694\u001b[0m \u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\function.py\u001b[0m in \u001b[0;36mcall\u001b[1;34m(self, ctx, args, cancellation_manager)\u001b[0m\n\u001b[0;32m 543\u001b[0m \u001b[0minputs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0margs\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 544\u001b[0m \u001b[0mattrs\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"executor_type\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mexecutor_type\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"config_proto\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mconfig\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 545\u001b[1;33m ctx=ctx)\n\u001b[0m\u001b[0;32m 546\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 547\u001b[0m outputs = execute.execute_with_cancellation(\n",
"\u001b[1;32mC:\\ProgramData\\Anaconda3\\lib\\site-packages\\tensorflow_core\\python\\eager\\execute.py\u001b[0m in \u001b[0;36mquick_execute\u001b[1;34m(op_name, num_outputs, inputs, attrs, ctx, name)\u001b[0m\n\u001b[0;32m 65\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 66\u001b[0m \u001b[0mmessage\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mmessage\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 67\u001b[1;33m \u001b[0msix\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mraise_from\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcore\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_status_to_exception\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0me\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmessage\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 68\u001b[0m \u001b[1;32mexcept\u001b[0m \u001b[0mTypeError\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 69\u001b[0m keras_symbolic_tensors = [\n",
"\u001b[1;32m~\\AppData\\Roaming\\Python\\Python37\\site-packages\\six.py\u001b[0m in \u001b[0;36mraise_from\u001b[1;34m(value, from_value)\u001b[0m\n",
"\u001b[1;31mInvalidArgumentError\u001b[0m: indices[46,167] = 15001 is not in [0, 5002)\n\t [[node sequential_9/embedding_9/embedding_lookup (defined at <ipython-input-49-edff8fe77726>:22) ]] [Op:__inference_distributed_function_17701]\n\nErrors may have originated from an input operation.\nInput Source operations connected to node sequential_9/embedding_9/embedding_lookup:\n sequential_9/embedding_9/embedding_lookup/16556 (defined at C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:112)\n\nFunction call stack:\ndistributed_function\n"
]
}
],
"source": [
"max_len = 200\n",
"\n",
"DH_X_train = pad_sequences(DH_X_train, maxlen=max_len)\n",
"DH_X_test = pad_sequences(DH_X_test, maxlen=max_len)\n",
"\n",
"\n",
"model = Sequential()\n",
"model.add(Embedding(5002, 120))\n",
"model.add(LSTM(120))\n",
"model.add(Dense(2, activation='sigmoid'))\n",
"\n",
"es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=4)\n",
"mc = ModelCheckpoint('best_model.h5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)\n",
"\n",
"DH_X_train = np.array(DH_X_train)\n",
"DH_X_test = np.array(DH_X_test)\n",
"DH_Y_train = np.array(DH_Y_train)\n",
"DH_Y_test = np.array(DH_Y_test)\n",
"\n",
"\n",
"model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['acc'])\n",
"model.fit(DH_X_train, DH_Y_train, validation_data=(DH_X_test, DH_Y_test), epochs=3, batch_size=64, callbacks=[es, mc])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}