Showing 1 changed file with 37 additions and 5 deletions
... | @@ -2,10 +2,15 @@ import os | ... | @@ -2,10 +2,15 @@ import os |
2 | import csv | 2 | import csv |
3 | import re | 3 | import re |
4 | 4 | ||
5 | +import matplotlib.pyplot as plt | ||
6 | +import numpy as np | ||
5 | from konlpy.tag import Okt | 7 | from konlpy.tag import Okt |
6 | from konlpy.tag import Komoran | 8 | from konlpy.tag import Komoran |
9 | +from PIL import Image | ||
7 | 10 | ||
8 | from textrank import KeywordSummarizer | 11 | from textrank import KeywordSummarizer |
12 | +from wordcloud import WordCloud, get_single_color_func | ||
13 | + | ||
9 | 14 | ||
10 | okt = Okt() | 15 | okt = Okt() |
11 | def Okt_tokenizer(sent): | 16 | def Okt_tokenizer(sent): |
... | @@ -15,9 +20,13 @@ def Okt_tokenizer(sent): | ... | @@ -15,9 +20,13 @@ def Okt_tokenizer(sent): |
15 | komoran = Komoran() | 20 | komoran = Komoran() |
16 | def komoran_tokenizer(sent): | 21 | def komoran_tokenizer(sent): |
17 | words = komoran.pos(sent, join=True) | 22 | words = komoran.pos(sent, join=True) |
18 | - words = [w for w in words if('/NNG' in w or '/NNP' in w)] | 23 | + words = [w for w in words if('/NNG' in w or '/NNP' in w) and (len(w) > 5)] |
24 | + | ||
19 | return words | 25 | return words |
20 | 26 | ||
27 | +def color_func(word, font_size, position,orientation,random_state=None, **kwargs): | ||
28 | + return("hsl(0, 100%, {:d}%)".format(np.random.randint(40,65))) | ||
29 | + | ||
21 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | 30 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) |
22 | 31 | ||
23 | def GetKeywords(): | 32 | def GetKeywords(): |
... | @@ -33,9 +42,32 @@ def GetKeywords(): | ... | @@ -33,9 +42,32 @@ def GetKeywords(): |
33 | verbose= False | 42 | verbose= False |
34 | ) | 43 | ) |
35 | 44 | ||
36 | - keywords = keyword_extractor.summarize(posts, topk=30) | 45 | + keywords = keyword_extractor.summarize(posts, topk=100) |
37 | - print(keywords) | ||
38 | - exit() | ||
39 | return keywords | 46 | return keywords |
40 | 47 | ||
41 | -GetKeywords() | ||
... | \ No newline at end of file | ... | \ No newline at end of file |
48 | +def GetWordCloud(): | ||
49 | + keywords = GetKeywords() | ||
50 | + texts = {} | ||
51 | + for word in keywords: | ||
52 | + texts[word[0][0:-4]] = word[1] | ||
53 | + | ||
54 | + khu_mask = np.array(Image.open('./static/images/khu_lion.png')) | ||
55 | + | ||
56 | + wordcloud = WordCloud( | ||
57 | + font_path = './static/fonts/NanumGothicBold.ttf', | ||
58 | + width = 800, | ||
59 | + height = 600, | ||
60 | + background_color="white", | ||
61 | + mask = khu_mask, | ||
62 | + color_func = color_func | ||
63 | + ) | ||
64 | + wordcloud = wordcloud.generate_from_frequencies(texts) | ||
65 | + array = wordcloud.to_array() | ||
66 | + | ||
67 | + fig = plt.figure(figsize=(10,10)) | ||
68 | + plt.imshow(array, interpolation="bilinear") | ||
69 | + plt.axis("off") | ||
70 | + plt.show() | ||
71 | + fig.savefig('./static/images/wordcloud.png') | ||
72 | + | ||
73 | +GetWordCloud() | ||
... | \ No newline at end of file | ... | \ No newline at end of file | ... | ... |
-
Please register or login to post a comment