박은주

Add WordCloud

...@@ -2,10 +2,15 @@ import os ...@@ -2,10 +2,15 @@ import os
2 import csv 2 import csv
3 import re 3 import re
4 4
5 +import matplotlib.pyplot as plt
6 +import numpy as np
5 from konlpy.tag import Okt 7 from konlpy.tag import Okt
6 from konlpy.tag import Komoran 8 from konlpy.tag import Komoran
9 +from PIL import Image
7 10
8 from textrank import KeywordSummarizer 11 from textrank import KeywordSummarizer
12 +from wordcloud import WordCloud, get_single_color_func
13 +
9 14
10 okt = Okt() 15 okt = Okt()
11 def Okt_tokenizer(sent): 16 def Okt_tokenizer(sent):
...@@ -15,9 +20,13 @@ def Okt_tokenizer(sent): ...@@ -15,9 +20,13 @@ def Okt_tokenizer(sent):
15 komoran = Komoran() 20 komoran = Komoran()
def komoran_tokenizer(sent):
    """Tokenize *sent* with Komoran and keep only the longer noun tokens.

    Tokens come from ``komoran.pos(sent, join=True)``, i.e. each one is a
    single joined "morpheme/TAG" string.

    Args:
        sent: The sentence to analyse.

    Returns:
        The tokens that contain ``/NNG`` or ``/NNP`` (Komoran's general-noun
        and proper-noun tags) and whose total length is greater than 5.
    """
    selected = []
    for token in komoran.pos(sent, join=True):
        is_noun = '/NNG' in token or '/NNP' in token
        # The length includes the 4-character "/TAG" suffix, so "> 5"
        # drops tokens whose morpheme part is a single character.
        if is_noun and len(token) > 5:
            selected.append(token)
    return selected
20 26
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a red HSL color string with a randomly chosen lightness.

    The signature is the callback form expected by WordCloud's
    ``color_func`` option; only ``random_state`` influences the result.

    Args:
        word: Unused.
        font_size: Unused.
        position: Unused.
        orientation: Unused.
        random_state: Optional random generator exposing
            ``randint(a, b)`` with an inclusive upper bound (such as
            ``random.Random``). When given, the lightness is drawn from it
            so that a seeded generator yields reproducible colors.
        **kwargs: Ignored; accepted so extra callback arguments do not fail.

    Returns:
        A string of the form ``"hsl(0, 100%, L%)"`` with 40 <= L <= 64.
    """
    if random_state is None:
        # No generator supplied: fall back to NumPy's global RNG
        # (upper bound exclusive, hence 65).
        lightness = int(np.random.randint(40, 65))
    else:
        # Inclusive upper bound, so 64 keeps the same 40..64 range.
        lightness = random_state.randint(40, 64)
    return "hsl(0, 100%, {:d}%)".format(lightness)
29 +
21 BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 30 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
22 31
23 def GetKeywords(): 32 def GetKeywords():
...@@ -33,9 +42,32 @@ def GetKeywords(): ...@@ -33,9 +42,32 @@ def GetKeywords():
33 verbose= False 42 verbose= False
34 ) 43 )
35 44
36 - keywords = keyword_extractor.summarize(posts, topk=30) 45 + keywords = keyword_extractor.summarize(posts, topk=100)
37 - print(keywords)
38 - exit()
39 return keywords 46 return keywords
40 47
41 -GetKeywords()
...\ No newline at end of file ...\ No newline at end of file
def GetWordCloud():
    """Build a word cloud from the extracted keywords and save it as a PNG.

    Takes the result of ``GetKeywords()``, renders the frequencies inside
    the ``khu_lion.png`` mask using ``color_func`` for coloring, writes the
    figure to ``./static/images/wordcloud.png`` and then displays it.

    All paths are relative to the current working directory.
    """
    keywords = GetKeywords()
    # Each entry is indexed as (token, score). The last four characters of
    # the token are dropped -- presumably the "/NNG" or "/NNP" tag suffix;
    # confirm against the tokenizer used by GetKeywords.
    texts = {word[0][0:-4]: word[1] for word in keywords}

    # Use a context manager so the image file handle is released as soon
    # as the pixel data has been copied into the array.
    with Image.open('./static/images/khu_lion.png') as mask_image:
        khu_mask = np.array(mask_image)

    # NOTE(review): per the wordcloud documentation, width/height are
    # ignored when a mask is supplied; they are kept here unchanged.
    wordcloud = WordCloud(
        font_path='./static/fonts/NanumGothicBold.ttf',
        width=800,
        height=600,
        background_color="white",
        mask=khu_mask,
        color_func=color_func,
    )
    wordcloud = wordcloud.generate_from_frequencies(texts)
    array = wordcloud.to_array()

    fig = plt.figure(figsize=(10, 10))
    try:
        plt.imshow(array, interpolation="bilinear")
        plt.axis("off")
        # Save before show(): show() may block until the window is closed,
        # and writing first guarantees the file exists regardless of what
        # happens to the window afterwards.
        fig.savefig('./static/images/wordcloud.png')
        plt.show()
    finally:
        # Release the figure so repeated calls do not accumulate figures.
        plt.close(fig)
72 +
73 +GetWordCloud()
...\ No newline at end of file ...\ No newline at end of file
......