박은주

Add WordCloud

......@@ -2,10 +2,15 @@ import os
import csv
import re
import matplotlib.pyplot as plt
import numpy as np
from konlpy.tag import Okt
from konlpy.tag import Komoran
from PIL import Image
from textrank import KeywordSummarizer
from wordcloud import WordCloud, get_single_color_func
# Module-level Okt tagger instance, created once when this file is loaded.
okt = Okt()
def Okt_tokenizer(sent):
......@@ -15,9 +20,13 @@ def Okt_tokenizer(sent):
# Module-level Komoran tagger instance; komoran_tokenizer calls its pos() method.
komoran = Komoran()
def komoran_tokenizer(sent):
    """Tokenize *sent* with Komoran and keep only sufficiently long nouns.

    The sentence is tagged with ``komoran.pos(sent, join=True)`` and a token
    is kept when it contains the ``/NNG`` or ``/NNP`` tag and is longer than
    five characters in total.

    Args:
        sent: The sentence to tokenize.

    Returns:
        A list of the tagged tokens that passed the filter, in their
        original order.
    """
    tagged = komoran.pos(sent, join=True)
    # The length test is applied to the whole tagged token. Assuming the
    # token is "<word>/NNG" or "<word>/NNP" (4-character tag suffix), a total
    # length > 5 means the word itself has at least two characters.
    # One pass is enough: a separate noun-only filter beforehand would be
    # fully subsumed by this condition.
    return [
        w for w in tagged
        if ('/NNG' in w or '/NNP' in w) and len(w) > 5
    ]
def color_func(word, font_size, position, orientation, random_state=None, **kwargs):
    """Return a red HSL colour string with a random lightness of 40-64 %.

    The signature matches what ``WordCloud(color_func=...)`` expects; only
    ``random_state`` influences the result.

    Args:
        word: The word being coloured (unused).
        font_size: Font size chosen for the word (unused).
        position: Position of the word in the image (unused).
        orientation: Orientation of the word (unused).
        random_state: Optional random generator exposing
            ``randint(a, b)`` with an inclusive upper bound, such as
            ``random.Random``. When given, it is used so that a seeded
            caller gets reproducible colours. When ``None``, NumPy's global
            generator is used.
        **kwargs: Any extra keyword arguments from the caller (ignored).

    Returns:
        A string of the form ``"hsl(0, 100%, <lightness>%)"``.
    """
    if random_state is None:
        # np.random.randint excludes the upper bound: 40..64.
        lightness = int(np.random.randint(40, 65))
    else:
        # random.Random.randint includes the upper bound, so 64 keeps the
        # same 40..64 range as the NumPy fallback.
        lightness = int(random_state.randint(40, 64))
    return "hsl(0, 100%, {:d}%)".format(lightness)
# Absolute path of the directory that contains this file.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
def GetKeywords():
......@@ -33,9 +42,32 @@ def GetKeywords():
verbose= False
)
keywords = keyword_extractor.summarize(posts, topk=30)
print(keywords)
exit()
keywords = keyword_extractor.summarize(posts, topk=100)
return keywords
# Module-level call: runs GetKeywords() when this file is loaded; the return
# value is discarded.
# NOTE(review): this listing is a diff view, so this may be the pre-change
# entry point rather than a live line - confirm against the real file.
GetKeywords()
\ No newline at end of file
def GetWordCloud():
    """Render the extracted keywords as a masked word cloud, save and show it.

    Calls ``GetKeywords()``, maps each keyword (minus its last four
    characters) to its score, draws the cloud inside the
    ``khu_lion.png`` mask with ``color_func`` colouring, writes the figure
    to ``./static/images/wordcloud.png`` and finally displays it.

    All file paths are relative to the current working directory.

    Returns:
        None.
    """
    keywords = GetKeywords()

    # Each entry is indexed as (tagged_word, score). The last four characters
    # are dropped - presumably the "/NNG" / "/NNP" tag suffix produced by
    # komoran_tokenizer; confirm if a different tokenizer is configured.
    # When two tagged words share the same stripped form, the later score
    # overwrites the earlier one.
    texts = {word[0][0:-4]: word[1] for word in keywords}

    khu_mask = np.array(Image.open('./static/images/khu_lion.png'))
    # NOTE(review): per the wordcloud documentation, width/height are ignored
    # when a mask is supplied - confirm for the installed version.
    wordcloud = WordCloud(
        font_path='./static/fonts/NanumGothicBold.ttf',
        width=800,
        height=600,
        background_color="white",
        mask=khu_mask,
        color_func=color_func,
    )
    wordcloud = wordcloud.generate_from_frequencies(texts)
    array = wordcloud.to_array()

    fig = plt.figure(figsize=(10, 10))
    plt.imshow(array, interpolation="bilinear")
    plt.axis("off")
    # Save before showing: plt.show() can block until the window is closed,
    # so writing the file first guarantees the PNG exists regardless of how
    # the interactive window is dismissed.
    fig.savefig('./static/images/wordcloud.png')
    plt.show()
# Module-level call: the word cloud is built, saved and shown whenever this
# file is loaded.
GetWordCloud()
\ No newline at end of file
......