# GetTopic.py
# 616 Bytes
import os
import csv
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from konlpy.tag import Okt
# Module-level Okt tagger (konlpy); created once here and used by DocToNouns.
okt = Okt()
def DocToNouns(docs):
    """Extract the nouns of every document with the module-level Okt tagger.

    Args:
        docs: Iterable of documents; each item is handed to ``okt.nouns``
            unchanged.

    Returns:
        A list with one dict per document, in input order. ``'id'`` is the
        document's zero-based position in ``docs`` and ``'nouns'`` is the
        extracted nouns joined by single spaces.
    """
    extracted = []
    for position, text in enumerate(docs):
        noun_string = ' '.join(okt.nouns(text))
        extracted.append({'id': position, 'nouns': noun_string})
    return extracted
# Directory containing this script; data.csv is looked up next to it.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))

# os.path.join inserts the separator itself, so no manual '/' is needed.
data_path = os.path.join(BASE_DIR, 'data.csv')

# Every CSV row as a list of cell strings, in file order.
# newline='' is what the csv module requires so that line breaks inside
# quoted fields are parsed correctly; utf-8-sig drops a leading BOM if present.
with open(data_path, 'r', encoding='utf-8-sig', newline='') as db:
    posts = list(csv.reader(db))

# okt.nouns() takes a single string, while each CSV row is a list of cells,
# so collapse every row into one text before extracting nouns.
# NOTE(review): this joins all columns of a row -- narrow it to the text
# column if data.csv also carries non-text fields.
documents = [' '.join(row) for row in posts]

nouns = DocToNouns(documents)
print(nouns)